Skip to content

Commit

Permalink
[X86] Support EGPR (R16-R31) for APX (#70958)
Browse files Browse the repository at this point in the history
1. Map R16-R31 to DWARF registers 130-145.
2. Make R16-R31 caller-saved registers.
3. Make R16-R31 allocatable only when feature EGPR is supported.
4. Make R16-R31 available for instructions in legacy maps 0/1 and EVEX
space, except XSAVE*/XRSTOR*.

RFC:

https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4

Explanations for some seemingly unrelated changes:

inline-asm-registers.mir, statepoint-invoke-ra-enter-at-end.mir:
The immediate (TargetInstrInfo.cpp:1612) used for the regdef/reguse is
the encoding for the register
  class in the enum generated by tablegen. This encoding will change
  any time a new register class is added. Since the number is part
  of the input, this means it can become stale.

seh-directive-errors.s:
   R16-R31 makes ".seh_pushreg 17" legal

musttail-varargs.ll:
It seems some LLVM passes use the number of registers rather than the
number of allocatable registers as a heuristic.

This PR is to reland #67702 after #70222 in order to reduce some
compile-time regression when EGPR is not used.
  • Loading branch information
KanRobert committed Nov 9, 2023
1 parent ed86e74 commit c9017bc
Show file tree
Hide file tree
Showing 17 changed files with 1,580 additions and 50 deletions.
37 changes: 37 additions & 0 deletions llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,43 @@ namespace X86II {
return false;
}

inline bool canUseApxExtendedReg(const MCInstrDesc &Desc) {
uint64_t TSFlags = Desc.TSFlags;
uint64_t Encoding = TSFlags & EncodingMask;
// EVEX can always use egpr.
if (Encoding == X86II::EVEX)
return true;

// To be conservative, egpr is not used for all pseudo instructions
// because we are not sure what instruction it will become.
// FIXME: Could we improve it in X86ExpandPseudo?
if (isPseudo(TSFlags))
return false;

// MAP OB/TB in legacy encoding space can always use egpr except
// XSAVE*/XRSTOR*.
unsigned Opcode = Desc.Opcode;
switch (Opcode) {
default:
break;
case X86::XSAVE:
case X86::XSAVE64:
case X86::XSAVEOPT:
case X86::XSAVEOPT64:
case X86::XSAVEC:
case X86::XSAVEC64:
case X86::XSAVES:
case X86::XSAVES64:
case X86::XRSTOR:
case X86::XRSTOR64:
case X86::XRSTORS:
case X86::XRSTORS64:
return false;
}
uint64_t OpMap = TSFlags & X86II::OpMapMask;
return !Encoding && (OpMap == X86II::OB || OpMap == X86II::TB);
}

/// \returns true if the MemoryOperand is a 32 extended (zmm16 or higher)
/// registers, e.g. zmm21, etc.
static inline bool is32ExtendedReg(unsigned RegNo) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
// APX extended general purpose registers (EGPR), i.e. R16-R31.
def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
"Support extended general purpose register">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
Expand Down
31 changes: 31 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,37 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Subtarget(STI), RI(STI.getTargetTriple()) {
}

/// Returns the register class constraint for operand \p OpNum of \p MCID.
/// When the subtarget has egpr but the instruction cannot use the APX
/// extended registers, the plain GR* classes are replaced by their
/// GR*_NOREX2 counterparts.
const TargetRegisterClass *
X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
                          const TargetRegisterInfo *TRI,
                          const MachineFunction &MF) const {
  const TargetRegisterClass *BaseRC =
      TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);

  // No constraint at all: nothing to adjust.
  if (!BaseRC)
    return nullptr;

  // If the target does not have egpr, then r16-r31 will be reserved for all
  // instructions, so the class from the generic implementation is fine.
  if (!Subtarget.hasEGPR())
    return BaseRC;

  // The instruction can use the extended registers: keep the class as is.
  if (X86II::canUseApxExtendedReg(MCID))
    return BaseRC;

  // Otherwise swap the general purpose classes for their NOREX2 variants.
  // Any other class is returned unchanged.
  const TargetRegisterClass *Result = BaseRC;
  switch (BaseRC->getID()) {
  case X86::GR8RegClassID:
    Result = &X86::GR8_NOREX2RegClass;
    break;
  case X86::GR16RegClassID:
    Result = &X86::GR16_NOREX2RegClass;
    break;
  case X86::GR32RegClassID:
    Result = &X86::GR32_NOREX2RegClass;
    break;
  case X86::GR64RegClassID:
    Result = &X86::GR64_NOREX2RegClass;
    break;
  case X86::GR32_NOSPRegClassID:
    Result = &X86::GR32_NOREX2_NOSPRegClass;
    break;
  case X86::GR64_NOSPRegClassID:
    Result = &X86::GR64_NOREX2_NOSPRegClass;
    break;
  default:
    break;
  }
  return Result;
}

bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,17 @@ class X86InstrInfo final : public X86GenInstrInfo {
public:
explicit X86InstrInfo(X86Subtarget &STI);

/// Given a machine instruction descriptor, returns the register class
/// constraint for operand \p OpNum, or null.
///
/// The returned register class may be different from the definition in
/// the TD file, e.g.
///   GR*RegClass        (definition in TD file)
///   ->
///   GR*_NOREX2RegClass (returned register class)
const TargetRegisterClass *
getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) const override;

/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
Expand Down
15 changes: 12 additions & 3 deletions llvm/lib/Target/X86/X86RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::GR16RegClassID:
case X86::GR32RegClassID:
case X86::GR64RegClassID:
case X86::GR8_NOREX2RegClassID:
case X86::GR16_NOREX2RegClassID:
case X86::GR32_NOREX2RegClassID:
case X86::GR64_NOREX2RegClassID:
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
Expand Down Expand Up @@ -611,6 +615,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}

// Reserve the extended general purpose registers.
if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
Reserved.set(X86::R16, X86::R31WH + 1);

assert(checkAllSuperRegsMarked(Reserved,
{X86::SIL, X86::DIL, X86::BPL, X86::SPL,
X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
Expand All @@ -629,13 +637,14 @@ unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
// APX registers (R16-R31)
//
// and try to return the minimum number of registers supported by the target.

assert((X86::R15WH + 1 == X86 ::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
(X86::K6_K7 + 1 == X86::TMMCFG) &&
(X86::TMM7 + 1 == X86::NUM_TARGET_REGS) &&
(X86::K6_K7 + 1 == X86::TMMCFG) && (X86::TMM7 + 1 == X86::R16) &&
(X86::R31WH + 1 == X86::NUM_TARGET_REGS) &&
"Register number may be incorrect");

const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
if (ST.hasEGPR())
return X86::NUM_TARGET_REGS;
if (ST.hasAMXTILE())
return X86::TMM7 + 1;
if (ST.hasAVX512())
Expand Down

0 comments on commit c9017bc

Please sign in to comment.