Skip to content

Commit

Permalink
Introduce codegen for the Signal Processing Engine
Browse files Browse the repository at this point in the history
Summary:
The Signal Processing Engine (SPE) is found on NXP/Freescale e500v1,
e500v2, and several e200 cores.  This patch adds support for targeting
the e500v2, as it is more common than the e500v1 and is found in SoCs
still on the market.

This patch is very intrusive because the SPE is binary incompatible with
the traditional FPU.  After discussing with others, the cleanest
solution was to make both SPE and FPU features on top of a base PowerPC
subset, so all FPU instructions are now wrapped with HasFPU predicates.

Supported by this are:
* Code generation following the SPE ABI at the LLVM IR level (calling
conventions)
* Single- and Double-precision math at the level supported by the APU.

Still to do:
* Vector operations
* SPE intrinsics

As this changes the Callee-saved register list order, one test, which
tests the precise generated code, was updated to account for the new
register order.

Reviewed by: nemanjai
Differential Revision: https://reviews.llvm.org/D44830

llvm-svn: 337347
  • Loading branch information
Justin Hibbits committed Jul 18, 2018
1 parent 4fa4fa6 commit d52990c
Show file tree
Hide file tree
Showing 23 changed files with 1,922 additions and 624 deletions.
20 changes: 20 additions & 0 deletions llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
Expand Up @@ -83,6 +83,16 @@ static const MCPhysReg FRegs[32] = {
PPC::F24, PPC::F25, PPC::F26, PPC::F27,
PPC::F28, PPC::F29, PPC::F30, PPC::F31
};
// Lookup table for the SPE registers: entry i holds PPC::S<i>, for
// i in [0, 31].
static const MCPhysReg SPERegs[32] = {
  PPC::S0,  PPC::S1,  PPC::S2,  PPC::S3,
  PPC::S4,  PPC::S5,  PPC::S6,  PPC::S7,
  PPC::S8,  PPC::S9,  PPC::S10, PPC::S11,
  PPC::S12, PPC::S13, PPC::S14, PPC::S15,
  PPC::S16, PPC::S17, PPC::S18, PPC::S19,
  PPC::S20, PPC::S21, PPC::S22, PPC::S23,
  PPC::S24, PPC::S25, PPC::S26, PPC::S27,
  PPC::S28, PPC::S29, PPC::S30, PPC::S31
};
static const MCPhysReg VFRegs[32] = {
PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
Expand Down Expand Up @@ -648,6 +658,16 @@ struct PPCOperand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
}

/// Append this operand to \p Inst as a single register operand, using the
/// value of getReg() as an index into the RRegs table.
///
/// \param Inst instruction that receives the new operand.
/// \param N    number of operands requested; asserted to be exactly 1.
void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const auto RegIdx = getReg();
  const MCOperand RegOp = MCOperand::createReg(RRegs[RegIdx]);
  Inst.addOperand(RegOp);
}

/// Append this operand to \p Inst as a single register operand, using the
/// value of getReg() as an index into the SPERegs table.
///
/// \param Inst instruction that receives the new operand.
/// \param N    number of operands requested; asserted to be exactly 1.
void addRegSPERCOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const auto RegIdx = getReg();
  const MCOperand RegOp = MCOperand::createReg(SPERegs[RegIdx]);
  Inst.addOperand(RegOp);
}

void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(CRBITRegs[getCRBit()]));
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
Expand Up @@ -226,6 +226,17 @@ static const unsigned QFRegs[] = {
PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
};

// Decoder lookup table for the SPE registers: entry i holds PPC::S<i>, for
// i in [0, 31].
static const unsigned SPERegs[] = {
  PPC::S0,  PPC::S1,  PPC::S2,  PPC::S3,
  PPC::S4,  PPC::S5,  PPC::S6,  PPC::S7,
  PPC::S8,  PPC::S9,  PPC::S10, PPC::S11,
  PPC::S12, PPC::S13, PPC::S14, PPC::S15,
  PPC::S16, PPC::S17, PPC::S18, PPC::S19,
  PPC::S20, PPC::S21, PPC::S22, PPC::S23,
  PPC::S24, PPC::S25, PPC::S26, PPC::S27,
  PPC::S28, PPC::S29, PPC::S30, PPC::S31
};

template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const unsigned (&Regs)[N]) {
Expand Down Expand Up @@ -327,6 +338,18 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, QFRegs);
}

/// Decode an SPE4RC register operand by delegating to decodeRegisterClass
/// with the GPRegs table.
///
/// \param Inst    instruction being built by the disassembler.
/// \param RegNo   encoded register number taken from the instruction.
/// \param Address not used by this decoder.
/// \param Decoder not used by this decoder.
/// \returns the status reported by decodeRegisterClass.
static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
  const DecodeStatus Status = decodeRegisterClass(Inst, RegNo, GPRegs);
  return Status;
}

/// Decode an SPERC register operand by delegating to decodeRegisterClass
/// with the SPERegs table.
///
/// \param Inst    instruction being built by the disassembler.
/// \param RegNo   encoded register number taken from the instruction.
/// \param Address not used by this decoder.
/// \param Decoder not used by this decoder.
/// \returns the status reported by decodeRegisterClass.
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
  const DecodeStatus Status = decodeRegisterClass(Inst, RegNo, SPERegs);
  return Status;
}

#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
Expand Up @@ -50,6 +50,9 @@ namespace PPC {
PRED_UN_PLUS = (3 << 5) | 15,
PRED_NU_PLUS = (3 << 5) | 7,

// SPE scalar compare instructions always set the GT bit.
PRED_SPE = PRED_GT,

// When dealing with individual condition-register bits, we have simple set
// and unset predicates.
PRED_BIT_SET = 1024,
Expand Down
31 changes: 17 additions & 14 deletions llvm/lib/Target/PowerPC/PPC.td
Expand Up @@ -61,46 +61,49 @@ def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
"Use condition-register bits individually">;
def FeatureFPU : SubtargetFeature<"fpu","HasFPU","true",
"Enable classic FPU instructions",
[FeatureHardFloat]>;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
"Enable SPE instructions",
[FeatureHardFloat]>;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
"Enable the fcpsgn instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
"Enable the fre instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
"Enable the fres instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
"Enable the frsqrte instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
"Enable the frsqrtes instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
"Assume higher precision reciprocal estimates">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
"Enable the lfiwax instruction",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
"Enable the fri[mnpz] instructions",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
"Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
Expand Down Expand Up @@ -129,7 +132,7 @@ def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
"Enable PPC 6xx instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions",
[FeatureHardFloat]>;
[FeatureFPU]>;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
Expand Down Expand Up @@ -308,8 +311,8 @@ def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601, FeatureHardFloat]>;
def : Processor<"602", G3Itineraries, [Directive602, FeatureHardFloat,
def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>;
def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU,
FeatureMFTB]>;
def : Processor<"603", G3Itineraries, [Directive603,
FeatureFRES, FeatureFRSQRTE,
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Expand Up @@ -510,6 +510,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const Module *M = MF->getFunction().getParent();
PICLevel::Level PL = M->getPICLevel();

#ifndef NDEBUG
// Validate that SPE and FPU are mutually exclusive in codegen
if (!MI->isInlineAsm()) {
for (const MachineOperand &MO: MI->operands()) {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
PPC::QBRCRegClass.contains(Reg) ||
PPC::QFRCRegClass.contains(Reg) ||
PPC::QSRCRegClass.contains(Reg) ||
PPC::VFRCRegClass.contains(Reg) ||
PPC::VRRCRegClass.contains(Reg) ||
PPC::VSFRCRegClass.contains(Reg) ||
PPC::VSSRCRegClass.contains(Reg)
)
llvm_unreachable("SPE targets cannot have FPRegs!");
} else {
if (PPC::SPERCRegClass.contains(Reg))
llvm_unreachable("SPE register found in FPU-targeted code!");
}
}
}
}
#endif
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
Expand Down
42 changes: 34 additions & 8 deletions llvm/lib/Target/PowerPC/PPCCallingConv.td
Expand Up @@ -83,8 +83,14 @@ def RetCC_PPC : CallingConv<[

// Floating point types returned as "direct" go into F1 .. F8; note that
// only the ELFv2 ABI fully utilizes all these registers.  With SPE, f32 is
// returned in R3 .. R10 and f64 in S3 .. S10 instead.
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfNotSubtarget<"hasSPE()",
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
CCIfNotSubtarget<"hasSPE()",
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
CCIfSubtarget<"hasSPE()",
CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
CCIfSubtarget<"hasSPE()",
CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,

// For P9, f128 are passed in vector registers.
CCIfType<[f128],
Expand Down Expand Up @@ -188,7 +194,15 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,

// FP values are passed in F1 - F8, or with SPE in R3 - R10 (f32) and
// S3 - S10 (f64).
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f32, f64],
CCIfNotSubtarget<"hasSPE()",
CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
CCIfType<[f64],
CCIfSubtarget<"hasSPE()",
CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
CCIfType<[f32],
CCIfSubtarget<"hasSPE()",
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,

// Split arguments have an alignment of 8 bytes on the stack.
CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
Expand All @@ -197,7 +211,11 @@ def CC_PPC32_SVR4_Common : CallingConv<[

// Floats are stored in double precision format, thus they have the same
// alignment and size as doubles.
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
// With SPE, floats are stored in single precision, so they have the
// alignment and size of an int.
CCIfType<[f32,f64], CCIfNotSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,

// QPX vectors that are stored in double precision need 32-byte alignment.
CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
Expand Down Expand Up @@ -265,15 +283,23 @@ def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,

def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;

def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27, R28,
R29, R30, R31, F14, F15, F16, F17, F18,
// SPE does not use FPRs, so break out the common register set as base.
def CSR_SVR432_COMM : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27,
R28, R29, R30, R31, CR2, CR3, CR4
)>;
def CSR_SVR432 : CalleeSavedRegs<(add CSR_SVR432_COMM, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
F27, F28, F29, F30, F31, CR2, CR3, CR4
F27, F28, F29, F30, F31
)>;
def CSR_SPE : CalleeSavedRegs<(add S14, S15, S16, S17, S18, S19, S20, S21, S22,
S23, S24, S25, S26, S27, S28, S29, S30, S31
)>;

def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;

def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;

def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
Expand Down

0 comments on commit d52990c

Please sign in to comment.