Skip to content

Commit

Permalink
[PowerPC][Future] Add Support For Functions That Do Not Use A TOC.
Browse files Browse the repository at this point in the history
On PowerPC most functions require a valid TOC pointer.

This is the case because either the function itself needs to use this
pointer to access the TOC or because other functions that are called
from that function expect a valid TOC pointer in the register R2.
The main exception to this is leaf functions that do not access the TOC
since they are guaranteed not to need a valid TOC pointer.

This patch introduces a feature that will allow more functions to not
require a valid TOC pointer in R2.

Differential Revision: https://reviews.llvm.org/D73664
  • Loading branch information
stefanp-ibm authored and kamaub committed Apr 8, 2020
1 parent f3bf25e commit 6c4b40d
Show file tree
Hide file tree
Showing 25 changed files with 950 additions and 35 deletions.
6 changes: 0 additions & 6 deletions llvm/include/llvm/BinaryFormat/ELF.h
Expand Up @@ -393,12 +393,6 @@ static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) {
unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
return ((1 << Val) >> 2) << 2;
}
// Maps Offset onto a small field value by threshold and returns that value
// shifted into position STO_PPC64_LOCAL_BIT. Thresholds are checked from the
// largest down; any Offset below 1 * 4 yields a field value of 0.
static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) {
  unsigned FieldValue = 0;
  if (Offset >= 16 * 4)
    FieldValue = 6;
  else if (Offset >= 8 * 4)
    FieldValue = 5;
  else if (Offset >= 4 * 4)
    FieldValue = 4;
  else if (Offset >= 2 * 4)
    FieldValue = 3;
  else if (Offset >= 1 * 4)
    FieldValue = 2;
  return FieldValue << STO_PPC64_LOCAL_BIT;
}

// ELF Relocation types for PPC64
enum {
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
Expand Up @@ -96,6 +96,7 @@
#undef R_PPC64_TPREL16_HIGHA
#undef R_PPC64_DTPREL16_HIGH
#undef R_PPC64_DTPREL16_HIGHA
#undef R_PPC64_REL24_NOTOC
#undef R_PPC64_IRELATIVE
#undef R_PPC64_REL16
#undef R_PPC64_REL16_LO
Expand Down Expand Up @@ -190,6 +191,7 @@ ELF_RELOC(R_PPC64_TPREL16_HIGH, 112)
ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113)
ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114)
ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115)
ELF_RELOC(R_PPC64_REL24_NOTOC, 116)
ELF_RELOC(R_PPC64_IRELATIVE, 248)
ELF_RELOC(R_PPC64_REL16, 249)
ELF_RELOC(R_PPC64_REL16_LO, 250)
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/MC/MCExpr.h
Expand Up @@ -284,6 +284,7 @@ class MCSymbolRefExpr : public MCExpr {
VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha
VK_PPC_TLSLD, // symbol@tlsld
VK_PPC_LOCAL, // symbol@local
VK_PPC_NOTOC, // symbol@notoc

VK_COFF_IMGREL32, // symbol@imgrel (image-relative)

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/MC/MCExpr.cpp
Expand Up @@ -319,6 +319,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
case VK_PPC_TLSLD: return "tlsld";
case VK_PPC_LOCAL: return "local";
case VK_PPC_NOTOC: return "notoc";
case VK_COFF_IMGREL32: return "IMGREL";
case VK_Hexagon_LO16: return "LO16";
case VK_Hexagon_HI16: return "HI16";
Expand Down Expand Up @@ -432,6 +433,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
.Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
.Case("notoc", VK_PPC_NOTOC)
.Case("gdgot", VK_Hexagon_GD_GOT)
.Case("gdplt", VK_Hexagon_GD_PLT)
.Case("iegot", VK_Hexagon_IE_GOT)
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
Expand Up @@ -39,6 +39,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
return Value & 0x3fffffc;
case PPC::fixup_ppc_half16:
return Value & 0xffff;
Expand All @@ -62,6 +63,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case PPC::fixup_ppc_brcond14abs:
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
return 4;
case FK_Data_8:
return 8;
Expand All @@ -88,6 +90,7 @@ class PPCAsmBackend : public MCAsmBackend {
const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 6, 24, 0 },
{ "fixup_ppc_brcond14abs", 16, 14, 0 },
Expand All @@ -98,6 +101,7 @@ class PPCAsmBackend : public MCAsmBackend {
const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 2, 24, 0 },
{ "fixup_ppc_brcond14abs", 2, 14, 0 },
Expand Down Expand Up @@ -151,6 +155,7 @@ class PPCAsmBackend : public MCAsmBackend {
return Kind >= FirstLiteralRelocationKind;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
// If the target symbol has a local entry point we must not attempt
// to resolve the fixup directly. Emit a relocation and leave
// resolution of the final target address to the linker.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
Expand Up @@ -86,6 +86,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_None:
Expand All @@ -97,6 +98,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_LOCAL:
Type = ELF::R_PPC_LOCAL24PC;
break;
case MCSymbolRefExpr::VK_PPC_NOTOC:
Type = ELF::R_PPC64_REL24_NOTOC;
break;
}
break;
case PPC::fixup_ppc_brcond14:
Expand Down Expand Up @@ -431,6 +435,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return false;

case ELF::R_PPC_REL24:
case ELF::R_PPC64_REL24_NOTOC:
// If the target symbol has a local entry point, we must keep the
// target symbol to preserve that information for the linker.
// The "other" values are stored in the last 6 bits of the second byte.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
Expand Up @@ -19,6 +19,10 @@ enum Fixups {
// 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,

// 24-bit PC relative relocation for direct branches like 'b' and 'bl' where
// the caller does not use the TOC.
fixup_ppc_br24_notoc,

/// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
Expand Up @@ -48,7 +48,9 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,

// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
((MI.getOpcode() == PPC::BL8_NOTOC)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
}

Expand Down
35 changes: 28 additions & 7 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
Expand Up @@ -179,13 +179,9 @@ class PPCTargetELFStreamer : public PPCTargetStreamer {
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();

int64_t Res;
if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
report_fatal_error(".localentry expression must be absolute.");

unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
report_fatal_error(".localentry expression cannot be encoded.");
// encodePPC64LocalEntryOffset will report an error if it cannot
// encode LocalOffset.
unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset);

unsigned Other = S->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
Expand Down Expand Up @@ -230,6 +226,31 @@ class PPCTargetELFStreamer : public PPCTargetStreamer {
D->setOther(Other);
return true;
}

/// Translate a .localentry offset expression into the bits that are stored
/// in the local-entry field of a symbol's st_other value.
///
/// \param LocalOffset expression giving the local entry offset; it has to
///        evaluate to an absolute value.
/// \returns the field value, already shifted by ELF::STO_PPC64_LOCAL_BIT.
///
/// A fatal error is reported at the expression's location when it is not
/// absolute, or when its value is not one of 0, 1, 4, 8, 16, 32 or 64.
unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) {
  MCAssembler &MCA = getStreamer().getAssembler();
  int64_t Offset;
  if (!LocalOffset->evaluateAsAbsolute(Offset, MCA))
    MCA.getContext().reportFatalError(
        LocalOffset->getLoc(), ".localentry expression must be absolute.");

  switch (Offset) {
  default:
    // NOTE(review): presumably reportFatalError does not return; if it did,
    // control would continue into the `case 0` label below -- confirm.
    MCA.getContext().reportFatalError(
        LocalOffset->getLoc(),
        ".localentry expression is not a valid power of 2.");
  case 0:
    return 0;
  case 1:
    return 1 << ELF::STO_PPC64_LOCAL_BIT;
  // For the remaining accepted offsets the field holds log2(Offset). The
  // exponent is written out as an integer constant per case so that no
  // logarithm helper or cast is needed to produce it.
  case 4:
    return 2 << ELF::STO_PPC64_LOCAL_BIT;
  case 8:
    return 3 << ELF::STO_PPC64_LOCAL_BIT;
  case 16:
    return 4 << ELF::STO_PPC64_LOCAL_BIT;
  case 32:
    return 5 << ELF::STO_PPC64_LOCAL_BIT;
  case 64:
    return 6 << ELF::STO_PPC64_LOCAL_BIT;
  }
}
};

class PPCTargetMachOStreamer : public PPCTargetStreamer {
Expand Down
41 changes: 36 additions & 5 deletions llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Expand Up @@ -1460,14 +1460,16 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) ||
!MF->getRegInfo().use_empty(PPC::R2);
// Only do all that if the function uses R2 as the TOC pointer
// in the first place. We don't need the global entry point if the
// function uses R2 as an allocatable register.
if (Subtarget->isELFv2ABI() && UsesX2OrR2 && PPCFI->usesTOCBasePtr()) {
// Note: The logic here must be synchronized with the code in the
// branch-selection pass which sets the offset of the first block in the
// function. This matters because it affects the alignment.
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();

MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
OutStreamer->emitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
Expand Down Expand Up @@ -1519,6 +1521,35 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {

if (TS)
TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
} else if (Subtarget->isELFv2ABI()) {
// When generating the entry point for a function we have a few scenarios
// based on whether or not that function uses R2 and whether or not that
// function makes calls (or is a leaf function).
// 1) A leaf function that does not use R2 (or treats it as callee-saved
// and preserves it). In this case st_other=0 and both
// the local and global entry points for the function are the same.
// No special entry point code is required.
// 2) A function uses the TOC pointer R2. This function may or may not have
// calls. In this case st_other=[2,6] and the global and local entry
// points are different. Code to correctly setup the TOC pointer in R2
// is put between the global and local entry points. This case is
// covered by the if statement above.
// 3) A function does not use the TOC pointer R2 but does have calls.
// In this case st_other=1 since we do not know whether or not any
// of the callees clobber R2. This case is dealt with in this else if
// block.
// 4) The function does not use the TOC pointer but R2 is used inside
// the function. In this case st_other=1 once again.
// 5) This function uses inline asm. We mark R2 as reserved if the function
// has inline asm so we have to assume that it may be used.
if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() ||
(!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym),
MCConstantExpr::create(1, OutContext));
}
}
}

Expand Down
34 changes: 29 additions & 5 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -1404,6 +1404,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
Expand Down Expand Up @@ -4689,6 +4690,16 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
SelectionDAG& DAG) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

// FIXME: Tail calls are currently disabled when using PC Relative addressing.
// The issue is that PC Relative is only partially implemented and so there
// is currently a mix of functions that require the TOC and functions that do
// not require it. If we have A calls B calls C and both A and B require the
// TOC and C does not and is marked as clobbering R2 then it is not safe for
// B to tail call C. Since we do not have the information of whether or not
// a function needs to use the TOC here in this function we need to be
// conservatively safe and disable all tail calls for now.
if (Subtarget.isUsingPCRelativeCalls()) return false;

if (DisableSCO && !TailCallOpt) return false;

// Variadic argument functions are not supported.
Expand Down Expand Up @@ -5085,6 +5096,17 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
return PPCISD::BCTRL;
}

// FIXME: At this moment indirect calls are treated ahead of the
// PC Relative condition because binaries can still contain a possible
// mix of functions that use a TOC and functions that do not use a TOC.
// Once the PC Relative feature is complete this condition should be moved
// up ahead of the indirect calls and should return a PPCISD::BCTRL for
// that case.
if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
return PPCISD::CALL_NOTOC;
}

// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time if the linker determines the calls
Expand All @@ -5094,8 +5116,8 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// will rewrite the nop to be a load of the TOC pointer from the linkage area
// into gpr2.
if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
: PPCISD::CALL_NOP;
return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
: PPCISD::CALL_NOP;

return PPCISD::CALL;
}
Expand Down Expand Up @@ -5372,7 +5394,7 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
// no way to mark dependencies as implicit here.
// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
!CFlags.IsPatchPoint)
!CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
Expand All @@ -5398,7 +5420,8 @@ SDValue PPCTargetLowering::FinishCall(
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {

if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
Subtarget.isAIXABI())
setUsesTOCBasePtr(DAG);

unsigned CallOpc =
Expand Down Expand Up @@ -11373,7 +11396,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
if (Subtarget.is64BitELFABI() &&
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
MI.getOpcode() == TargetOpcode::PATCHPOINT &&
!Subtarget.isUsingPCRelativeCalls()) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't however, because there is no
// way to mark the dependence as implicit there, and so the stackmap code
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Expand Up @@ -165,9 +165,11 @@ namespace llvm {

/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls and 32-bit/64-bit AIX calls.
/// CALL_NOTOC is a call where the caller does not use the TOC.
CALL,
CALL_NOP,
CALL_NOTOC,

/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Expand Up @@ -140,6 +140,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins abscalltarget:$func),
"bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop (i64 imm:$func))]>;
let Predicates = [PCRelativeMemops] in {
// BL8_NOTOC is a direct call for callers that do not use the TOC pointer;
// if such a caller does use R2, it is just a caller-saved register.
// Therefore it is safe to emit only the bl, without a trailing nop, and
// the linker will not try to restore R2 after the call.
// The def carries no selection pattern of its own (empty pattern list) and
// is only available under the PCRelativeMemops predicate.
def BL8_NOTOC : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs),
(ins calltarget:$func),
"bl $func", IIC_BrB, []>;
}
}
let Uses = [CTR8, RM] in {
let isPredicable = 1 in
Expand Down Expand Up @@ -194,6 +203,11 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;

// Select PPCcall_notoc nodes whose target is a global address or an
// external symbol to the BL8_NOTOC instruction.
def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
(BL8_NOTOC tglobaladdr:$dst)>;
def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
(BL8_NOTOC texternalsym:$dst)>;

// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Expand Up @@ -252,6 +252,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// DAG node for PPCISD::CALL_NOTOC. It uses the SDT_PPCCall type profile and
// is chained, variadic, takes optional input glue and produces output glue.
def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
Expand Down Expand Up @@ -994,6 +997,7 @@ def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">;
def HasFPU : Predicate<"PPCSubTarget->hasFPU()">;
def PCRelativeMemops : Predicate<"PPCSubTarget->hasPCRelativeMemops()">;

//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
Expand Down

0 comments on commit 6c4b40d

Please sign in to comment.