Skip to content

Commit

Permalink
[PowerPC] Respect rounding mode in the back end
Browse files Browse the repository at this point in the history
Currently, the floating point instructions that depend on
rounding mode are correctly marked in the PPC back end with
an implicit use of the RM register. Similarly, instructions
that explicitly define the register are marked with an
implicit def of the same register. So for the most part,
RM-using code won't be moved across RM-setting instructions.

However, calls are not marked as RM-setting instructions so
code can be moved across calls. This is generally desired,
but so is the ability to turn off this behaviour with an
appropriate option - and -frounding-math really should be
that option.

This patch provides a set of call instructions (for direct
and indirect calls) that are marked with an implicit def of
the RM register. These will be used for calls that are marked
with the strictfp attribute.

Differential revision: https://reviews.llvm.org/D111433
  • Loading branch information
nemanjai committed Nov 10, 2021
1 parent ba2ac9c commit 5840f71
Show file tree
Hide file tree
Showing 11 changed files with 474 additions and 44 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Expand Up @@ -312,17 +312,17 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
(instrs
BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
BL, BL8, BL8_NOP, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_TLS, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_TLS
BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
)>;

// 2 Cycles Branch operations, 1 input operands
def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
(instrs
B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
BA, TAILBA, TAILBA8,
BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL_LWZinto_toc, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
BLA, BLA8, BLA8_NOP
BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
)>;

// 2 Cycles Branch operations, 3 input operands
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/P9InstrResources.td
Expand Up @@ -1302,15 +1302,15 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
(instregex "BCCTR(L)?(8)?(n)?$"),
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
(instregex "BL(_TLS|_NOP)?$"),
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
(instregex "BLA(8|8_NOP)?$"),
(instregex "BL(_TLS|_NOP)?(_RM)?$"),
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
(instregex "BLA(8|8_NOP)?(_RM)?$"),
(instregex "BLR(8|L)?$"),
(instregex "TAILB(A)?(8)?$"),
(instregex "TAILBCTR(8)?$"),
(instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
(instregex "BCLR(L)?(n)?$"),
(instregex "BCTR(L)?(8)?$"),
(instregex "BCTR(L)?(8)?(_RM)?$"),
B,
BA,
BC,
Expand All @@ -1321,6 +1321,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
BCLn,
BCTRL8_LDinto_toc,
BCTRL_LWZinto_toc,
BCTRL8_LDinto_toc_RM,
BCTRL_LWZinto_toc_RM,
BCn,
CTRL_DEP
)>;
Expand Down
77 changes: 53 additions & 24 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -1630,9 +1630,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::CALL_RM:
return "PPCISD::CALL_RM";
case PPCISD::CALL_NOP_RM:
return "PPCISD::CALL_NOP_RM";
case PPCISD::CALL_NOTOC_RM:
return "PPCISD::CALL_NOTOC_RM";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::BCTRL_RM:
return "PPCISD::BCTRL_RM";
case PPCISD::BCTRL_LOAD_TOC_RM:
return "PPCISD::BCTRL_LOAD_TOC_RM";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
Expand Down Expand Up @@ -5172,13 +5182,14 @@ static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
}

static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
const Function &Caller,
const SDValue &Callee,
const Function &Caller, const SDValue &Callee,
const PPCSubtarget &Subtarget,
const TargetMachine &TM) {
const TargetMachine &TM,
bool IsStrictFPCall = false) {
if (CFlags.IsTailCall)
return PPCISD::TC_RETURN;

unsigned RetOpc = 0;
// This is a call through a function pointer.
if (CFlags.IsIndirect) {
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
Expand All @@ -5189,28 +5200,46 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// immediately followed by a load of the TOC pointer from the stack save
// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
// as it is not saved or used.
return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
: PPCISD::BCTRL;
}

if (Subtarget.isUsingPCRelativeCalls()) {
RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
: PPCISD::BCTRL;
} else if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
return PPCISD::CALL_NOTOC;
RetOpc = PPCISD::CALL_NOTOC;
} else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time if the linker determines the calls
// may not share a TOC base, the call is redirected to a trampoline inserted
// by the linker. The trampoline will (among other things) save the caller's
// TOC pointer at an ABI designated offset in the linkage area and the
// linker will rewrite the nop to be a load of the TOC pointer from the
// linkage area into gpr2.
RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
: PPCISD::CALL_NOP;
else
RetOpc = PPCISD::CALL;
if (IsStrictFPCall) {
switch (RetOpc) {
default:
llvm_unreachable("Unknown call opcode");
case PPCISD::BCTRL_LOAD_TOC:
RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
break;
case PPCISD::BCTRL:
RetOpc = PPCISD::BCTRL_RM;
break;
case PPCISD::CALL_NOTOC:
RetOpc = PPCISD::CALL_NOTOC_RM;
break;
case PPCISD::CALL:
RetOpc = PPCISD::CALL_RM;
break;
case PPCISD::CALL_NOP:
RetOpc = PPCISD::CALL_NOP_RM;
break;
}
}

// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time if the linker determines the calls
// may not share a TOC base, the call is redirected to a trampoline inserted
// by the linker. The trampoline will (among other things) save the caller's
// TOC pointer at an ABI designated offset in the linkage area and the linker
// will rewrite the nop to be a load of the TOC pointer from the linkage area
// into gpr2.
if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
: PPCISD::CALL_NOP;

return PPCISD::CALL;
return RetOpc;
}

static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
Expand Down Expand Up @@ -5506,7 +5535,7 @@ SDValue PPCTargetLowering::FinishCall(

unsigned CallOpc =
getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
Subtarget, DAG.getTarget());
Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

if (!CFlags.IsIndirect)
Callee = transformCallee(Callee, DAG, dl, Subtarget);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Expand Up @@ -200,6 +200,14 @@ namespace llvm {
/// and 64-bit AIX.
BCTRL_LOAD_TOC,

/// The variants that implicitly define rounding mode for calls with
/// strictfp semantics.
CALL_RM,
CALL_NOP_RM,
CALL_NOTOC_RM,
BCTRL_RM,
BCTRL_LOAD_TOC_RM,

/// Return with a flag operand, matched by 'blr'
RET_FLAG,

Expand Down
63 changes: 63 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Expand Up @@ -178,6 +178,39 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
}
}

// 64-bit call instructions for calls with strictfp semantics. Every def in
// this block lists RM (the rounding mode register) in both Defs and Uses, so
// each call is modelled as reading and redefining the rounding mode. All of
// them are isCodeGenOnly and print the ordinary bl/bla/bctrl mnemonics.
let isCall = 1, PPC970_Unit = 7, Defs = [LR8, RM], hasSideEffects = 0,
isCodeGenOnly = 1, Uses = [RM] in {
// Direct calls. BL8_RM and BL8_NOP_RM carry no inline selection pattern; they
// are selected through the separate Pat definitions for PPCcall_rm and
// PPCcall_nop_rm.
def BL8_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
"bl $func", IIC_BrB, []>; // See Pat patterns below.

// Absolute-address form, selected directly from PPCcall_rm on an immediate.
def BLA8_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
"bla $func", IIC_BrB, [(PPCcall_rm (i64 imm:$func))]>;
// Call followed by a nop, printed as a two-instruction sequence.
def BL8_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", IIC_BrB, []>;

// Absolute-address call followed by a nop, selected from PPCcall_nop_rm on an
// immediate.
def BLA8_NOP_RM : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins abscalltarget:$func),
"bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop_rm (i64 imm:$func))]>;
let Predicates = [PCRelativeMemops] in {
// As with BL8_NOTOC, BL8_NOTOC_RM means that the caller does not use the TOC
// pointer and if it does use R2 then it is just a caller saved register.
// Therefore it is safe to emit only the bl and not the nop for this
// instruction. The linker will not try to restore R2 after the call.
def BL8_NOTOC_RM : IForm<18, 0, 1, (outs),
(ins calltarget:$func),
"bl $func", IIC_BrB, []>;
}
// Indirect call through the count register: overrides Uses so that CTR8 is
// read in addition to RM. Selected from PPCbctrl_rm in 64-bit mode only.
let Uses = [CTR8, RM] in {
let isPredicable = 1 in
def BCTRL8_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
"bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
Requires<[In64BitMode]>;
}
}

let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
def BCTRL8_LDinto_toc :
Expand All @@ -188,6 +221,16 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Requires<[In64BitMode]>;
}

// strictfp variant of the 64-bit indirect call that is immediately followed
// by a load into r2: printed as `bctrl` then `ld 2, $src`. In addition to LR8
// and X2 it lists RM in Defs, and it uses CTR8 and RM. Selected from
// PPCbctrl_load_toc_rm with $src as the address operand of the load.
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR8, X2, RM], Uses = [CTR8, RM], RST = 2 in {
def BCTRL8_LDinto_toc_RM :
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
(ins memrix:$src),
"bctrl\n\tld 2, $src", IIC_BrB,
[(PPCbctrl_load_toc_rm iaddrX4:$src)]>,
Requires<[In64BitMode]>;
}

} // Interpretation64Bit

// FIXME: Duplicating this for the asm parser should be unnecessary, but the
Expand All @@ -214,12 +257,32 @@ def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
(BL8_NOTOC texternalsym:$dst)>;

// Selection patterns for the strictfp (RM-defining) direct call nodes with
// i64 targets. Global-address targets: PPCcall_rm -> BL8_RM and
// PPCcall_nop_rm -> BL8_NOP_RM.
def : Pat<(PPCcall_rm (i64 tglobaladdr:$dst)),
(BL8_RM tglobaladdr:$dst)>;
def : Pat<(PPCcall_nop_rm (i64 tglobaladdr:$dst)),
(BL8_NOP_RM tglobaladdr:$dst)>;

// The same two mappings for external-symbol targets.
def : Pat<(PPCcall_rm (i64 texternalsym:$dst)),
(BL8_RM texternalsym:$dst)>;
def : Pat<(PPCcall_nop_rm (i64 texternalsym:$dst)),
(BL8_NOP_RM texternalsym:$dst)>;

// PPCcall_notoc_rm -> BL8_NOTOC_RM for both global-address and
// external-symbol targets.
def : Pat<(PPCcall_notoc_rm (i64 tglobaladdr:$dst)),
(BL8_NOTOC_RM tglobaladdr:$dst)>;
def : Pat<(PPCcall_notoc_rm (i64 texternalsym:$dst)),
(BL8_NOTOC_RM texternalsym:$dst)>;

// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;
def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
(BL8_NOP mcsym:$dst)>;

// strictfp (RM-defining) call nodes with an i64 mcsym target:
// PPCcall_rm -> BL8_RM and PPCcall_nop_rm -> BL8_NOP_RM.
def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
(BL8_RM mcsym:$dst)>;
def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
(BL8_NOP_RM mcsym:$dst)>;

// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Expand Up @@ -2246,11 +2246,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,

return true;
} else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
OpC == PPC::BCTRL8) {
OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
OpC == PPC::BCTRL8_RM) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");

bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
bool isPPC64 = Subtarget.isPPC64();

if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
Expand All @@ -2274,6 +2276,9 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addReg(PPC::RM, RegState::ImplicitDefine);

return true;
}
Expand Down
61 changes: 61 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Expand Up @@ -316,6 +316,24 @@ def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;

// Call nodes for strictfp calls (that define RM). Each one wraps the
// PPCISD opcode named in its string and carries the same node properties:
// chain, optional incoming glue, outgoing glue, and a variadic operand list.
// Direct call nodes; all three use the SDT_PPCCall type profile.
def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_nop_rm : SDNode<"PPCISD::CALL_NOP_RM", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_notoc_rm : SDNode<"PPCISD::CALL_NOTOC_RM", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// Indirect call node; SDTNone, i.e. no typed results or operands.
def PPCbctrl_rm : SDNode<"PPCISD::BCTRL_RM", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// Indirect call node with zero results and one typed operand; the
// instruction patterns that match it use that operand as the address ($src)
// of the load into r2 that follows the bctrl.
def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM",
SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;

def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

Expand Down Expand Up @@ -1892,6 +1910,26 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
}

// 32-bit call instructions for calls with strictfp semantics. Every def in
// this block lists RM (the rounding mode register) in Defs alongside LR, and
// also uses RM. All are isCodeGenOnly and print the ordinary bl/bla/bctrl
// mnemonics.
let isCall = 1, PPC970_Unit = 7, Defs = [LR, RM], isCodeGenOnly = 1 in {
// Direct calls. BL_RM and BL_NOP_RM carry no inline selection pattern; they
// are selected through the separate Pat definitions for PPCcall_rm and
// PPCcall_nop_rm.
let Uses = [RM] in {
def BL_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
"bl $func", IIC_BrB, []>; // See Pat patterns below.
// Absolute-address form, selected directly from PPCcall_rm on an immediate.
def BLA_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
"bla $func", IIC_BrB, [(PPCcall_rm (i32 imm:$func))]>;

// Call followed by a nop, printed as a two-instruction sequence.
def BL_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", IIC_BrB, []>;
}
// Indirect call through the count register: uses CTR in addition to RM.
// Selected from PPCbctrl_rm in 32-bit mode only.
let Uses = [CTR, RM] in {
let isPredicable = 1 in
def BCTRL_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
"bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
Requires<[In32BitMode]>;
}
}

let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
Expand All @@ -1918,6 +1956,14 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,

}

// strictfp variant of the 32-bit indirect call that is immediately followed
// by a load into r2: printed as `bctrl` then `lwz 2, $src`. In addition to LR
// and R2 it lists RM in Defs, and it uses CTR and RM. Selected from
// PPCbctrl_load_toc_rm with $src as the address operand of the load.
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in {
def BCTRL_LWZinto_toc_RM:
XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
(ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
[(PPCbctrl_load_toc_rm iaddr:$src)]>, Requires<[In32BitMode]>;

}

let isCodeGenOnly = 1, hasSideEffects = 0 in {

Expand Down Expand Up @@ -3435,6 +3481,12 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;

// strictfp (RM-defining) direct call node with an i32 target:
// PPCcall_rm -> BL_RM for global-address and external-symbol targets.
def : Pat<(PPCcall_rm (i32 tglobaladdr:$dst)),
(BL_RM tglobaladdr:$dst)>;

def : Pat<(PPCcall_rm (i32 texternalsym:$dst)),
(BL_RM texternalsym:$dst)>;

// Calls for AIX only
def : Pat<(PPCcall (i32 mcsym:$dst)),
(BL mcsym:$dst)>;
Expand All @@ -3445,6 +3497,15 @@ def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
(BL_NOP texternalsym:$dst)>;

// strictfp (RM-defining) call nodes with i32 targets: PPCcall_rm on an mcsym
// selects BL_RM; PPCcall_nop_rm on an mcsym or an external symbol selects
// BL_NOP_RM.
def : Pat<(PPCcall_rm (i32 mcsym:$dst)),
(BL_RM mcsym:$dst)>;

def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)),
(BL_NOP_RM mcsym:$dst)>;

def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
(BL_NOP_RM texternalsym:$dst)>;

def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;

Expand Down

0 comments on commit 5840f71

Please sign in to comment.