Skip to content

Commit

Permalink
[PowerPC] Add handling for ColdCC calling convention and a pass to mark
Browse files Browse the repository at this point in the history
candidates with coldcc attribute.

This patch adds support for the coldcc calling convention for Power.
This changes the set of non-volatile registers. It includes a pass to stress
test the implementation by marking all static directly called functions with
the coldcc attribute through the option -enable-coldcc-stress-test. It also
includes an option, -ppc-enable-coldcc, to add the coldcc attribute to
functions which are cold at all call sites based on BlockFrequencyInfo when
the containing function does not call any non cold functions.

Differential Revision: https://reviews.llvm.org/D38413

llvm-svn: 322721
  • Loading branch information
syzaara committed Jan 17, 2018
1 parent 2686e3c commit 8e951fd
Show file tree
Hide file tree
Showing 17 changed files with 507 additions and 13 deletions.
9 changes: 9 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Expand Up @@ -541,6 +541,10 @@ class TargetTransformInfo {
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;

/// \brief Return true if the input function which is cold at all call sites,
/// should use coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;

unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
Expand Down Expand Up @@ -992,6 +996,7 @@ class TargetTransformInfo::Concept {
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
virtual unsigned
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
Expand Down Expand Up @@ -1237,6 +1242,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
bool useColdCCForColdCall(Function &F) override {
return Impl.useColdCCForColdCall(F);
}

unsigned getScalarizationOverhead(Type *Ty, bool Insert,
bool Extract) override {
return Impl.getScalarizationOverhead(Ty, Insert, Extract);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Expand Up @@ -284,6 +284,8 @@ class TargetTransformInfoImplBase {
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

bool useColdCCForColdCall(Function &F) { return false; }

unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
return 0;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Expand Up @@ -226,6 +226,10 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
Expand Down
53 changes: 53 additions & 0 deletions llvm/lib/Target/PowerPC/PPCCallingConv.td
Expand Up @@ -45,6 +45,29 @@ def RetCC_PPC64_AnyReg : CallingConv<[
CCCustom<"CC_PPC_AnyReg_Error">
]>;

// Return-value convention for PowerPC coldcc.
def RetCC_PPC_Cold : CallingConv<[
// Use the same return registers as RetCC_PPC, but limited to only
// one return value. The remaining return values will be saved to
// the stack.
CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,

CCIfType<[i32], CCAssignToReg<[R3]>>,
CCIfType<[i64], CCAssignToReg<[X3]>>,
CCIfType<[i128], CCAssignToReg<[X3]>>,

CCIfType<[f32], CCAssignToReg<[F1]>>,
CCIfType<[f64], CCAssignToReg<[F1]>>,

CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,

CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2]>>>
]>;

// Return-value convention for PowerPC
def RetCC_PPC : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
Expand Down Expand Up @@ -271,6 +294,36 @@ def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>

def CSR_NoRegs : CalleeSavedRegs<(add)>;

// coldcc calling convection marks most registers as non-volatile.
// Do not include r1 since the stack pointer is never considered a CSR.
// Do not include r2, since it is the TOC register and is added depending
// on wether or not the function uses the TOC and is a non-leaf.
// Do not include r0,r11,r13 as they are optional in functional linkage
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
// Do not include return registers r3, f1, v2.
def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
(sequence "R%u", 14, 31),
F0, (sequence "F%u", 2, 31),
(sequence "CR%u", 0, 7))>;

def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC,
(sequence "V%u", 0, 1),
(sequence "V%u", 3, 31))>;

def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10),
(sequence "X%u", 14, 31),
F0, (sequence "F%u", 2, 31),
(sequence "CR%u", 0, 7))>;

def CSR_SVR64_ColdCC_R2: CalleeSavedRegs<(add CSR_SVR64_ColdCC, X2)>;

def CSR_SVR64_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR64_ColdCC,
(sequence "V%u", 0, 1),
(sequence "V%u", 3, 31))>;

def CSR_SVR64_ColdCC_R2_Altivec : CalleeSavedRegs<(add CSR_SVR64_ColdCC_Altivec, X2)>;

def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
(sequence "X%u", 14, 31),
(sequence "F%u", 0, 31),
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCFastISel.cpp
Expand Up @@ -206,6 +206,8 @@ CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
return CC_PPC32_SVR4_ByVal;
else if (Flag == 3)
return CC_PPC32_SVR4_VarArg;
else if (Flag == 4)
return RetCC_PPC_Cold;
else
return RetCC_PPC;
}
Expand Down
14 changes: 12 additions & 2 deletions llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Expand Up @@ -1950,7 +1950,14 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

// Add the callee-saved register as live-in; it's killed at the spill.
MBB.addLiveIn(Reg);
// Do not do this for callee-saved registers that are live-in to the
// function because they will already be marked live-in and this will be
// adding it for a second time. It is an error to add the same register
// to the set more than once.
const MachineRegisterInfo &MRI = MF->getRegInfo();
bool IsLiveIn = MRI.isLiveIn(Reg);
if (!IsLiveIn)
MBB.addLiveIn(Reg);

if (CRSpilled && IsCRField) {
CRMIB.addReg(Reg, RegState::ImplicitKill);
Expand Down Expand Up @@ -1980,7 +1987,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
}
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(MBB, MI, Reg, true,
// Use !IsLiveIn for the kill flag.
// We do not want to kill registers that are live in this function
// before their use because they will become undefined registers.
TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
CSI[i].getFrameIdx(), RC, TRI);
}
}
Expand Down
17 changes: 14 additions & 3 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -4939,7 +4939,11 @@ SDValue PPCTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

CCRetInfo.AnalyzeCallResult(
Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold
: RetCC_PPC);

// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
Expand Down Expand Up @@ -5159,6 +5163,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// of the 32-bit SVR4 ABI stack frame layout.

assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");

unsigned PtrByteSize = 4;
Expand Down Expand Up @@ -6420,7 +6425,10 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_PPC);
return CCInfo.CheckReturn(
Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold
: RetCC_PPC);
}

SDValue
Expand All @@ -6432,7 +6440,10 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
CCInfo.AnalyzeReturn(Outs,
(Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold
: RetCC_PPC);

SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Expand Up @@ -144,6 +144,17 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);

if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
return TM.isPPC64()
? (Subtarget.hasAltivec()
? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
: CSR_SVR64_ColdCC_Altivec_SaveList)
: (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
: CSR_SVR64_ColdCC_SaveList))
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList
: CSR_SVR32_ColdCC_SaveList);
}

return TM.isPPC64()
? (Subtarget.hasAltivec()
? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
Expand Down Expand Up @@ -196,6 +207,13 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
: CSR_Darwin32_RegMask);

if (CC == CallingConv::Cold) {
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
: CSR_SVR32_ColdCC_RegMask);
}

return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
: CSR_SVR464_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Expand Up @@ -27,6 +27,11 @@ static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
cl::desc("The loop prefetch cache line size"));

static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
cl::desc("Enable using coldcc calling conv for cold "
"internal functions"));

//===----------------------------------------------------------------------===//
//
// PPC cost model.
Expand Down Expand Up @@ -215,6 +220,14 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getUnrollingPreferences(L, SE, UP);
}

// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
// non coldcc functions.
bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
return EnablePPCColdCC;
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
// On the A2, always unroll aggressively. For QPX unaligned loads, we depend
// on combining the loads generated for consecutive accesses, and failure to
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Expand Up @@ -61,7 +61,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {

/// \name Vector TTI Implementations
/// @{

bool useColdCCForColdCall(Function &F);
bool enableAggressiveInterleaving(bool LoopHasReductions);
const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
bool IsZeroCmp) const;
Expand Down

0 comments on commit 8e951fd

Please sign in to comment.