123 changes: 54 additions & 69 deletions llvm/lib/Target/PowerPC/PPCFastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
Expand Down Expand Up @@ -119,6 +120,8 @@ class PPCFastISel final : public FastISel {
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);

bool fastLowerCall(CallLoweringInfo &CLI);

// Instruction selection routines.
private:
bool SelectLoad(const Instruction *I);
Expand All @@ -130,7 +133,6 @@ class PPCFastISel final : public FastISel {
bool SelectIToFP(const Instruction *I, bool IsSigned);
bool SelectFPToI(const Instruction *I, bool IsSigned);
bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
bool SelectCall(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
Expand Down Expand Up @@ -174,9 +176,7 @@ class PPCFastISel final : public FastISel {
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg);
void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg);
bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
CCAssignFn *usePPC32CCs(unsigned Flag);

private:
Expand Down Expand Up @@ -1339,9 +1339,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,

// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg) {
bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
CallingConv::ID CC = CLI.CallConv;

// Issue CallSEQ_END.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameDestroyOpcode()))
Expand All @@ -1352,7 +1352,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
CCValAssign &VA = RVLocs[0];
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
Expand Down Expand Up @@ -1397,39 +1397,35 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
}

assert(ResultReg && "ResultReg unset!");
UsedRegs.push_back(SourcePhysReg);
updateValueMap(I, ResultReg);
CLI.InRegs.push_back(SourcePhysReg);
CLI.ResultReg = ResultReg;
CLI.NumResultRegs = 1;
}

return true;
}

// Attempt to fast-select a call instruction.
bool PPCFastISel::SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
CallingConv::ID CC = CLI.CallConv;
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
const char *SymName = CLI.SymName;

// Can't handle inline asm.
if (isa<InlineAsm>(Callee))
if (!Callee && !SymName)
return false;

// Allow SelectionDAG isel to handle tail calls.
if (CI->isTailCall())
if (IsTailCall)
return false;

// Obtain calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();

PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
bool IsVarArg = FTy->isVarArg();

// Not ready for varargs yet.
// Let SDISel handle vararg functions.
if (IsVarArg)
return false;

// Handle simple calls for now, with legal return types and
// those that can be extended.
Type *RetTy = I->getType();
Type *RetTy = CLI.RetTy;
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
Expand All @@ -1450,7 +1446,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {

// Bail early if more than 8 arguments, as we only currently
// handle arguments passed in registers.
unsigned NumArgs = CS.arg_size();
unsigned NumArgs = CLI.OutVals.size();
if (NumArgs > 8)
return false;

Expand All @@ -1465,43 +1461,28 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
ArgVTs.reserve(NumArgs);
ArgFlags.reserve(NumArgs);

for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
II != IE; ++II) {
// FIXME: ARM does something for intrinsic calls here, check into that.

unsigned AttrIdx = II - CS.arg_begin() + 1;

for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
// Only handle easy calls for now. It would be reasonably easy
// to handle <= 8-byte structures passed ByVal in registers, but we
// have to ensure they are right-justified in the register.
if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
CS.paramHasAttr(AttrIdx, Attribute::ByVal))
ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
return false;

ISD::ArgFlagsTy Flags;
if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
Flags.setSExt();
if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
Flags.setZExt();

Type *ArgTy = (*II)->getType();
Value *ArgValue = CLI.OutVals[i];
Type *ArgTy = ArgValue->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;

if (ArgVT.isVector())
return false;

unsigned Arg = getRegForValue(*II);
unsigned Arg = getRegForValue(ArgValue);
if (Arg == 0)
return false;

unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);

Args.push_back(*II);
Args.push_back(ArgValue);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
Expand All @@ -1515,18 +1496,28 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
RegArgs, CC, NumBytes, IsVarArg))
return false;

MachineInstrBuilder MIB;
// FIXME: No handling for function pointers yet. This requires
// implementing the function descriptor (OPD) setup.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
if (!GV)
return false;

// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
if (!GV) {
// patchpoints are a special case; they always dispatch to a pointer value.
// However, we don't actually want to generate the indirect call sequence
// here (that will be generated, as necessary, during asm printing), and
// the call we generate here will be erased by FastISel::selectPatchpoint,
// so don't try very hard...
if (CLI.IsPatchPoint)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
else
return false;
} else {
// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
}

// Add implicit physical register uses to the call.
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
Expand All @@ -1540,14 +1531,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));

// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);

// Set all unused physregs defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
CLI.Call = MIB;

return true;
// Finish off the call including any return values.
return finishCall(RetVT, CLI, NumBytes);
}

// Attempt to fast-select a return instruction.
Expand Down Expand Up @@ -1837,9 +1824,7 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
if (dyn_cast<IntrinsicInst>(I))
return false;
return SelectCall(I);
return selectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {

return MF.getTarget().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects() ||
MFI->hasStackMap() || MFI->hasPatchPoint() ||
(MF.getTarget().Options.GuaranteedTailCallOpt &&
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
Expand Down
62 changes: 42 additions & 20 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
Expand Down Expand Up @@ -3590,6 +3591,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
bool IsPatchPoint,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &Subtarget) {
Expand Down Expand Up @@ -3663,7 +3665,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};

if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
Expand Down Expand Up @@ -3732,9 +3734,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
MTCTROps[2] = InFlag;
}

Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
InFlag = Chain.getValue(1);
if (!IsPatchPoint) {
Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
InFlag = Chain.getValue(1);
}

NodeTys.clear();
NodeTys.push_back(MVT::Other);
Expand All @@ -3743,7 +3747,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
if (isSVR4ABI && isPPC64 && !isELFv2ABI)
if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Expand Down Expand Up @@ -3783,7 +3787,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
RegsToPass[i].second.getValueType()));

// Direct calls in the ELFv2 ABI need the TOC register live into the call.
if (Callee.getNode() && isELFv2ABI)
if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));

return CallOpc;
Expand Down Expand Up @@ -3846,7 +3850,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,

SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
bool isTailCall, bool isVarArg,
bool isTailCall, bool isVarArg, bool IsPatchPoint,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
Expand All @@ -3860,8 +3864,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
Subtarget);
isTailCall, IsPatchPoint, RegsToPass, Ops,
NodeTys, Subtarget);

// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
Expand Down Expand Up @@ -3963,6 +3967,7 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
bool IsPatchPoint = CLI.IsPatchPoint;

if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Expand All @@ -3975,23 +3980,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget.isSVR4ABI()) {
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
}

return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
Expand Down Expand Up @@ -4201,7 +4206,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);

return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
Expand Down Expand Up @@ -4229,7 +4234,7 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
Expand Down Expand Up @@ -4665,7 +4670,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
if (!isTailCall && !IsPatchPoint &&
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
Expand All @@ -4679,7 +4684,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
if (isELFv2ABI)
if (isELFv2ABI && !IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}

Expand All @@ -4696,15 +4701,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);

return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}

SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
Expand Down Expand Up @@ -5089,7 +5094,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);

return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
Expand Down Expand Up @@ -7246,6 +7251,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
if (MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT)
return emitPatchPoint(MI, BB);

if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
Expand Down Expand Up @@ -9882,6 +9891,19 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints. The same reasoning applies
// to CTR, which is used by any indirect call.
static const MCPhysReg ScratchRegs[] = {
PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
};

return ScratchRegs;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,8 @@ namespace llvm {
/// expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

// Should we expand the build vector with shuffles?
bool
shouldExpandBuildVectorWithShuffles(EVT VT,
Expand Down Expand Up @@ -681,7 +683,7 @@ namespace llvm {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
bool isVarArg,
bool isVarArg, bool IsPatchPoint,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
Expand Down Expand Up @@ -746,7 +748,7 @@ namespace llvm {
SDValue
LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv,
bool isVarArg, bool isTailCall,
bool isVarArg, bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
Expand All @@ -755,15 +757,15 @@ namespace llvm {
SDValue
LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv,
bool isVarArg, bool isTailCall,
bool isVarArg, bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
bool isVarArg, bool isTailCall,
bool isVarArg, bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -1596,6 +1597,11 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
} else if (Opcode == TargetOpcode::STACKMAP) {
return MI->getOperand(1).getImm();
} else if (Opcode == TargetOpcode::PATCHPOINT) {
PatchPointOpers Opers(MI);
return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
} else {
const MCInstrDesc &Desc = get(Opcode);
return Desc.getSize();
Expand Down
40 changes: 36 additions & 4 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)

const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
if (Subtarget.hasAltivec())
return CSR_64_AllRegs_Altivec_SaveList;
return CSR_64_AllRegs_SaveList;
}

if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
CSR_Darwin64_Altivec_SaveList :
Expand All @@ -117,6 +125,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {

const uint32_t*
PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_RegMask;
if (Subtarget.hasAltivec())
return CSR_64_AllRegs_Altivec_RegMask;
return CSR_64_AllRegs_RegMask;
}

if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
CSR_Darwin64_Altivec_RegMask :
Expand All @@ -138,6 +154,14 @@ PPCRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}

void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
unsigned Reg = PseudoRegs[i];
Mask[Reg / 32] &= ~(1u << (Reg % 32));
}
}

BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
Expand Down Expand Up @@ -700,7 +724,10 @@ static unsigned getOffsetONFromFION(const MachineInstr &MI,
// Take into account whether it's an add or mem instruction
unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
OffsetOperandNo = FIOperandNum-1;
OffsetOperandNo = FIOperandNum - 1;
else if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT)
OffsetOperandNo = FIOperandNum + 1;

return OffsetOperandNo;
}
Expand Down Expand Up @@ -772,7 +799,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);

// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
Expand All @@ -796,8 +824,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handle in a target independent way");
if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
"This should be handled in a target-independent way");
if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
OpC == TargetOpcode::PATCHPOINT)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
return;
}
Expand Down Expand Up @@ -1008,6 +1038,8 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Offset += MI->getOperand(OffsetOperandNo).getImm();

return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT ||
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}

2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;

void adjustStackMapLiveOutMask(uint32_t *Mask) const override;

BitVector getReservedRegs(const MachineFunction &MF) const override;

/// We require the register scavenger.
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,15 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
return TCC_Free;
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TCC_Free;
break;
}
return PPCTTI::getIntImmCost(Imm, Ty);
}
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: not llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
;
; Check that misuse of anyregcc results in a compile time error.

; CHECK: LLVM ERROR: ran out of registers during register allocation
define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
entry:
%result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32)
ret i64 %result
}

declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
367 changes: 367 additions & 0 deletions llvm/test/CodeGen/PowerPC/ppc64-anyregcc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,367 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Stackmap Header: no constants - 6 callsites
; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 8
; Num LargeConstants
; CHECK-NEXT: .long 0
; Num Callsites
; CHECK-NEXT: .long 8

; Functions and stack size
; CHECK-NEXT: .quad test
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad property_access1
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad property_access2
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad property_access3
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad anyreg_test1
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad anyreg_test2
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad patchpoint_spilldef
; CHECK-NEXT: .quad 256
; CHECK-NEXT: .quad patchpoint_spillargs
; CHECK-NEXT: .quad 288


; test
; CHECK-LABEL: .long .L{{.*}}-.L.test
; CHECK-NEXT: .short 0
; 3 locations
; CHECK-NEXT: .short 3
; Loc 0: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 2: Constant 3
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}

; property access 1 - %obj is an anyreg call argument and should therefore be in a register
; CHECK-LABEL: .long .L{{.*}}-.L.property_access1
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
%ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}

; property access 2 - %obj is an anyreg call argument and should therefore be in a register
; CHECK-LABEL: .long .L{{.*}}-.L.property_access2
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
%ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}

; property access 3 - %obj is a frame index
; CHECK-LABEL: .long .L{{.*}}-.L.property_access3
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Direct FP - 8
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 31
; CHECK-NEXT: .long 112
define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
%ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}

; anyreg_test1
; CHECK-LABEL: .long .L{{.*}}-.L.anyreg_test1
; CHECK-NEXT: .short 0
; 14 locations
; CHECK-NEXT: .short 14
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 2: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 3: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 4: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 5: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 6: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 7: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 8: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 9: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 10: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 11: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 12: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 13: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
%ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}

; anyreg_test2
; CHECK-LABEL: .long .L{{.*}}-.L.anyreg_test2
; CHECK-NEXT: .short 0
; 14 locations
; CHECK-NEXT: .short 14
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 2: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 3: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 4: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 5: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 6: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 7: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 8: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 9: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 10: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 11: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 12: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 13: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
%ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}

; Test spilling the return value of an anyregcc call.
;
; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!"
;
; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spilldef
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 3
; Loc 0: Register (some register that will be spilled to the stack)
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
%result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17
},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
ret i64 %result
}

; Test spilling the arguments of an anyregcc call.
;
; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
;
; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spillargs
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 5
; Loc 0: Return a register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 1: Arg0 in a Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 2: Arg1 in a Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 3: Arg2 spilled to FP -96
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 31
; CHECK-NEXT: .long 128
; Loc 4: Arg3 spilled to FP - 88
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 31
; CHECK-NEXT: .long 136
define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17
},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
%result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}

declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
93 changes: 93 additions & 0 deletions llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
; RUN: llc < %s | FileCheck %s
; RUN: llc -fast-isel -fast-isel-abort < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Trivial patchpoint codegen
;
define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: trivial_patchpoint_codegen:

; CHECK: li 11, -8531
; CHECK-NEXT: rldic 11, 11, 32, 16
; CHECK-NEXT: oris 11, 11, 48879
; CHECK-NEXT: ori 11, 11, 51966
; CHECK-NEXT: mtctr 11
; CHECK-NEXT: bctrl

; CHECK: li 11, -8531
; CHECK-NEXT: rldic 11, 11, 32, 16
; CHECK-NEXT: oris 11, 11, 48879
; CHECK-NEXT: ori 11, 11, 51967
; CHECK-NEXT: mtctr 11
; CHECK-NEXT: bctrl

; CHECK: blr

%resolveCall2 = inttoptr i64 244837814094590 to i8*
%result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 244837814094591 to i8*
tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}

; Caller frame metadata with stackmaps. This should not be optimized
; as a leaf function.
;
; CHECK-LABEL: caller_meta_leaf
; CHECK: stdu 1, -80(1)
; CHECK: Ltmp
; CHECK: addi 1, 1, 80
; CHECK: blr

define void @caller_meta_leaf() {
entry:
%metadata = alloca i64, i32 3, align 8
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}

; Test patchpoints reusing the same TargetConstant.
; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
; There is no way to verify this, since it depends on memory allocation.
; But I think it's useful to include as a working example.
define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
%tmp82 = load i64* %tmp81, align 8
tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
%tmp83 = load i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
%tmp86 = load i64* %tmp85, align 8
tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}

; Test small patchpoints that don't emit calls.
define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: small_patchpoint_codegen:
; CHECK: Ltmp
; CHECK: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NOT: nop
; CHECK: blr
%result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}

declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)

24 changes: 24 additions & 0 deletions llvm/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-gnu-linux | FileCheck %s

define void @test_shadow_optimization() {
entry:
; Expect 12 bytes worth of nops here rather than 32: With the shadow optimization
; in place, 20 bytes will be consumed by the frame teardown and return instr.
; CHECK-LABEL: test_shadow_optimization:

; CHECK: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NOT: nop
; CHECK: addi 1, 1, 64
; CHECK: ld [[REG1:[0-9]+]], 16(1)
; CHECK: ld 31, -8(1)
; CHECK: mtlr [[REG1]]
; CHECK: blr

tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 32)
ret void
}

declare void @llvm.experimental.stackmap(i64, i32, ...)

289 changes: 289 additions & 0 deletions llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
; RUN: llc < %s | FileCheck %s
;
; Note: Print verbose stackmaps using -debug-only=stackmaps.

; We are not getting the correct stack alignment when cross compiling for arm64.
; So specify a datalayout here.
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 11
; Num LargeConstants
; CHECK-NEXT: .long 2
; Num Callsites
; CHECK-NEXT: .long 11

; Functions and stack size
; CHECK-NEXT: .quad constantargs
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad osrinline
; CHECK-NEXT: .quad 144
; CHECK-NEXT: .quad osrcold
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad propertyRead
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad propertyWrite
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad jsVoidCall
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad jsIntCall
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad spilledValue
; CHECK-NEXT: .quad 320
; CHECK-NEXT: .quad spilledStackMapValue
; CHECK-NEXT: .quad 224
; CHECK-NEXT: .quad liveConstant
; CHECK-NEXT: .quad 64
; CHECK-NEXT: .quad clobberLR
; CHECK-NEXT: .quad 208

; Num LargeConstants
; CHECK-NEXT: .quad 4294967295
; CHECK-NEXT: .quad 4294967296

; Constant arguments
;
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .long .L{{.*}}-.L.constantargs
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 4
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 65535
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 65536
; SmallConstant
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; LargeConstant at index 0
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 1

define void @constantargs() {
entry:
%0 = inttoptr i64 244837814094590 to i8*
tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
ret void
}

; Inline OSR Exit
;
; CHECK-LABEL: .long .L{{.*}}-.L.osrinline
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define void @osrinline(i64 %a, i64 %b) {
entry:
; Runtime void->void call.
call void inttoptr (i64 244837814094590 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}

; Cold OSR Exit
;
; 2 live variables in register.
;
; CHECK-LABEL: .long .L{{.*}}-.L.osrcold
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define void @osrcold(i64 %a, i64 %b) {
entry:
%test = icmp slt i64 %a, %b
br i1 %test, label %ret, label %cold
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 244837814094590 to i8*
call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
}

; Property Read
; CHECK-LABEL: .long .L{{.*}}-.L.propertyRead
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
;
; FIXME: There are currently no stackmap entries. After moving to
; AnyRegCC, we will have entries for the object and return value.
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 244837814094590 to i8*
%result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}

; Property Write
; CHECK-LABEL: .long .L{{.*}}-.L.propertyWrite
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 244837814094590 to i8*
call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}

; Void JS Call
;
; 2 live variables in registers.
;
; CHECK-LABEL: .long .L{{.*}}-.L.jsVoidCall
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}

; i64 JS Call
;
; 2 live variables in registers.
;
; CHECK-LABEL: .long .L{{.*}}-.L.jsIntCall
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
%result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}

; Spilled stack map values.
;
; Verify 28 stack map entries.
;
; CHECK-LABEL: .long .L{{.*}}-.L.spilledValue
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 28
;
; Check that at least one is a spilled entry from r31.
; Location: Indirect FP + ...
; CHECK: .byte 3
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 31
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
entry:
call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
ret void
}

; Spilled stack map values.
;
; Verify 30 stack map entries.
;
; CHECK-LABEL: .long .L{{.*}}-.L.spilledStackMapValue
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 30
;
; Check that at least one is a spilled entry from r31.
; Location: Indirect FP + ...
; CHECK: .byte 3
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 31
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
entry:
call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
ret void
}


; Map a constant value.
;
; CHECK-LABEL: .long .L{{.*}}-.L.liveConstant
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 33

define void @liveConstant() {
tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
ret void
}

; Map a value when LR is the only free register.
;
; CHECK-LABEL: .long .L{{.*}}-.L.clobberLR
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: Indirect FP (r31) - offset
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .short 31
; CHECK-NEXT: .long {{[0-9]+}}
define void @clobberLR(i32 %a) {
tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}

declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)