183 changes: 183 additions & 0 deletions llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "SparcTargetMachine.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@@ -62,6 +63,7 @@ class SparcDAGToDAGISel : public SelectionDAGISel {

private:
SDNode* getGlobalBaseReg();
SDNode *SelectInlineAsm(SDNode *N);
};
} // end anonymous namespace

@@ -141,6 +143,181 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
return true;
}


// Re-assemble i64 arguments split up in SelectionDAGBuilder's
// visitInlineAsm / GetRegistersForValue functions.
//
// Note: This function was copied from, and is essentially identical
// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that
// such hacking-up is necessary; a rethink of how inline asm operands
// are handled may be in order to make doing this more sane.
//
// TODO: fix inline asm support so I can simply tell it that 'i64'
// inputs to asm need to be allocated to the IntPair register type,
// and have that work. Then, delete this function.
SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N) {
std::vector<SDValue> AsmNodeOperands;
unsigned Flag, Kind;
bool Changed = false;
unsigned NumOps = N->getNumOperands();

// Normally, i64 data is bound to two arbitrary GPRs for the "%r"
// constraint. However, some instructions (e.g. ldd/std) require
// (even/even+1) GPRs.

// So, here, we check for this case, and mutate the inlineasm to use
// a single IntPair register instead, which guarantees such even/odd
// placement.
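// For example, an i64 "%r" operand could be assigned %o1 and %o4, which
// ldd/std cannot use; an IntPair allocation (e.g. the O4_O5 pair register
// defined in SparcRegisterInfo.td) guarantees a usable even/odd pair.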

SDLoc dl(N);
SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
: SDValue(nullptr,0);

SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
SDValue op = N->getOperand(i);
AsmNodeOperands.push_back(op);

if (i < InlineAsm::Op_FirstOperand)
continue;

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
Flag = C->getZExtValue();
Kind = InlineAsm::getKind(Flag);
}
else
continue;

// Immediate operands to inline asm in the SelectionDAG are modeled with
// two operands. The first is a constant of value InlineAsm::Kind_Imm, and
// the second is a constant with the value of the immediate. If we get here
// and we have a Kind_Imm, skip the next operand, and continue.
if (Kind == InlineAsm::Kind_Imm) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}

unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
if (NumRegs)
OpChanged.push_back(false);

unsigned DefIdx = 0;
bool IsTiedToChangedOp = false;
// If it's a use that is tied with a previous def, it has no
// reg class constraint.
if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
IsTiedToChangedOp = OpChanged[DefIdx];

if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
&& Kind != InlineAsm::Kind_RegDefEarlyClobber)
continue;

unsigned RC;
bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID))
|| NumRegs != 2)
continue;

assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
SDValue V0 = N->getOperand(i+1);
SDValue V1 = N->getOperand(i+2);
unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();

if (Kind == InlineAsm::Kind_RegDef ||
Kind == InlineAsm::Kind_RegDefEarlyClobber) {
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.

unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
SDValue Chain = SDValue(N,0);

SDNode *GU = N->getGluedUser();
SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32,
Chain.getValue(1));

// Extract values from a GPRPair reg and copy to the original GPR reg.
SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32,
RegCopy);
SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32,
RegCopy);
SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
RegCopy.getValue(1));
SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

// Update the original glue user.
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, Ops);
}
else {
// For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
// GPRPair and then pass the GPRPair to the inline asm.
SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

// As REG_SEQ doesn't take RegisterSDNode, we copy them first.
SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
Chain.getValue(1));
SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
T0.getValue(1));
SDValue Pair = SDValue(
CurDAG->getMachineNode(
TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32,
{
CurDAG->getTargetConstant(SP::IntPairRegClassID, dl,
MVT::i32),
T0,
CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32),
T1,
CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32),
}),
0);

// Copy REG_SEQ into a GPRPair-typed VR and replace the original two
// i32 VRs of inline asm with it.
unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
Glue = Chain.getValue(1);
}

Changed = true;

if (PairedReg.getNode()) {
OpChanged[OpChanged.size() - 1] = true;
Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
if (IsTiedToChangedOp)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
else
Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
Flag, dl, MVT::i32);
// Add the new register node and skip the original two GPRs.
AsmNodeOperands.push_back(PairedReg);
// Skip the next two GPRs.
i += 2;
}
}

if (Glue.getNode())
AsmNodeOperands.push_back(Glue);
if (!Changed)
return nullptr;

SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
New->setNodeId(-1);
return New.getNode();
}

SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
@@ -150,6 +327,12 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {

switch (N->getOpcode()) {
default: break;
case ISD::INLINEASM: {
SDNode *ResNode = SelectInlineAsm(N);
if (ResNode)
return ResNode;
break;
}
case SPISD::GLOBAL_BASE_REG:
return getGlobalBaseReg();

219 changes: 181 additions & 38 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -49,9 +49,9 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
return true;
}

static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
@@ -77,6 +77,29 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
return true;
}

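// Same as CC_Sparc_Assign_Split_64 above, but for return values: both halves
// must go in registers, so report failure if either register cannot be
// allocated.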
static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};

// Try to get first reg.
if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;

// Try to get second reg.
if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;

return true;
}

// Allocate a full-sized argument for the 64-bit ABI.
static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
@@ -202,12 +225,34 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
RetOps.push_back(SDValue());

// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");

Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
OutVals[i], Flag);
SDValue Arg = OutVals[realRVLocIdx];

if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::v2i32);
// Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would
// happen by default if this wasn't a legal type)

SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Arg,
DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Arg,
DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));

Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
Flag);
} else
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);

// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -375,7 +420,8 @@ LowerFormalArguments_32(SDValue Chain,

if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64);
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);

unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
@@ -398,7 +444,7 @@ LowerFormalArguments_32(SDValue Chain,
}
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
@@ -422,7 +468,7 @@ LowerFormalArguments_32(SDValue Chain,
auto PtrVT = getPointerTy(DAG.getDataLayout());

if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::f64);
assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
int FI = MF.getFrameInfo()->CreateFixedObject(8,
@@ -454,7 +500,7 @@ LowerFormalArguments_32(SDValue Chain,

SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
@@ -788,7 +834,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}

if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64);
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);

if (VA.isMemLoc()) {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
@@ -804,49 +850,54 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
}

SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
Arg, StackPtr, MachinePointerInfo(),
false, false, 0);
// Sparc is big-endian, so the high part comes first.
SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
MachinePointerInfo(), false, false, false, 0);
// Increment the pointer to the other half.
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getIntPtrConstant(4, dl));
// Load the low part.
SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
MachinePointerInfo(), false, false, false, 0);
if (VA.getLocVT() == MVT::f64) {
// Move the float value from the float registers into the
// integer registers.

// TODO: this conversion is done in two steps, because
// f64->i64 conversion is done efficiently, and i64->v2i32 is
// basically a no-op. But f64->v2i32 is NOT done efficiently
// for some reason.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}

SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Arg,
DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Arg,
DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));

if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
assert(i+1 != e);
CCValAssign &NextVA = ArgLocs[++i];
if (NextVA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
} else {
// Store the low part in stack.
// Store the second part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
} else {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
// Store the high part.
// Store the first part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff,
MachinePointerInfo(),
false, false, 0));
// Store the low part.
// Store the second part.
PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
@@ -1377,8 +1428,45 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
if (Subtarget->is64Bit())
if (Subtarget->is64Bit()) {
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
} else {
// On 32bit sparc, we define a double-register 32bit register
// class, as well. This is modeled in LLVM as a 2-vector of i32.
addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);

// ...but almost all operations must be expanded, so set that as
// the default.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
setOperationAction(Op, MVT::v2i32, Expand);
}
// Truncating/extending stores/loads are also not supported.
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);

setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);

setTruncStoreAction(VT, MVT::v2i32, Expand);
setTruncStoreAction(MVT::v2i32, VT, Expand);
}
// However, load and store *are* legal.
setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
setOperationAction(ISD::STORE, MVT::v2i32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);

// And we need to promote i64 loads/stores into vector load/store
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);

// Sadly, this doesn't work:
// AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
// AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
}

// Turn FP extload into load/fextend
for (MVT VT : MVT::fp_valuetypes()) {
@@ -2604,6 +2692,17 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
return DAG.getMergeValues(Ops, dl);
}

static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
{
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());

EVT MemVT = LdNode->getMemoryVT();
if (MemVT == MVT::f128)
return LowerF128Load(Op, DAG);

return Op;
}

// Lower a f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -2648,6 +2747,29 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}

static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
{
SDLoc dl(Op);
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());

EVT MemVT = St->getMemoryVT();
if (MemVT == MVT::f128)
return LowerF128Store(Op, DAG);

if (MemVT == MVT::i64) {
// Custom handling for i64 stores: turn it into a bitcast and a
// v2i32 store.
SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
SDValue Chain = DAG.getStore(
St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(), St->getAlignment(),
St->getAAInfo());
return Chain;
}

return SDValue();
}

static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
&& "invalid opcode");
@@ -2786,7 +2908,6 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}


SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {

@@ -2821,8 +2942,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
Subtarget);

case ISD::LOAD: return LowerF128Load(Op, DAG);
case ISD::STORE: return LowerF128Store(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FADD: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::ADD_F128), 2);
case ISD::FSUB: return LowerF128Op(Op, DAG,
@@ -3152,9 +3273,12 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
return std::make_pair(0U, &SP::IntRegsRegClass);
if (VT == MVT::v2i32)
return std::make_pair(0U, &SP::IntPairRegClass);
else
return std::make_pair(0U, &SP::IntRegsRegClass);
}
} else if (!Constraint.empty() && Constraint.size() <= 5
} else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
// constraint = '{r<d>}'
// Remove the braces from around the name.
@@ -3230,5 +3354,24 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
getLibcallName(libCall),
1));
return;
case ISD::LOAD: {
LoadSDNode *Ld = cast<LoadSDNode>(N);
// Custom handling only for i64: turn i64 load into a v2i32 load,
// and a bitcast.
if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
return;

SDLoc dl(N);
SDValue LoadRes = DAG.getExtLoad(
Ld->getExtensionType(), dl, MVT::v2i32,
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo());

SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
Results.push_back(Res);
Results.push_back(LoadRes.getValue(1));
return;
}
}
}
4 changes: 2 additions & 2 deletions llvm/lib/Target/Sparc/SparcISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ namespace llvm {
}

void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const override;
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const override;

MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
23 changes: 20 additions & 3 deletions llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -284,7 +284,9 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned numSubRegs = 0;
unsigned movOpc = 0;
const unsigned *subRegIdx = nullptr;
bool ExtraG0 = false;
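// Set for IntPair copies, which expand to two ORrr (mov) instructions that
// need %g0 as an extra first source operand.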

const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
@@ -294,7 +296,12 @@
if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) {
subRegIdx = DW_SubRegsIdx;
numSubRegs = 2;
movOpc = SP::ORrr;
ExtraG0 = true;
} else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
@@ -347,7 +354,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
assert(Dst && Src && "Bad sub-register");

MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
if (ExtraG0)
MIB.addReg(SP::G0);
MIB.addReg(Src);
MovMI = MIB.getInstr();
}
// Add implicit super-register defs and kills to the last MovMI.
MovMI->addRegisterDefined(DestReg, TRI);
@@ -372,12 +383,15 @@
MFI.getObjectAlignment(FI));

// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
if (RC == &SP::I64RegsRegClass)
if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntPairRegClass)
BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
@@ -415,6 +429,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
else if (RC == &SP::IntPairRegClass)
BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
18 changes: 18 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -408,6 +408,9 @@ let DecoderMethod = "DecodeLoadInt" in {
defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
}

let DecoderMethod = "DecodeLoadIntPair" in
defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>;

// Section B.2 - Load Floating-point Instructions, p. 92
let DecoderMethod = "DecodeLoadFP" in
defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
@@ -424,6 +427,9 @@
defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>;
}

let DecoderMethod = "DecodeStoreIntPair" in
defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;

// Section B.5 - Store Floating-point Instructions, p. 97
let DecoderMethod = "DecodeStoreFP" in
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
@@ -1327,6 +1333,18 @@ def : Pat<(i32 (atomic_load ADDRri:$src)), (LDri ADDRri:$src)>;
def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>;
def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>;

// extract_vector
def : Pat<(vector_extract (v2i32 IntPair:$Rn), 0),
(i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>;
def : Pat<(vector_extract (v2i32 IntPair:$Rn), 1),
(i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>;
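// Element 0 of the v2i32 pair lives in the even (first) register and
// element 1 in the odd register, matching the memory layout used by the
// ldd/std double-word instructions above.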

// build_vector
def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)),
(INSERT_SUBREG
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even),
(i32 IntRegs:$a2), sub_odd)>;


include "SparcInstr64Bit.td"
include "SparcInstrVIS.td"
13 changes: 12 additions & 1 deletion llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -75,6 +75,18 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SP::G6);
Reserved.set(SP::G7);

// Also reserve the register pair aliases covering the above
// registers, with the same conditions.
Reserved.set(SP::G0_G1);
if (ReserveAppRegisters)
Reserved.set(SP::G2_G3);
if (ReserveAppRegisters || !Subtarget.is64Bit())
Reserved.set(SP::G4_G5);

Reserved.set(SP::O6_O7);
Reserved.set(SP::I6_I7);
Reserved.set(SP::G6_G7);

// Unaliased double registers are not available in non-V9 targets.
if (!Subtarget.isV9()) {
for (unsigned n = 0; n != 16; ++n) {
@@ -210,4 +222,3 @@
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}

31 changes: 31 additions & 0 deletions llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -32,6 +32,12 @@ def sub_odd64 : SubRegIndex<64, 64>;
// Ri - 32-bit integer registers
class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;

// Rdi - pairs of 32-bit integer registers
class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
let SubRegs = subregs;
let SubRegIndices = [sub_even, sub_odd];
let CoveredBySubRegs = 1;
}
// Rf - 32-bit floating-point registers
class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>;

@@ -217,6 +223,24 @@ def Q13 : Rq<21, "F52", [D26, D27]>;
def Q14 : Rq<25, "F56", [D28, D29]>;
def Q15 : Rq<29, "F60", [D30, D31]>;

// Aliases of the integer registers used for LDD/STD double-word operations
def G0_G1 : Rdi<0, "G0", [G0, G1]>;
def G2_G3 : Rdi<2, "G2", [G2, G3]>;
def G4_G5 : Rdi<4, "G4", [G4, G5]>;
def G6_G7 : Rdi<6, "G6", [G6, G7]>;
def O0_O1 : Rdi<8, "O0", [O0, O1]>;
def O2_O3 : Rdi<10, "O2", [O2, O3]>;
def O4_O5 : Rdi<12, "O4", [O4, O5]>;
def O6_O7 : Rdi<14, "O6", [O6, O7]>;
def L0_L1 : Rdi<16, "L0", [L0, L1]>;
def L2_L3 : Rdi<18, "L2", [L2, L3]>;
def L4_L5 : Rdi<20, "L4", [L4, L5]>;
def L6_L7 : Rdi<22, "L6", [L6, L7]>;
def I0_I1 : Rdi<24, "I0", [I0, I1]>;
def I2_I3 : Rdi<26, "I2", [I2, I3]>;
def I4_I5 : Rdi<28, "I4", [I4, I5]>;
def I6_I7 : Rdi<30, "I6", [I6, I7]>;

// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -231,6 +255,13 @@ def IntRegs : RegisterClass<"SP", [i32, i64], 32,
(sequence "L%u", 0, 7),
(sequence "O%u", 0, 7))>;

// Should be in the same order as IntRegs.
def IntPair : RegisterClass<"SP", [v2i32], 64,
(add I0_I1, I2_I3, I4_I5, I6_I7,
G0_G1, G2_G3, G4_G5, G6_G7,
L0_L1, L2_L3, L4_L5, L6_L7,
O0_O1, O2_O3, O4_O5, O6_O7)>;

// Register class for 64-bit mode, with a 64-bit spill slot size.
// These are the same as the 32-bit registers, so TableGen will consider this
// to be a sub-class of IntRegs. That works out because requiring a 64-bit
1 change: 1 addition & 0 deletions llvm/llvm.spec.in
@@ -65,3 +65,4 @@ rm -rf %{buildroot}
* Mon Feb 09 2003 Brian R. Gaeke
- Initial working version of RPM spec file.


13 changes: 13 additions & 0 deletions llvm/test/CodeGen/SPARC/basictest.ll
@@ -84,3 +84,16 @@ define i64 @unsigned_multiply_32x32_64(i32 %a, i32 %b) {
ret i64 %r
}

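;; Check that i64 load/store on 32-bit sparc is lowered to the double-word
;; ldd/std instructions operating on an even/odd register pair.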
; CHECK-LABEL: load_store_64bit:
; CHECK: ldd [%o0], %o2
; CHECK: addcc %o3, 3, %o5
; CHECK: addxcc %o2, 0, %o4
; CHECK: retl
; CHECK: std %o4, [%o1]
define void @load_store_64bit(i64* %x, i64* %y) {
entry:
%0 = load i64, i64* %x
%add = add nsw i64 %0, 3
store i64 %add, i64* %y
ret void
}
53 changes: 46 additions & 7 deletions llvm/test/CodeGen/SPARC/inlineasm.ll
@@ -1,4 +1,4 @@
; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
; RUN: llc -march=sparc <%s | FileCheck %s

; CHECK-LABEL: test_constraint_r
; CHECK: add %o1, %o0, %o0
@@ -8,23 +8,23 @@ entry:
ret i32 %0
}

; CHECK-LABEL: test_constraint_I
; CHECK-LABEL: test_constraint_I:
; CHECK: add %o0, 1023, %o0
define i32 @test_constraint_I(i32 %a) {
entry:
%0 = tail call i32 asm sideeffect "add $1, $2, $0", "=r,r,rI"(i32 %a, i32 1023)
ret i32 %0
}

; CHECK-LABEL: test_constraint_I_neg
; CHECK-LABEL: test_constraint_I_neg:
; CHECK: add %o0, -4096, %o0
define i32 @test_constraint_I_neg(i32 %a) {
entry:
%0 = tail call i32 asm sideeffect "add $1, $2, $0", "=r,r,rI"(i32 %a, i32 -4096)
ret i32 %0
}

; CHECK-LABEL: test_constraint_I_largeimm
; CHECK-LABEL: test_constraint_I_largeimm:
; CHECK: sethi 9, [[R0:%[gilo][0-7]]]
; CHECK: or [[R0]], 784, [[R1:%[gilo][0-7]]]
; CHECK: add %o0, [[R1]], %o0
@@ -34,12 +34,51 @@ entry:
ret i32 %0
}

; CHECK-LABEL: test_constraint_reg
; CHECK-LABEL: test_constraint_reg:
; CHECK: ldda [%o1] 43, %g2
; CHECK: ldda [%o1] 43, %g3
; CHECK: ldda [%o1] 43, %g4
define void @test_constraint_reg(i32 %s, i32* %ptr) {
entry:
%0 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={r2},r,n"(i32* %ptr, i32 43)
%1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g3},r,n"(i32* %ptr, i32 43)
%1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g4},r,n"(i32* %ptr, i32 43)
ret void
}

;; Ensure that i64 args to asm are allocated to the IntPair register class.
;; Also checks that register renaming for leaf proc works.
; CHECK-LABEL: test_constraint_r_i64:
; CHECK: mov %o0, %o5
; CHECK: sra %o5, 31, %o4
; CHECK: std %o4, [%o1]
define i32 @test_constraint_r_i64(i32 %foo, i64* %out, i32 %o) {
entry:
%conv = sext i32 %foo to i64
tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
ret i32 %o
}

;; Same test without leaf-proc opt
; CHECK-LABEL: test_constraint_r_i64_noleaf:
; CHECK: mov %i0, %i5
; CHECK: sra %i5, 31, %i4
; CHECK: std %i4, [%i1]
define i32 @test_constraint_r_i64_noleaf(i32 %foo, i64* %out, i32 %o) #0 {
entry:
%conv = sext i32 %foo to i64
tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
ret i32 %o
}
attributes #0 = { "no-frame-pointer-elim"="true" }

;; Ensures that tied input and output operands get allocated properly.
; CHECK-LABEL: test_i64_inout:
; CHECK: sethi 0, %o2
; CHECK: mov 5, %o3
; CHECK: xor %o2, %g0, %o2
; CHECK: mov %o2, %o0
; CHECK: ret
define i64 @test_i64_inout() {
entry:
%0 = call i64 asm sideeffect "xor $1, %g0, $0", "=r,0,~{i1}"(i64 5);
ret i64 %0
}
135 changes: 135 additions & 0 deletions llvm/test/CodeGen/SPARC/reserved-regs.ll
@@ -0,0 +1,135 @@
; RUN: llc -march=sparc < %s | FileCheck %s

@g = common global [32 x i32] zeroinitializer, align 16
@h = common global [16 x i64] zeroinitializer, align 16

;; Ensures that we don't use registers which are supposed to be reserved.

; CHECK-LABEL: use_all_i32_regs:
; CHECK-NOT: %g0
; CHECK-NOT: %g1
; CHECK-NOT: %g5
; CHECK-NOT: %g6
; CHECK-NOT: %g7
; CHECK-NOT: %o6
; CHECK-NOT: %i6
; CHECK-NOT: %i7
; CHECK: ret
define void @use_all_i32_regs() {
entry:
%0 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
%1 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
%2 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
%3 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
%4 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
%5 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
%6 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
%7 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
%8 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
%9 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
%10 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
%11 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
%12 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
%13 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
%14 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
%15 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
%16 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
%17 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
%18 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
%19 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
%20 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
%21 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
%22 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
%23 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
%24 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
%25 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
%26 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
%27 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
%28 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
%29 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
%30 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
%31 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
store volatile i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
store volatile i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
store volatile i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
store volatile i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
store volatile i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
store volatile i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
store volatile i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
store volatile i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
store volatile i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
store volatile i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
store volatile i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
store volatile i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
store volatile i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
store volatile i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
store volatile i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
store volatile i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
store volatile i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
store volatile i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
store volatile i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
store volatile i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
store volatile i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
store volatile i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
store volatile i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
store volatile i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
store volatile i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
store volatile i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
store volatile i32 %27, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
store volatile i32 %28, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
store volatile i32 %29, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
store volatile i32 %30, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
store volatile i32 %31, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
store volatile i32 %0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
ret void
}


; CHECK-LABEL: use_all_i64_regs:
; CHECK-NOT: %g0
; CHECK-NOT: %g1
; CHECK-NOT: %g4
; CHECK-NOT: %g5
; CHECK-NOT: %g6
; CHECK-NOT: %g7
; CHECK-NOT: %o6
; CHECK-NOT: %o7
; CHECK-NOT: %i6
; CHECK-NOT: %i7
; CHECK: ret
define void @use_all_i64_regs() {
entry:
%0 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
%1 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
%2 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
%3 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
%4 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
%5 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
%6 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
%7 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
%8 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
%9 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
%10 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
%11 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
%12 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
%13 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
%14 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
%15 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
store volatile i64 %1, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
store volatile i64 %2, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
store volatile i64 %3, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
store volatile i64 %4, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
store volatile i64 %5, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
store volatile i64 %6, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
store volatile i64 %7, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
store volatile i64 %8, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
store volatile i64 %9, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
store volatile i64 %10, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
store volatile i64 %11, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
store volatile i64 %12, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
store volatile i64 %13, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
store volatile i64 %14, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
store volatile i64 %15, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
store volatile i64 %0, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
ret void
}
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/SPARC/spill.ll
@@ -0,0 +1,64 @@
; RUN: llc -march=sparc < %s | FileCheck %s

;; Ensure that spills and reloads work for various types on
;; sparcv8.

;; For i32/i64 tests, use an asm statement which clobbers most
;; registers to ensure the spill will happen.

; CHECK-LABEL: test_i32_spill:
; CHECK: and %i0, %i1, %o0
; CHECK: st %o0, [%fp+{{.+}}]
; CHECK: add %o0, %o0, %g0
; CHECK: ld [%fp+{{.+}}, %i0
define i32 @test_i32_spill(i32 %a, i32 %b) {
entry:
%r0 = and i32 %a, %b
; The clobber list has all registers except g0/o0. (Only o0 is usable.)
%0 = call i32 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7}"(i32 %r0)
ret i32 %r0
}

; CHECK-LABEL: test_i64_spill:
; CHECK: and %i0, %i2, %o0
; CHECK: and %i1, %i3, %o1
; CHECK: std %o0, [%fp+{{.+}}]
; CHECK: add %o0, %o0, %g0
; CHECK: ldd [%fp+{{.+}}, %i0
define i64 @test_i64_spill(i64 %a, i64 %b) {
entry:
%r0 = and i64 %a, %b
; The clobber list has all registers except g0,g1,o0,o1. (Only o0/o1 are a usable pair)
; So, o0/o1 must be used.
%0 = call i64 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o2},~{o3},~{o4},~{o5},~{o7}"(i64 %r0)
ret i64 %r0
}

;; For float/double tests, a call is a suitable clobber as *all* FPU
;; registers are caller-save on sparcv8.

; CHECK-LABEL: test_float_spill:
; CHECK: fadds %f1, %f0, [[R:%[f][0-31]]]
; CHECK: st [[R]], [%fp+{{.+}}]
; CHECK: call
; CHECK: ld [%fp+{{.+}}, %f0
declare float @foo_float(float)
define float @test_float_spill(float %a, float %b) {
entry:
%r0 = fadd float %a, %b
%0 = call float @foo_float(float %r0)
ret float %r0
}

; CHECK-LABEL: test_double_spill:
; CHECK: faddd %f2, %f0, [[R:%[f][0-31]]]
; CHECK: std [[R]], [%fp+{{.+}}]
; CHECK: call
; CHECK: ldd [%fp+{{.+}}, %f0
declare double @foo_double(double)
define double @test_double_spill(double %a, double %b) {
entry:
%r0 = fadd double %a, %b
%0 = call double @foo_double(double %r0)
ret double %r0
}
24 changes: 24 additions & 0 deletions llvm/test/MC/Disassembler/Sparc/sparc-mem.txt
@@ -221,3 +221,27 @@

# CHECK: swapa [%g1] 131, %o2
0xd4 0xf8 0x50 0x60

# CHECK: ldd [%i0+%l6], %o2
0xd4 0x1e 0x00 0x16

# CHECK: ldd [%i0+32], %o2
0xd4 0x1e 0x20 0x20

# CHECK: ldd [%g1], %o2
0xd4 0x18 0x60 0x00

# CHECK: ldd [%g1], %o2
0xd4 0x18 0x40 0x00
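# The two [%g1] forms above are the immediate (simm13=0) and register
# (rs2=%g0) addressing encodings; both disassemble to the same text.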

# CHECK: std %o2, [%i0+%l6]
0xd4 0x3e 0x00 0x16

# CHECK: std %o2, [%i0+32]
0xd4 0x3e 0x20 0x20

# CHECK: std %o2, [%g1]
0xd4 0x38 0x60 0x00

# CHECK: std %o2, [%g1]
0xd4 0x38 0x40 0x00
18 changes: 18 additions & 0 deletions llvm/test/MC/Sparc/sparc-mem-instructions.s
@@ -46,6 +46,15 @@
! CHECK: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76]
lda [%i0 + %l6] 131, %o2

! CHECK: ldd [%i0+%l6], %o2 ! encoding: [0xd4,0x1e,0x00,0x16]
ldd [%i0 + %l6], %o2
! CHECK: ldd [%i0+32], %o2 ! encoding: [0xd4,0x1e,0x20,0x20]
ldd [%i0 + 32], %o2
! CHECK: ldd [%g1], %o2 ! encoding: [0xd4,0x18,0x40,0x00]
ldd [%g1], %o2
! CHECK: ldda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x9e,0x10,0x76]
ldda [%i0 + %l6] 131, %o2

! CHECK: stb %o2, [%i0+%l6] ! encoding: [0xd4,0x2e,0x00,0x16]
stb %o2, [%i0 + %l6]
! CHECK: stb %o2, [%i0+32] ! encoding: [0xd4,0x2e,0x20,0x20]
@@ -73,6 +82,15 @@
! CHECK: sta %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xa6,0x10,0x76]
sta %o2, [%i0 + %l6] 131

! CHECK: std %o2, [%i0+%l6] ! encoding: [0xd4,0x3e,0x00,0x16]
std %o2, [%i0 + %l6]
! CHECK: std %o2, [%i0+32] ! encoding: [0xd4,0x3e,0x20,0x20]
std %o2, [%i0 + 32]
! CHECK: std %o2, [%g1] ! encoding: [0xd4,0x38,0x40,0x00]
std %o2, [%g1]
! CHECK: stda %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xbe,0x10,0x76]
stda %o2, [%i0 + %l6] 131

! CHECK: flush %g1+%g2 ! encoding: [0x81,0xd8,0x40,0x02]
flush %g1 + %g2
! CHECK: flush %g1+8 ! encoding: [0x81,0xd8,0x60,0x08]
Expand Down