40 changes: 27 additions & 13 deletions llvm/lib/Target/PowerPC/PPCCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,17 @@ def RetCC_PPC : CallingConv<[
// only the ELFv2 ABI fully utilizes all these registers.
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,


// QPX vectors are returned in QF1 and QF2.
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,

// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
CCIfType<[v2f64, v2i64],
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;

// No explicit register is specified for the AnyReg calling convention. The
Expand Down Expand Up @@ -108,10 +112,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
CCIfType<[v2f64, v2i64],
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -144,6 +150,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// alignment and size as doubles.
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,

// QPX vectors that are stored in double precision need 32-byte alignment.
CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,

// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
Expand All @@ -158,12 +167,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
def CC_PPC32_SVR4 : CallingConv<[
// QPX vectors mirror the scalar FP convention.
CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,

// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
CCIfType<[v2f64, v2i64],
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9,
V10, V11, V12, V13]>>>,
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
VSH10, VSH11, VSH12, VSH13]>>,
VSH10, VSH11, VSH12, VSH13]>>>,

CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
(STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
STI.getPlatformStackAlignment(), 0),
Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2293,6 +2293,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
if (PPCSubTarget->hasQPX())
return nullptr;

EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
Expand Down Expand Up @@ -2468,6 +2471,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
Expand Down Expand Up @@ -2711,6 +2716,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_VSFRC;
else
SelectCCOp = PPC::SELECT_CC_F8;
else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
SelectCCOp = PPC::SELECT_CC_QFRC;
else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
SelectCCOp = PPC::SELECT_CC_QSRC;
else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
Expand Down Expand Up @@ -3406,6 +3417,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_QFRC:
case PPC::SELECT_QSRC:
case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC: {
Expand Down Expand Up @@ -3713,6 +3727,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_QFRC:
case PPC::SELECT_QSRC:
case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC:
Expand Down
1,145 changes: 1,097 additions & 48 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Large diffs are not rendered by default.

30 changes: 29 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,22 @@ namespace llvm {
/// of outputs.
XXSWAPD,

/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,

/// QVGPCI = This corresponds to the QPX qvgpci instruction.
QVGPCI,

/// QVALIGNI = This corresponds to the QPX qvaligni instruction.
QVALIGNI,

/// QVESPLATI = This corresponds to the QPX qvesplati instruction.
QVESPLATI,

/// QBFLT = Access the underlying QPX floating-point boolean
/// representation.
QBFLT,

/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
Expand Down Expand Up @@ -332,7 +348,11 @@ namespace llvm {
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
STXVD2X
STXVD2X,

/// QBRC, CHAIN = QVLFSb CHAIN, Ptr
/// The 4xf32 load used for v4i1 constants.
QVLFSb
};
}

Expand Down Expand Up @@ -381,6 +401,10 @@ namespace llvm {
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);

/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
}

class PPCTargetLowering : public TargetLowering {
Expand Down Expand Up @@ -679,11 +703,15 @@ namespace llvm {
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
Expand Down
92 changes: 92 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,47 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}

// Used for QPX
// XForm_18 - Instruction format with three 5-bit register fields (FRT, FRA,
// FRB) and a 10-bit extended opcode 'xo'. opcode, OOL, IOL, asmstr and itin
// are forwarded unchanged to the base class I; 'pattern' is stored in Pattern.
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;

let Pattern = pattern;

// Encoding: bits 6-10 FRT, 11-15 FRA, 16-20 FRB, 21-30 xo, bit 31 fixed to 0.
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}

// XForm_19 - Same layout as XForm_18, but with the FRA field forced to zero,
// so only FRT and FRB remain as variable register fields.
class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRA = 0;
}

// XForm_20 - Three 5-bit register fields (FRT, FRA, FRB) plus a 4-bit 'tttt'
// field. Unlike XForm_18, the extended opcode 'xo' is only 6 bits wide here
// because bits 21-24 are taken by 'tttt'.
class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;
bits<4> tttt;

let Pattern = pattern;

// Encoding: bits 6-10 FRT, 11-15 FRA, 16-20 FRB, 21-24 tttt, 25-30 xo,
// bit 31 fixed to 0.
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-24} = tttt;
let Inst{25-30} = xo;
let Inst{31} = 0;
}

class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
Expand Down Expand Up @@ -1215,6 +1256,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}

// Used for QPX
// AForm_4a - Derives from AForm_1 with the same constructor arguments and
// forces the inherited FRA and FRC fields to zero.
class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRA = 0;
let FRC = 0;
}

// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
Expand Down Expand Up @@ -1439,6 +1488,49 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{22-31} = xo;
}

// Z23-Form (used by QPX)
// Z23Form_1 - Three 5-bit register fields (FRT, FRA, FRB), a 2-bit 'idx'
// field and an 8-bit extended opcode 'xo'. The last instruction bit carries
// RC, which defaults to 0.
class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;
bits<2> idx;

let Pattern = pattern;

bit RC = 0; // set by isDOT

// Encoding: bits 6-10 FRT, 11-15 FRA, 16-20 FRB, 21-22 idx, 23-30 xo,
// bit 31 RC.
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-22} = idx;
let Inst{23-30} = xo;
let Inst{31} = RC;
}

// Z23Form_2 - Same layout as Z23Form_1, but with the FRB field forced to
// zero, leaving FRT, FRA and idx as the variable fields.
class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRB = 0;
}

// Z23Form_3 - A single 5-bit register field (FRT) followed by a 12-bit 'idx'
// field that occupies the bit positions used for FRA, FRB and the 2-bit idx
// in Z23Form_1. The 8-bit 'xo' and the RC bit sit in the same positions as in
// Z23Form_1.
class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<12> idx;

let Pattern = pattern;

bit RC = 0; // set by isDOT

// Encoding: bits 6-10 FRT, 11-22 idx, 23-30 xo, bit 31 RC.
let Inst{6-10} = FRT;
let Inst{11-22} = idx;
let Inst{23-30} = xo;
let Inst{31} = RC;
}

//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
Expand Down
42 changes: 42 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
case PPC::QVLFDX:
case PPC::QVLFSXs:
case PPC::QVLFDXb:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
Expand All @@ -207,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
case PPC::QVSTFDX:
case PPC::QVSTFSXs:
case PPC::QVSTFDXb:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
Expand Down Expand Up @@ -759,6 +765,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMRs;
else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
Expand Down Expand Up @@ -844,6 +856,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
SpillsVRS = true;
} else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
Expand Down Expand Up @@ -939,6 +969,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
DestReg),
FrameIdx));
SpillsVRS = true;
} else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
FrameIdx));
NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
Expand Down
45 changes: 45 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@ def tocentry32 : Operand<iPTR> {
let MIOperandInfo = (ops i32imm:$imm);
}

// Type profiles for the QPX-specific DAG nodes. In each SDTypeProfile the
// first number is the result count and the second the operand count; in the
// constraint list, index 0 is the result and indices 1.. are the operands.

// qvfperm: vector result; operands 1 and 2 have the result type, operand 3
// is also a vector.
def SDT_PPCqvfperm : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
]>;
// qvgpci: vector result produced from a single integer operand.
def SDT_PPCqvgpci : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisInt<1>
]>;
// qvaligni: vector result; operands 1 and 2 have the result type, operand 3
// is an integer.
def SDT_PPCqvaligni : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
]>;
// qvesplati: vector result; operand 1 has the result type, operand 2 is an
// integer.
def SDT_PPCqvesplati : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// qbflt: vector in, vector out; the two vector types are not tied together.
def SDT_PPCqbflt : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisVec<1>
]>;

// qvlfsb: vector result, single pointer-typed operand.
def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;

//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
Expand Down Expand Up @@ -127,6 +148,16 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;

def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;

// SDNode wrappers for the QPX PPCISD opcodes. Each one pairs the PPCISD
// opcode name with its SDT_PPCqv* type profile; the first five carry no node
// properties.
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;

def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;

// QVLFSb is the only node here with properties: it has a chain and may load.
def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
[SDNPHasChain, SDNPMayLoad]>;

def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;

// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
Expand Down Expand Up @@ -464,6 +495,15 @@ def u6imm : Operand<i32> {
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
}
// Assembler operand class "U12Imm": matched with isU12Imm and rendered with
// addImmOperands.
def PPCU12ImmAsmOperand : AsmOperandClass {
let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
let RenderMethod = "addImmOperands";
}
// u12imm - i32 operand for a 12-bit unsigned immediate. It is printed by
// printU12ImmOperand, parsed through PPCU12ImmAsmOperand and decoded by
// decodeUImmOperand<12>.
def u12imm : Operand<i32> {
let PrintMethod = "printU12ImmOperand";
let ParserMatchClass = PPCU12ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<12>";
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addS16ImmOperands";
Expand Down Expand Up @@ -680,6 +720,10 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;

// Complementary predicates keyed on TM.Options.NoNaNsFPMath: NoNaNsFPMath
// holds when the option is set, NaNsFPMath when it is not.
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;

//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.

Expand Down Expand Up @@ -2643,6 +2687,7 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
include "PPCInstrQPX.td"

def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
Expand Down
1,192 changes: 1,192 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrQPX.td

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,9 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::QFRCRegClassID:
case PPC::QSRCRegClassID:
case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}

// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
// Each QFPR is built on top of an existing FPR: it reuses that register's
// hardware encoding and exposes it as its only sub-register through the
// sub_64 index.
class QFPR<FPR SubReg, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_64];
}

// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
Expand Down Expand Up @@ -114,6 +121,12 @@ foreach Index = 0-31 in {
def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
}

// QPX Floating-point registers
// Defines QF0..QF31. QF<n> wraps F<n> as its sub-register, is named "q<n>",
// and uses n + 32 for both entries of its DWARF register number list.
foreach Index = 0-31 in {
def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}

// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
Expand Down Expand Up @@ -303,6 +316,16 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
VF22, VF21, VF20)>;
def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;

// For QPX
// QFRC holds v4f64 values with 256-bit alignment. Its members are listed as
// QF0..QF13 followed by QF31 down to QF14.
// NOTE(review): this ordering presumably sets the allocation preference, as
// in the other PPC register classes — confirm.
def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
(sequence "QF%u", 31, 14))>;
// QSRC: the same registers as QFRC, typed v4f32 with 128-bit alignment.
def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
// QBRC: the same registers as QFRC, typed v4i1; Size is set to 256 explicitly.
def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
// These are actually stored as floating-point values where a positive
// number is true and anything else (including NaN) is false.
let Size = 256;
}

def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/PowerPC/PPCSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);

// Hidden command-line flag "-qpx-stack-unaligned". No explicit initial value
// is given here; its value is copied into the subtarget's IsQPXStackUnaligned
// field when subtarget features are initialized.
static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
cl::Hidden);

PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
Expand Down Expand Up @@ -90,6 +94,7 @@ void PPCSubtarget::initializeEnvironment() {
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
IsQPXStackUnaligned = false;
}

void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
Expand Down Expand Up @@ -126,8 +131,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
if (hasQPX() || isBGQ())
StackAlignment = 32;
IsQPXStackUnaligned = QPXStackUnaligned;
StackAlignment = getPlatformStackAlignment();

// Determine endianness.
// FIXME: Part of the TargetMachine.
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/PowerPC/PPCSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
bool HasICBT;
bool HasInvariantFunctionDescriptors;

/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
/// of the stack.
bool IsQPXStackUnaligned;

const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
Expand Down Expand Up @@ -230,6 +235,14 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
return HasInvariantFunctionDescriptors;
}

/// Returns the IsQPXStackUnaligned flag: true when the stack keeps its
/// 16-byte alignment even though QPX or BG/Q would otherwise ask for 32.
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }

/// Returns the platform stack alignment in bytes: 32 when QPX is available or
/// the target is BG/Q and the unaligned override is off, 16 in every other
/// case.
unsigned getPlatformStackAlignment() const {
  const bool WantsQPXAlignment = hasQPX() || isBGQ();
  return (WantsQPXAlignment && !isQPXStackUnaligned()) ? 32 : 16;
}

const Triple &getTargetTriple() const { return TargetTriple; }

/// isDarwin - True if this is any darwin platform.
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,14 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
}

unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec())
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
return ST->hasVSX() ? 64 : 32;
}

unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
Expand Down Expand Up @@ -276,6 +277,12 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index == 0)
return 0;

return BaseT::getVectorInstrCost(Opcode, Val, Index);
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
// Floating point scalars are already located in index #0.
if (Index == 0)
return 0;

return BaseT::getVectorInstrCost(Opcode, Val, Index);
}

Expand Down
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-bv-sint.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; Builds a <2 x double> from two sitofp results, multiplies and adds it, then
; stores it. The expected a2q output converts through lfiwax/fcfid, does the
; arithmetic with QPX qvesplati/qvfmul/qvfadd, and finishes with two stfd
; stores.
define void @s452() nounwind {
entry:
br label %for.body4

for.body4: ; preds = %for.body4, %entry
%conv.4 = sitofp i32 undef to double
%conv.5 = sitofp i32 undef to double
%mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
%mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
%mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef
%add7.4 = fadd <2 x double> undef, %mul.4
store <2 x double> %add7.4, <2 x double>* undef, align 16
br i1 undef, label %for.end, label %for.body4

for.end: ; preds = %for.body4
unreachable
; CHECK-LABEL: @s452
; CHECK: lfiwax [[REG1:[0-9]+]],
; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
; FIXME: We could 'promote' this to a vector earlier and remove this splat.
; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
; CHECK: qvfmul
; CHECK: qvfadd
; CHECK: qvesplati {{[0-9]+}},
; FIXME: We can use qvstfcdx here instead of two stores.
; CHECK: stfd
; CHECK: stfd
}

37 changes: 37 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-bv.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
; RUN: llc < %s -mcpu=a2q | FileCheck %s

target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"

; Assembles a <4 x double> from four scalar double arguments. The expected
; code materializes two permutation controls with qvgpci (275 and 101) and
; merges the inputs with three qvfperm instructions.
define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
%v1 = insertelement <4 x double> undef, double %f1, i32 0
%v2 = insertelement <4 x double> %v1, double %f2, i32 1
%v3 = insertelement <4 x double> %v2, double %f3, i32 2
%v4 = insertelement <4 x double> %v3, double %f4, i32 3
ret <4 x double> %v4

; CHECK-LABEL: @foo
; CHECK: qvgpci [[REG1:[0-9]+]], 275
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
; CHECK: blr
}

; Single-precision counterpart of the build-vector test: assembles a
; <4 x float> from four scalar float arguments and expects the same
; qvgpci (275, 101) plus three-qvfperm sequence.
define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
%v1 = insertelement <4 x float> undef, float %f1, i32 0
%v2 = insertelement <4 x float> %v1, float %f2, i32 1
%v3 = insertelement <4 x float> %v2, float %f3, i32 2
%v4 = insertelement <4 x float> %v3, float %f4, i32 3
ret <4 x float> %v4

; CHECK-LABEL: @goo
; CHECK: qvgpci [[REG1:[0-9]+]], 275
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
; CHECK: blr
}

21 changes: 21 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-func-clobber.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

declare <4 x double> @foo(<4 x double> %p)

; Calls @foo twice with <4 x double> arguments and adds the two results. The
; expected output stores QPX registers with qvstfdx and reloads them with
; qvlfdx around the two calls, before the final qvfadd.
define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
entry:
%v = call <4 x double> @foo(<4 x double> %p)
%w = call <4 x double> @foo(<4 x double> %q)
%x = fadd <4 x double> %v, %w
ret <4 x double> %x

; CHECK-LABEL: @bar
; CHECK: qvstfdx 2,
; CHECK: bl foo
; CHECK: qvstfdx 1,
; CHECK: qvlfdx 1,
; CHECK: bl foo
; CHECK: qvlfdx [[REG:[0-9]+]],
; CHECK: qvfadd 1, [[REG]], 1
}

25 changes: 25 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; Loads a <4 x double> with only 8-byte alignment. The expected sequence is
; two qvlfdx loads (the second at offset 31 from the pointer), a qvlpcldx to
; produce the permute control, and a qvfperm that combines the two loads.
define <4 x double> @foo(<4 x double>* %p) {
entry:
%v = load <4 x double>* %p, align 8
ret <4 x double> %v
}

; CHECK: @foo
; CHECK-DAG: li [[REG1:[0-9]+]], 31
; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
; CHECK: blr

; Loads a <4 x double> with 32-byte alignment; only the presence of a qvlfdx
; is verified for this case.
define <4 x double> @bar(<4 x double>* %p) {
entry:
%v = load <4 x double>* %p, align 32
ret <4 x double> %v
}

; CHECK: @bar
; CHECK: qvlfdx

194 changes: 194 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-recipest.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)

; a / sqrt(b) on <4 x double>. The -enable-unsafe-fp-math run expects a
; qvfrsqrte estimate followed by qvfmul/qvfmadd steps; the default run expects
; fsqrt and fdiv.
define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
entry:
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%r = fdiv <4 x double> %a, %x
ret <4 x double> %r

; CHECK-LABEL: @foo
; CHECK: qvfrsqrte
; CHECK: qvfmul
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadd instead of a qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfmul
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfmul
; CHECK: blr

; CHECK-SAFE-LABEL: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}

; a / fpext(sqrt(b)) with b in <4 x float> and a in <4 x double>. The
; -enable-unsafe-fp-math run expects the single-precision estimate sequence
; (qvfrsqrtes, qvfmuls, qvfmadds) ending in a double-precision qvfmul; the
; default run expects fsqrts and fdiv.
define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
entry:
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%y = fpext <4 x float> %x to <4 x double>
%r = fdiv <4 x double> %a, %y
ret <4 x double> %r

; CHECK-LABEL: @foof
; CHECK: qvfrsqrtes
; CHECK: qvfmuls
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadds instead of a qvfnmsubs
; CHECK: qvfmadds
; CHECK: qvfmadds
; CHECK: qvfmuls
; CHECK: qvfmul
; CHECK: blr

; CHECK-SAFE-LABEL: @foof
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}

; a / fptrunc(sqrt(b)) with b in <4 x double> and a in <4 x float>. The
; -enable-unsafe-fp-math run expects the double-precision estimate sequence
; starting with qvfrsqrte, then a qvfrsp and a final qvfmuls; the default run
; expects fsqrt and fdivs.
define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
entry:
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%y = fptrunc <4 x double> %x to <4 x float>
%r = fdiv <4 x float> %a, %y
ret <4 x float> %r

; CHECK-LABEL: @food
; CHECK: qvfrsqrte
; CHECK: qvfmul
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadd instead of a qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfmul
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfrsp
; CHECK: qvfmuls
; CHECK: blr

; CHECK-SAFE-LABEL: @food
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}

; a / sqrt(b) on <4 x float>. The -enable-unsafe-fp-math run expects
; qvfrsqrtes followed by qvfmuls/qvfmadds steps; the default run expects
; fsqrts and fdivs.
define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv <4 x float> %a, %x
ret <4 x float> %r

; CHECK-LABEL: @goo
; CHECK: qvfrsqrtes
; CHECK: qvfmuls
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadds instead of a qvfnmsubs
; CHECK: qvfmadds
; CHECK: qvfmadds
; CHECK: qvfmuls
; CHECK: qvfmuls
; CHECK: blr

; CHECK-SAFE-LABEL: @goo
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}

; Plain a / b on <4 x double>. The -enable-unsafe-fp-math run expects a qvfre
; estimate, two qvfnmsub/qvfmadd pairs and a final qvfmul; the default run
; expects fdiv.
define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
entry:
%r = fdiv <4 x double> %a, %b
ret <4 x double> %r

; CHECK-LABEL: @foo2
; CHECK: qvfre
; CHECK: qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: blr

; CHECK-SAFE-LABEL: @foo2
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}

; Plain a / b on <4 x float>. The -enable-unsafe-fp-math run expects a qvfres
; estimate, one qvfnmsubs/qvfmadds pair and a final qvfmuls; the default run
; expects fdivs.
define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
entry:
%r = fdiv <4 x float> %a, %b
ret <4 x float> %r

; CHECK-LABEL: @goo2
; CHECK: qvfres
; CHECK: qvfnmsubs
; CHECK: qvfmadds
; CHECK: qvfmuls
; CHECK: blr

; CHECK-SAFE-LABEL: @goo2
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}

; sqrt(a) on <4 x double>. The -enable-unsafe-fp-math run expects the
; qvfrsqrte-based sequence together with a qvfcmpeq and a closing qvfsel; the
; default run expects fsqrt.
define <4 x double> @foo3(<4 x double> %a) nounwind {
entry:
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %r

; CHECK-LABEL: @foo3
; CHECK: qvfrsqrte
; CHECK: qvfmul
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadd instead of a qvfnmsub
; CHECK-DAG: qvfmadd
; CHECK-DAG: qvfcmpeq
; CHECK-DAG: qvfmadd
; CHECK-DAG: qvfmul
; CHECK-DAG: qvfmul
; CHECK-DAG: qvfmadd
; CHECK-DAG: qvfmul
; CHECK-DAG: qvfmul
; CHECK: qvfsel
; CHECK: blr

; CHECK-SAFE-LABEL: @foo3
; CHECK-SAFE: fsqrt
; CHECK-SAFE: blr
}

; sqrt(a) on <4 x float>. The -enable-unsafe-fp-math run expects the
; qvfrsqrtes-based sequence together with a qvfcmpeq and a closing qvfsel; the
; default run expects fsqrts.
define <4 x float> @goo3(<4 x float> %a) nounwind {
entry:
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r

; CHECK-LABEL: @goo3
; CHECK: qvfrsqrtes
; CHECK: qvfmuls
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadds instead of a qvfnmsubs
; CHECK-DAG: qvfmadds
; CHECK-DAG: qvfcmpeq
; CHECK-DAG: qvfmadds
; CHECK-DAG: qvfmuls
; CHECK-DAG: qvfmuls
; CHECK: qvfsel
; CHECK: blr

; CHECK-SAFE-LABEL: @goo3
; CHECK-SAFE: fsqrts
; CHECK-SAFE: blr
}

109 changes: 109 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-rounding-ops.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; test1: llvm.floor on a <4 x float>. Both llc runs of this file (default and
; -enable-unsafe-fp-math) expect "qvfrim 1, 1".
define <4 x float> @test1(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call

; CHECK: test1:
; CHECK: qvfrim 1, 1

; CHECK-FM: test1:
; CHECK-FM: qvfrim 1, 1
}

declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone

; test2: llvm.floor on a <4 x double>. Both llc runs of this file (default and
; -enable-unsafe-fp-math) expect "qvfrim 1, 1".
define <4 x double> @test2(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call

; CHECK: test2:
; CHECK: qvfrim 1, 1

; CHECK-FM: test2:
; CHECK-FM: qvfrim 1, 1
}

declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone

; test3: llvm.nearbyint on a <4 x float>. Both llc runs of this file (default
; and -enable-unsafe-fp-math) verify that qvfrin is not emitted for it.
define <4 x float> @test3(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call

; CHECK: test3:
; CHECK-NOT: qvfrin

; CHECK-FM: test3:
; CHECK-FM-NOT: qvfrin
}

declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone

; test4: llvm.nearbyint on a <4 x double>. Both llc runs of this file (default
; and -enable-unsafe-fp-math) verify that qvfrin is not emitted for it.
define <4 x double> @test4(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call

; CHECK: test4:
; CHECK-NOT: qvfrin

; CHECK-FM: test4:
; CHECK-FM-NOT: qvfrin
}

declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone

; test5: llvm.ceil on a <4 x float>. Both llc runs of this file (default and
; -enable-unsafe-fp-math) expect "qvfrip 1, 1".
define <4 x float> @test5(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call

; CHECK: test5:
; CHECK: qvfrip 1, 1

; CHECK-FM: test5:
; CHECK-FM: qvfrip 1, 1
}

declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone

; test6: llvm.ceil on a <4 x double>. Both llc runs of this file (default and
; -enable-unsafe-fp-math) expect "qvfrip 1, 1".
define <4 x double> @test6(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call

; CHECK: test6:
; CHECK: qvfrip 1, 1

; CHECK-FM: test6:
; CHECK-FM: qvfrip 1, 1
}

declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone

; test9: llvm.trunc on a <4 x float>. Both llc runs of this file (default and
; -enable-unsafe-fp-math) expect "qvfriz 1, 1".
define <4 x float> @test9(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call

; CHECK: test9:
; CHECK: qvfriz 1, 1

; CHECK-FM: test9:
; CHECK-FM: qvfriz 1, 1
}

declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone

; test10: llvm.trunc on a <4 x double>. Both llc runs of this file (default
; and -enable-unsafe-fp-math) expect "qvfriz 1, 1".
define <4 x double> @test10(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call

; CHECK: test10:
; CHECK: qvfriz 1, 1

; CHECK-FM: test10:
; CHECK-FM: qvfriz 1, 1
}

declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone

25 changes: 25 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-s-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; foo: loads a <4 x float> through %p with only 4-byte alignment. The check
; lines after the function expect two qvlfsx loads (one at an offset register
; set to 15 by li) that are combined with a qvlpclsx result by qvfperm.
define <4 x float> @foo(<4 x float>* %p) {
entry:
%v = load <4 x float>* %p, align 4
ret <4 x float> %v
}

; CHECK: @foo
; CHECK-DAG: li [[REG1:[0-9]+]], 15
; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
; CHECK: blr

; bar: loads a <4 x float> through %p with 16-byte alignment. The check lines
; after the function expect a qvlfsx.
define <4 x float> @bar(<4 x float>* %p) {
entry:
%v = load <4 x float>* %p, align 16
ret <4 x float> %v
}

; CHECK: @bar
; CHECK: qvlfsx

143 changes: 143 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-s-sel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; @Q: constant <4 x i1> mask that test4 loads and ANDs with its argument.
@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
; @R: writable <4 x i1> global that test5 stores its argument into.
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16

; test1: selects between two <4 x float> vectors using a <4 x i1> argument as
; the condition. The checks expect a single "qvfsel 1, 3, 1, 2".
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
entry:
%r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %r

; CHECK-LABEL: @test1
; CHECK: qvfsel 1, 3, 1, 2
; CHECK: blr
}

; test2: builds a <4 x i1> condition from four scalar i1 arguments with
; insertelement, then uses it to select between two <4 x float> vectors.
; The checks expect an stw, then qvlfiwzx/qvlfdx/qvfcfidu (in any order),
; a qvfcmpeq producing the condition register, and a qvfsel that consumes it.
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
entry:
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
%r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
ret <4 x float> %r

; CHECK-LABEL: @test2
; CHECK: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

; test3: ANDs a <4 x i1> argument with a constant <4 x i1> mask. The checks
; expect the mask to be loaded with qvlfsx and combined with the argument by
; a qvflogical, followed by the return.
; The qvflogical and blr lines previously lacked the FileCheck directive
; prefix, so they were plain comments and never verified; they are now real
; checks, using the same operand order that test4 verifies for its AND.
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
entry:
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
ret <4 x i1> %v

; CHECK-LABEL: @test3
; CHECK: qvlfsx [[REG:[0-9]+]],
; CHECK: qvflogical 1, 1, [[REG]], 1
; CHECK: blr
}

; test4: loads the <4 x i1> constant @Q and ANDs it with the argument.
; The checks expect lbz/qvlfdx/stw/qvlfiwzx/qvfcfidu (in any order), then a
; qvfcmpeq whose result feeds "qvflogical 1, 1, <reg>, 1".
define <4 x i1> @test4(<4 x i1> %a) nounwind {
entry:
%q = load <4 x i1>* @Q, align 16
%v = and <4 x i1> %a, %q
ret <4 x i1> %v

; CHECK-LABEL: @test4
; CHECK-DAG: lbz
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
; CHECK: qvflogical 1, 1, [[REG4]], 1
; CHECK: blr
}

; test5: stores a <4 x i1> argument to the global @R.
; The checks expect qvlfdx, a qvfmadd of argument register 1 with the loaded
; value, qvfctiwu, a qvstfiwx of the converted register, then lwz and stb.
define void @test5(<4 x i1> %a) nounwind {
entry:
store <4 x i1> %a, <4 x i1>* @R
ret void

; CHECK-LABEL: @test5
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: stb
; CHECK: blr
}

; test6: extracts element 2 of a <4 x i1> argument and returns it as an i1.
; The checks expect qvlfdx, qvfmadd, qvfctiwu, a qvstfiwx of the converted
; register, and then an lwz.
define i1 @test6(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
ret i1 %r

; CHECK-LABEL: @test6
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}

; test7: extracts elements 2 and 3 of a <4 x i1> argument and returns their
; logical AND. The checks expect the same qvlfdx/qvfmadd/qvfctiwu/qvstfiwx
; sequence as a single extract, a second qvstfiwx of the same register (see
; the FIXME below), two lwz loads, and an "and" that writes register 3.
define i1 @test7(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
%s = extractelement <4 x i1> %a, i32 3
%q = and i1 %r, %s
ret i1 %q

; CHECK-LABEL: @test7
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG4:[0-9]+]],
; FIXME: We're storing the vector twice, and that's silly.
; CHECK-DAG: qvstfiwx [[REG3]],
; CHECK: lwz [[REG5:[0-9]+]],
; CHECK: and 3,
; CHECK: blr
}

; test8: extracts element 2 of a three-element <3 x i1> argument.
; The checks expect the same qvlfdx/qvfmadd/qvfctiwu/qvstfiwx/lwz sequence
; that test6 expects for the four-element case.
define i1 @test8(<3 x i1> %a) nounwind {
entry:
%r = extractelement <3 x i1> %a, i32 2
ret i1 %r

; CHECK-LABEL: @test8
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}

; test9: three-element variant of test2. Builds a <3 x i1> condition from
; three scalar i1 arguments and selects between two <3 x float> vectors.
; The checks expect the same stw, qvlfiwzx/qvlfdx/qvfcfidu, qvfcmpeq and
; qvfsel sequence as the four-element case.
define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
entry:
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
%r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
ret <3 x float> %r

; CHECK-LABEL: @test9
; CHECK: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

24 changes: 24 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-s-store.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; foo: stores a <4 x float> through %p with only 4-byte alignment. The check
; lines after the function expect four stfs instructions.
define void @foo(<4 x float> %v, <4 x float>* %p) {
entry:
store <4 x float> %v, <4 x float>* %p, align 4
ret void
}

; CHECK: @foo
; CHECK: stfs
; CHECK: stfs
; CHECK: stfs
; CHECK: stfs
; CHECK: blr

; bar: stores a <4 x float> through %p with 16-byte alignment. The check lines
; after the function expect a qvstfsx.
define void @bar(<4 x float> %v, <4 x float>* %p) {
entry:
store <4 x float> %v, <4 x float>* %p, align 16
ret void
}

; CHECK: @bar
; CHECK: qvstfsx

151 changes: 151 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-sel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; @Q: constant <4 x i1> mask that test4 loads and ANDs with its argument.
@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
; @R: writable <4 x i1> global that test5 stores its argument into.
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16

; test1: selects between two <4 x double> vectors using a <4 x i1> argument
; as the condition. The checks expect a single "qvfsel 1, 3, 1, 2".
define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
entry:
%r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
ret <4 x double> %r

; CHECK-LABEL: @test1
; CHECK: qvfsel 1, 3, 1, 2
; CHECK: blr
}

; test2: builds a <4 x i1> condition from four scalar i1 arguments with
; insertelement, then uses it to select between two <4 x double> vectors.
; The checks expect lbz/stw/qvlfiwzx/qvlfdx/qvfcfidu (in any order), a
; qvfcmpeq producing the condition register, and a qvfsel that consumes it.
define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
entry:
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
%r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
ret <4 x double> %r

; CHECK-LABEL: @test2

; FIXME: This load/store sequence is unnecessary.
; CHECK-DAG: lbz
; CHECK-DAG: stw

; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

; test3: ANDs a <4 x i1> argument with a constant <4 x i1> mask. The checks
; expect the mask to be loaded with qvlfsx and combined with the argument by
; a qvflogical, followed by the return.
; The qvflogical and blr lines previously lacked the FileCheck directive
; prefix, so they were plain comments and never verified; they are now real
; checks, using the same operand order that test4 verifies for its AND.
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
entry:
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
ret <4 x i1> %v

; CHECK-LABEL: @test3
; CHECK: qvlfsx [[REG:[0-9]+]],
; CHECK: qvflogical 1, 1, [[REG]], 1
; CHECK: blr
}

; test4: loads the <4 x i1> constant @Q and ANDs it with the argument.
; The checks expect lbz/qvlfdx/stw/qvlfiwzx/qvfcfidu (in any order), then a
; qvfcmpeq whose result feeds "qvflogical 1, 1, <reg>, 1".
define <4 x i1> @test4(<4 x i1> %a) nounwind {
entry:
%q = load <4 x i1>* @Q, align 16
%v = and <4 x i1> %a, %q
ret <4 x i1> %v

; CHECK-LABEL: @test4
; CHECK-DAG: lbz
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
; CHECK: qvflogical 1, 1, [[REG4]], 1
; CHECK: blr
}

; test5: stores a <4 x i1> argument to the global @R.
; The checks expect qvlfdx, a qvfmadd of argument register 1 with the loaded
; value, qvfctiwu, a qvstfiwx of the converted register, then lwz and stb.
define void @test5(<4 x i1> %a) nounwind {
entry:
store <4 x i1> %a, <4 x i1>* @R
ret void

; CHECK-LABEL: @test5
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: stb
; CHECK: blr
}

; test6: extracts element 2 of a <4 x i1> argument and returns it as an i1.
; The checks expect qvlfdx, qvfmadd, qvfctiwu, a qvstfiwx of the converted
; register, and then an lwz.
define i1 @test6(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
ret i1 %r

; CHECK-LABEL: @test6
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}

; test7: extracts elements 2 and 3 of a <4 x i1> argument and returns their
; logical AND. The checks expect the same qvlfdx/qvfmadd/qvfctiwu/qvstfiwx
; sequence as a single extract, a second qvstfiwx of the same register (see
; the FIXME below), two lwz loads, and an "and" that writes register 3.
define i1 @test7(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
%s = extractelement <4 x i1> %a, i32 3
%q = and i1 %r, %s
ret i1 %q

; CHECK-LABEL: @test7
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG4:[0-9]+]],
; FIXME: We're storing the vector twice, and that's silly.
; CHECK-DAG: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG5:[0-9]+]],
; CHECK: and 3,
; CHECK: blr
}

; test8: extracts element 2 of a three-element <3 x i1> argument.
; The checks expect the same qvlfdx/qvfmadd/qvfctiwu/qvstfiwx/lwz sequence
; that test6 expects for the four-element case.
define i1 @test8(<3 x i1> %a) nounwind {
entry:
%r = extractelement <3 x i1> %a, i32 2
ret i1 %r

; CHECK-LABEL: @test8
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}

; test9: three-element variant of test2. Builds a <3 x i1> condition from
; three scalar i1 arguments and selects between two <3 x double> vectors.
; The checks expect the same lbz/stw, qvlfiwzx/qvlfdx/qvfcfidu, qvfcmpeq and
; qvfsel sequence as the four-element case.
define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
entry:
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
%r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
ret <3 x double> %r

; CHECK-LABEL: @test9

; FIXME: This load/store sequence is unnecessary.
; CHECK-DAG: lbz
; CHECK-DAG: stw

; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

24 changes: 24 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-store.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s

; foo: stores a <4 x double> through %p with only 8-byte alignment. The check
; lines after the function expect four stfd instructions.
define void @foo(<4 x double> %v, <4 x double>* %p) {
entry:
store <4 x double> %v, <4 x double>* %p, align 8
ret void
}

; CHECK: @foo
; CHECK: stfd
; CHECK: stfd
; CHECK: stfd
; CHECK: stfd
; CHECK: blr

; bar: stores a <4 x double> through %p with 32-byte alignment. The check
; lines after the function expect a qvstfdx.
define void @bar(<4 x double> %v, <4 x double>* %p) {
entry:
store <4 x double> %v, <4 x double>* %p, align 32
ret void
}

; CHECK: @bar
; CHECK: qvstfdx

64 changes: 64 additions & 0 deletions llvm/test/CodeGen/PowerPC/qpx-unalperm.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
; RUN: llc < %s -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"

; foo: loads a <4 x double> through %a with 32-byte alignment. The checks
; expect a qvlfdx before the return.
define <4 x double> @foo(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 32
ret <4 x double> %r
; CHECK: qvlfdx
; CHECK: blr
}

; bar: adds an 8-byte-aligned <4 x double> load from %a to a 32-byte-aligned
; load from sixteen vectors further on. The checks expect qvlpcldx, qvlfdx and
; qvfperm, in that order, before the return.
define <4 x double> @bar(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 16
%s = load <4 x double>* %b, align 32
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
; CHECK: qvlpcldx
; CHECK: qvlfdx
; CHECK: qvfperm
; CHECK: blr
}

; bar1: like bar (element offset 16), but both loads are only 8-byte aligned.
; NOTE(review): this function has no check lines of its own, so it appears to
; be exercised only for successful compilation -- confirm that is intended.
define <4 x double> @bar1(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 16
%s = load <4 x double>* %b, align 8
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}

; bar2: adds an 8-byte-aligned <4 x double> load from %a to a 32-byte-aligned
; load from the immediately following vector (element offset 1).
; NOTE(review): this function has no check lines of its own, so it appears to
; be exercised only for successful compilation -- confirm that is intended.
define <4 x double> @bar2(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 1
%s = load <4 x double>* %b, align 32
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}

; bar3: adds two 8-byte-aligned <4 x double> loads from adjacent vectors
; (%a and the vector at element offset 1).
; NOTE(review): this function has no check lines of its own, so it appears to
; be exercised only for successful compilation -- confirm that is intended.
define <4 x double> @bar3(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 1
%s = load <4 x double>* %b, align 8
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}

; bar4: sums three 8-byte-aligned <4 x double> loads from three consecutive
; vectors starting at %a.
; NOTE(review): this function has no check lines of its own, so it appears to
; be exercised only for successful compilation -- confirm that is intended.
define <4 x double> @bar4(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 1
%s = load <4 x double>* %b, align 8
%c = getelementptr <4 x double>* %b, i32 1
%t = load <4 x double>* %c, align 8
%u = fadd <4 x double> %r, %s
%v = fadd <4 x double> %u, %t
ret <4 x double> %v
}

1 change: 0 additions & 1 deletion llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ entry:
br i1 false, label %loop2_start, label %if.end5

; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc
; CHECK: xxlor

loop2_start: ; preds = %loop2_start, %entry
br i1 undef, label %loop2_start, label %if.then.i31
Expand Down
383 changes: 383 additions & 0 deletions llvm/test/MC/Disassembler/PowerPC/qpx.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,383 @@
# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s

# CHECK: qvfabs 3, 5
0x10 0x60 0x2a 0x10

# CHECK: qvfadd 3, 4, 5
0x10 0x64 0x28 0x2a

# CHECK: qvfadds 3, 4, 5
0x00 0x64 0x28 0x2a

# FIXME: decode as qvfandc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 4
0x10 0x64 0x2a 0x08

# FIXME: decode as qvfand 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 1
0x10 0x64 0x28 0x88

# CHECK: qvfcfid 3, 5
0x10 0x60 0x2e 0x9c

# CHECK: qvfcfids 3, 5
0x00 0x60 0x2e 0x9c

# CHECK: qvfcfidu 3, 5
0x10 0x60 0x2f 0x9c

# CHECK: qvfcfidus 3, 5
0x00 0x60 0x2f 0x9c

# FIXME: decode as qvfclr 3
# CHECK: qvflogical 3, 3, 3, 0
0x10 0x63 0x18 0x08

# CHECK: qvfcpsgn 3, 4, 5
0x10 0x64 0x28 0x10

# FIXME: decode as qvfctfb 3, 4
# CHECK: qvflogical 3, 4, 4, 5
0x10 0x64 0x22 0x88

# CHECK: qvfctid 3, 5
0x10 0x60 0x2e 0x5c

# CHECK: qvfctidu 3, 5
0x10 0x60 0x2f 0x5c

# CHECK: qvfctiduz 3, 5
0x10 0x60 0x2f 0x5e

# CHECK: qvfctidz 3, 5
0x10 0x60 0x2e 0x5e

# CHECK: qvfctiw 3, 5
0x10 0x60 0x28 0x1c

# CHECK: qvfctiwu 3, 5
0x10 0x60 0x29 0x1c

# CHECK: qvfctiwuz 3, 5
0x10 0x60 0x29 0x1e

# CHECK: qvfctiwz 3, 5
0x10 0x60 0x28 0x1e

# FIXME: decode as qvfequ 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 9
0x10 0x64 0x2c 0x88

# CHECK: qvflogical 3, 4, 5, 12
0x10 0x64 0x2e 0x08

# CHECK: qvfmadd 3, 4, 6, 5
0x10 0x64 0x29 0xba

# CHECK: qvfmadds 3, 4, 6, 5
0x00 0x64 0x29 0xba

# CHECK: qvfmr 3, 5
0x10 0x60 0x28 0x90

# CHECK: qvfmsub 3, 4, 6, 5
0x10 0x64 0x29 0xb8

# CHECK: qvfmsubs 3, 4, 6, 5
0x00 0x64 0x29 0xb8

# CHECK: qvfmul 3, 4, 6
0x10 0x64 0x01 0xb2

# CHECK: qvfmuls 3, 4, 6
0x00 0x64 0x01 0xb2

# CHECK: qvfnabs 3, 5
0x10 0x60 0x29 0x10

# FIXME: decode as qvfnand 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 14
0x10 0x64 0x2f 0x08

# CHECK: qvfneg 3, 5
0x10 0x60 0x28 0x50

# CHECK: qvfnmadd 3, 4, 6, 5
0x10 0x64 0x29 0xbe

# CHECK: qvfnmadds 3, 4, 6, 5
0x00 0x64 0x29 0xbe

# CHECK: qvfnmsub 3, 4, 6, 5
0x10 0x64 0x29 0xbc

# CHECK: qvfnmsubs 3, 4, 6, 5
0x00 0x64 0x29 0xbc

# FIXME: decode as qvfnor 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 8
0x10 0x64 0x2c 0x08

# FIXME: decode as qvfnot 3, 4
# CHECK: qvflogical 3, 4, 4, 10
0x10 0x64 0x25 0x08

# FIXME: decode as qvforc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 13
0x10 0x64 0x2e 0x88

# FIXME: decode as qvfor 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 7
0x10 0x64 0x2b 0x88

# CHECK: qvfperm 3, 4, 5, 6
0x10 0x64 0x29 0x8c

# CHECK: qvfre 3, 5
0x10 0x60 0x28 0x30

# CHECK: qvfres 3, 5
0x00 0x60 0x28 0x30

# CHECK: qvfrim 3, 5
0x10 0x60 0x2b 0xd0

# CHECK: qvfrin 3, 5
0x10 0x60 0x2b 0x10

# CHECK: qvfrip 3, 5
0x10 0x60 0x2b 0x90

# CHECK: qvfriz 3, 5
0x10 0x60 0x2b 0x50

# CHECK: qvfrsp 3, 5
0x10 0x60 0x28 0x18

# CHECK: qvfrsqrte 3, 5
0x10 0x60 0x28 0x34

# CHECK: qvfrsqrtes 3, 5
0x00 0x60 0x28 0x34

# CHECK: qvfsel 3, 4, 6, 5
0x10 0x64 0x29 0xae

# FIXME: decode as qvfset 3
# CHECK: qvflogical 3, 3, 3, 15
0x10 0x63 0x1f 0x88

# CHECK: qvfsub 3, 4, 5
0x10 0x64 0x28 0x28

# CHECK: qvfsubs 3, 4, 5
0x00 0x64 0x28 0x28

# CHECK: qvfxmadd 3, 4, 6, 5
0x10 0x64 0x29 0x92

# CHECK: qvfxmadds 3, 4, 6, 5
0x00 0x64 0x29 0x92

# CHECK: qvfxmul 3, 4, 6
0x10 0x64 0x01 0xa2

# CHECK: qvfxmuls 3, 4, 6
0x00 0x64 0x01 0xa2

# FIXME: decode as qvfxor 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 6
0x10 0x64 0x2b 0x08

# CHECK: qvfxxcpnmadd 3, 4, 6, 5
0x10 0x64 0x29 0x86

# CHECK: qvfxxcpnmadds 3, 4, 6, 5
0x00 0x64 0x29 0x86

# CHECK: qvfxxmadd 3, 4, 6, 5
0x10 0x64 0x29 0x82

# CHECK: qvfxxmadds 3, 4, 6, 5
0x00 0x64 0x29 0x82

# CHECK: qvfxxnpmadd 3, 4, 6, 5
0x10 0x64 0x29 0x96

# CHECK: qvfxxnpmadds 3, 4, 6, 5
0x00 0x64 0x29 0x96

# CHECK: qvlfcduxa 3, 9, 11
0x7c 0x69 0x58 0xcf

# CHECK: qvlfcdux 3, 9, 11
0x7c 0x69 0x58 0xce

# CHECK: qvlfcdxa 3, 10, 11
0x7c 0x6a 0x58 0x8f

# CHECK: qvlfcdx 3, 10, 11
0x7c 0x6a 0x58 0x8e

# CHECK: qvlfcsuxa 3, 9, 11
0x7c 0x69 0x58 0x4f

# CHECK: qvlfcsux 3, 9, 11
0x7c 0x69 0x58 0x4e

# CHECK: qvlfcsxa 3, 10, 11
0x7c 0x6a 0x58 0x0f

# CHECK: qvlfcsx 3, 10, 11
0x7c 0x6a 0x58 0x0e

# CHECK: qvlfduxa 3, 9, 11
0x7c 0x69 0x5c 0xcf

# CHECK: qvlfdux 3, 9, 11
0x7c 0x69 0x5c 0xce

# CHECK: qvlfdxa 3, 10, 11
0x7c 0x6a 0x5c 0x8f

# CHECK: qvlfdx 3, 10, 11
0x7c 0x6a 0x5c 0x8e

# CHECK: qvlfiwaxa 3, 10, 11
0x7c 0x6a 0x5e 0xcf

# CHECK: qvlfiwax 3, 10, 11
0x7c 0x6a 0x5e 0xce

# CHECK: qvlfiwzxa 3, 10, 11
0x7c 0x6a 0x5e 0x8f

# CHECK: qvlfiwzx 3, 10, 11
0x7c 0x6a 0x5e 0x8e

# CHECK: qvlfsuxa 3, 9, 11
0x7c 0x69 0x5c 0x4f

# CHECK: qvlfsux 3, 9, 11
0x7c 0x69 0x5c 0x4e

# CHECK: qvlfsxa 3, 10, 11
0x7c 0x6a 0x5c 0x0f

# CHECK: qvlfsx 3, 10, 11
0x7c 0x6a 0x5c 0x0e

# CHECK: qvlpcldx 3, 10, 11
0x7c 0x6a 0x5c 0x8c

# CHECK: qvlpclsx 3, 10, 11
0x7c 0x6a 0x5c 0x0c

# CHECK: qvlpcrdx 3, 10, 11
0x7c 0x6a 0x58 0x8c

# CHECK: qvlpcrsx 3, 10, 11
0x7c 0x6a 0x58 0x0c

# CHECK: qvstfcduxa 2, 9, 11
0x7c 0x49 0x59 0xcf

# CHECK: qvstfcduxia 2, 9, 11
0x7c 0x49 0x59 0xcb

# CHECK: qvstfcduxi 2, 9, 11
0x7c 0x49 0x59 0xca

# CHECK: qvstfcdux 2, 9, 11
0x7c 0x49 0x59 0xce

# CHECK: qvstfcdxa 2, 10, 11
0x7c 0x4a 0x59 0x8f

# CHECK: qvstfcdxia 2, 10, 11
0x7c 0x4a 0x59 0x8b

# CHECK: qvstfcdxi 2, 10, 11
0x7c 0x4a 0x59 0x8a

# CHECK: qvstfcdx 2, 10, 11
0x7c 0x4a 0x59 0x8e

# CHECK: qvstfcsuxa 2, 9, 11
0x7c 0x49 0x59 0x4f

# CHECK: qvstfcsuxia 2, 9, 11
0x7c 0x49 0x59 0x4b

# CHECK: qvstfcsuxi 2, 9, 11
0x7c 0x49 0x59 0x4a

# CHECK: qvstfcsux 2, 9, 11
0x7c 0x49 0x59 0x4e

# CHECK: qvstfcsxa 2, 10, 11
0x7c 0x4a 0x59 0x0f

# CHECK: qvstfcsxia 2, 10, 11
0x7c 0x4a 0x59 0x0b

# CHECK: qvstfcsxi 2, 10, 11
0x7c 0x4a 0x59 0x0a

# CHECK: qvstfcsx 2, 10, 11
0x7c 0x4a 0x59 0x0e

# CHECK: qvstfduxa 2, 9, 11
0x7c 0x49 0x5d 0xcf

# CHECK: qvstfduxia 2, 9, 11
0x7c 0x49 0x5d 0xcb

# CHECK: qvstfduxi 2, 9, 11
0x7c 0x49 0x5d 0xca

# CHECK: qvstfdux 2, 9, 11
0x7c 0x49 0x5d 0xce

# CHECK: qvstfdxa 2, 10, 11
0x7c 0x4a 0x5d 0x8f

# CHECK: qvstfdxia 2, 10, 11
0x7c 0x4a 0x5d 0x8b

# CHECK: qvstfdxi 2, 10, 11
0x7c 0x4a 0x5d 0x8a

# CHECK: qvstfdx 2, 10, 11
0x7c 0x4a 0x5d 0x8e

# CHECK: qvstfiwxa 2, 10, 11
0x7c 0x4a 0x5f 0x8f

# CHECK: qvstfiwx 2, 10, 11
0x7c 0x4a 0x5f 0x8e

# CHECK: qvstfsuxa 2, 9, 11
0x7c 0x49 0x5d 0x4f

# CHECK: qvstfsuxia 2, 9, 11
0x7c 0x49 0x5d 0x4b

# CHECK: qvstfsuxi 2, 9, 11
0x7c 0x49 0x5d 0x4a

# CHECK: qvstfsux 2, 9, 11
0x7c 0x49 0x5d 0x4e

# CHECK: qvstfsxa 2, 10, 11
0x7c 0x4a 0x5d 0x0f

# CHECK: qvstfsxia 2, 10, 11
0x7c 0x4a 0x5d 0x0b

# CHECK: qvstfsxi 2, 10, 11
0x7c 0x4a 0x5d 0x0a

# CHECK: qvstfsx 2, 10, 11
0x7c 0x4a 0x5d 0x0e

251 changes: 251 additions & 0 deletions llvm/test/MC/PowerPC/qpx.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s

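# FIXME: print the qvflogical aliases (e.g. qvfand, qvfor) instead of the
# generic qvflogical form that is currently emitted for them.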

# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10]
qvfabs 3, 5
# CHECK: qvfadd 3, 4, 5 # encoding: [0x10,0x64,0x28,0x2a]
qvfadd 3, 4, 5
# CHECK: qvfadds 3, 4, 5 # encoding: [0x00,0x64,0x28,0x2a]
qvfadds 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 4 # encoding: [0x10,0x64,0x2a,0x08]
qvfandc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 1 # encoding: [0x10,0x64,0x28,0x88]
qvfand 3, 4, 5
# CHECK: qvfcfid 3, 5 # encoding: [0x10,0x60,0x2e,0x9c]
qvfcfid 3, 5
# CHECK: qvfcfids 3, 5 # encoding: [0x00,0x60,0x2e,0x9c]
qvfcfids 3, 5
# CHECK: qvfcfidu 3, 5 # encoding: [0x10,0x60,0x2f,0x9c]
qvfcfidu 3, 5
# CHECK: qvfcfidus 3, 5 # encoding: [0x00,0x60,0x2f,0x9c]
qvfcfidus 3, 5
# CHECK: qvflogical 3, 3, 3, 0 # encoding: [0x10,0x63,0x18,0x08]
qvfclr 3
# CHECK: qvfcpsgn 3, 4, 5 # encoding: [0x10,0x64,0x28,0x10]
qvfcpsgn 3, 4, 5
# CHECK: qvflogical 3, 4, 4, 5 # encoding: [0x10,0x64,0x22,0x88]
qvfctfb 3, 4
# CHECK: qvfctid 3, 5 # encoding: [0x10,0x60,0x2e,0x5c]
qvfctid 3, 5
# CHECK: qvfctidu 3, 5 # encoding: [0x10,0x60,0x2f,0x5c]
qvfctidu 3, 5
# CHECK: qvfctiduz 3, 5 # encoding: [0x10,0x60,0x2f,0x5e]
qvfctiduz 3, 5
# CHECK: qvfctidz 3, 5 # encoding: [0x10,0x60,0x2e,0x5e]
qvfctidz 3, 5
# CHECK: qvfctiw 3, 5 # encoding: [0x10,0x60,0x28,0x1c]
qvfctiw 3, 5
# CHECK: qvfctiwu 3, 5 # encoding: [0x10,0x60,0x29,0x1c]
qvfctiwu 3, 5
# CHECK: qvfctiwuz 3, 5 # encoding: [0x10,0x60,0x29,0x1e]
qvfctiwuz 3, 5
# CHECK: qvfctiwz 3, 5 # encoding: [0x10,0x60,0x28,0x1e]
qvfctiwz 3, 5
# CHECK: qvflogical 3, 4, 5, 9 # encoding: [0x10,0x64,0x2c,0x88]
qvfequ 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 12 # encoding: [0x10,0x64,0x2e,0x08]
qvflogical 3, 4, 5, 12
# CHECK: qvfmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xba]
qvfmadd 3, 4, 6, 5
# CHECK: qvfmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xba]
qvfmadds 3, 4, 6, 5
# CHECK: qvfmr 3, 5 # encoding: [0x10,0x60,0x28,0x90]
qvfmr 3, 5
# CHECK: qvfmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xb8]
qvfmsub 3, 4, 6, 5
# CHECK: qvfmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xb8]
qvfmsubs 3, 4, 6, 5
# CHECK: qvfmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xb2]
qvfmul 3, 4, 6
# CHECK: qvfmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xb2]
qvfmuls 3, 4, 6
# CHECK: qvfnabs 3, 5 # encoding: [0x10,0x60,0x29,0x10]
qvfnabs 3, 5
# CHECK: qvflogical 3, 4, 5, 14 # encoding: [0x10,0x64,0x2f,0x08]
qvfnand 3, 4, 5
# CHECK: qvfneg 3, 5 # encoding: [0x10,0x60,0x28,0x50]
qvfneg 3, 5
# CHECK: qvfnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbe]
qvfnmadd 3, 4, 6, 5
# CHECK: qvfnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbe]
qvfnmadds 3, 4, 6, 5
# CHECK: qvfnmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbc]
qvfnmsub 3, 4, 6, 5
# CHECK: qvfnmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbc]
qvfnmsubs 3, 4, 6, 5
# CHECK: qvflogical 3, 4, 5, 8 # encoding: [0x10,0x64,0x2c,0x08]
qvfnor 3, 4, 5
# CHECK: qvflogical 3, 4, 4, 10 # encoding: [0x10,0x64,0x25,0x08]
qvfnot 3, 4
# CHECK: qvflogical 3, 4, 5, 13 # encoding: [0x10,0x64,0x2e,0x88]
qvforc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 7 # encoding: [0x10,0x64,0x2b,0x88]
qvfor 3, 4, 5
# CHECK: qvfperm 3, 4, 5, 6 # encoding: [0x10,0x64,0x29,0x8c]
qvfperm 3, 4, 5, 6
# CHECK: qvfre 3, 5 # encoding: [0x10,0x60,0x28,0x30]
qvfre 3, 5
# CHECK: qvfres 3, 5 # encoding: [0x00,0x60,0x28,0x30]
qvfres 3, 5
# CHECK: qvfrim 3, 5 # encoding: [0x10,0x60,0x2b,0xd0]
qvfrim 3, 5
# CHECK: qvfrin 3, 5 # encoding: [0x10,0x60,0x2b,0x10]
qvfrin 3, 5
# CHECK: qvfrip 3, 5 # encoding: [0x10,0x60,0x2b,0x90]
qvfrip 3, 5
# CHECK: qvfriz 3, 5 # encoding: [0x10,0x60,0x2b,0x50]
qvfriz 3, 5
# CHECK: qvfrsp 3, 5 # encoding: [0x10,0x60,0x28,0x18]
qvfrsp 3, 5
# CHECK: qvfrsqrte 3, 5 # encoding: [0x10,0x60,0x28,0x34]
qvfrsqrte 3, 5
# CHECK: qvfrsqrtes 3, 5 # encoding: [0x00,0x60,0x28,0x34]
qvfrsqrtes 3, 5
# CHECK: qvfsel 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xae]
qvfsel 3, 4, 6, 5
# CHECK: qvflogical 3, 3, 3, 15 # encoding: [0x10,0x63,0x1f,0x88]
qvfset 3
# CHECK: qvfsub 3, 4, 5 # encoding: [0x10,0x64,0x28,0x28]
qvfsub 3, 4, 5
# CHECK: qvfsubs 3, 4, 5 # encoding: [0x00,0x64,0x28,0x28]
qvfsubs 3, 4, 5
# CHECK: qvfxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x92]
qvfxmadd 3, 4, 6, 5
# CHECK: qvfxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x92]
qvfxmadds 3, 4, 6, 5
# CHECK: qvfxmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xa2]
qvfxmul 3, 4, 6
# CHECK: qvfxmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xa2]
qvfxmuls 3, 4, 6
# CHECK: qvflogical 3, 4, 5, 6 # encoding: [0x10,0x64,0x2b,0x08]
qvfxor 3, 4, 5
# CHECK: qvfxxcpnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x86]
qvfxxcpnmadd 3, 4, 6, 5
# CHECK: qvfxxcpnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x86]
qvfxxcpnmadds 3, 4, 6, 5
# CHECK: qvfxxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x82]
qvfxxmadd 3, 4, 6, 5
# CHECK: qvfxxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x82]
qvfxxmadds 3, 4, 6, 5
# CHECK: qvfxxnpmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x96]
qvfxxnpmadd 3, 4, 6, 5
# CHECK: qvfxxnpmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x96]
qvfxxnpmadds 3, 4, 6, 5
# CHECK: qvlfcduxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xcf]
qvlfcduxa 3, 9, 11
# CHECK: qvlfcdux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xce]
qvlfcdux 3, 9, 11
# CHECK: qvlfcdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8f]
qvlfcdxa 3, 10, 11
# CHECK: qvlfcdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8e]
qvlfcdx 3, 10, 11
# CHECK: qvlfcsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4f]
qvlfcsuxa 3, 9, 11
# CHECK: qvlfcsux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4e]
qvlfcsux 3, 9, 11
# CHECK: qvlfcsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0f]
qvlfcsxa 3, 10, 11
# CHECK: qvlfcsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0e]
qvlfcsx 3, 10, 11
# CHECK: qvlfduxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xcf]
qvlfduxa 3, 9, 11
# CHECK: qvlfdux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xce]
qvlfdux 3, 9, 11
# CHECK: qvlfdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8f]
qvlfdxa 3, 10, 11
# CHECK: qvlfdx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8e]
qvlfdx 3, 10, 11
# CHECK: qvlfiwaxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xcf]
qvlfiwaxa 3, 10, 11
# CHECK: qvlfiwax 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xce]
qvlfiwax 3, 10, 11
# CHECK: qvlfiwzxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8f]
qvlfiwzxa 3, 10, 11
# CHECK: qvlfiwzx 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8e]
qvlfiwzx 3, 10, 11
# CHECK: qvlfsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4f]
qvlfsuxa 3, 9, 11
# CHECK: qvlfsux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4e]
qvlfsux 3, 9, 11
# CHECK: qvlfsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0f]
qvlfsxa 3, 10, 11
# CHECK: qvlfsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0e]
qvlfsx 3, 10, 11
# CHECK: qvlpcldx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8c]
qvlpcldx 3, 10, 11
# CHECK: qvlpclsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0c]
qvlpclsx 3, 10, 11
# CHECK: qvlpcrdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8c]
qvlpcrdx 3, 10, 11
# CHECK: qvlpcrsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0c]
qvlpcrsx 3, 10, 11
# CHECK: qvstfcduxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcf]
qvstfcduxa 2, 9, 11
# CHECK: qvstfcduxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcb]
qvstfcduxia 2, 9, 11
# CHECK: qvstfcduxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xca]
qvstfcduxi 2, 9, 11
# CHECK: qvstfcdux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xce]
qvstfcdux 2, 9, 11
# CHECK: qvstfcdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8f]
qvstfcdxa 2, 10, 11
# CHECK: qvstfcdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8b]
qvstfcdxia 2, 10, 11
# CHECK: qvstfcdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8a]
qvstfcdxi 2, 10, 11
# CHECK: qvstfcdx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8e]
qvstfcdx 2, 10, 11
# CHECK: qvstfcsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4f]
qvstfcsuxa 2, 9, 11
# CHECK: qvstfcsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4b]
qvstfcsuxia 2, 9, 11
# CHECK: qvstfcsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4a]
qvstfcsuxi 2, 9, 11
# CHECK: qvstfcsux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4e]
qvstfcsux 2, 9, 11
# CHECK: qvstfcsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0f]
qvstfcsxa 2, 10, 11
# CHECK: qvstfcsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0b]
qvstfcsxia 2, 10, 11
# CHECK: qvstfcsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0a]
qvstfcsxi 2, 10, 11
# CHECK: qvstfcsx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0e]
qvstfcsx 2, 10, 11
# CHECK: qvstfduxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcf]
qvstfduxa 2, 9, 11
# CHECK: qvstfduxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcb]
qvstfduxia 2, 9, 11
# CHECK: qvstfduxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xca]
qvstfduxi 2, 9, 11
# CHECK: qvstfdux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xce]
qvstfdux 2, 9, 11
# CHECK: qvstfdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8f]
qvstfdxa 2, 10, 11
# CHECK: qvstfdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8b]
qvstfdxia 2, 10, 11
# CHECK: qvstfdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8a]
qvstfdxi 2, 10, 11
# CHECK: qvstfdx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8e]
qvstfdx 2, 10, 11
# CHECK: qvstfiwxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8f]
qvstfiwxa 2, 10, 11
# CHECK: qvstfiwx 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8e]
qvstfiwx 2, 10, 11
# CHECK: qvstfsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4f]
qvstfsuxa 2, 9, 11
# CHECK: qvstfsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4b]
qvstfsuxia 2, 9, 11
# CHECK: qvstfsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4a]
qvstfsuxi 2, 9, 11
# CHECK: qvstfsux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4e]
qvstfsux 2, 9, 11
# CHECK: qvstfsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0f]
qvstfsxa 2, 10, 11
# CHECK: qvstfsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0b]
qvstfsxia 2, 10, 11
# CHECK: qvstfsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0a]
qvstfsxi 2, 10, 11
# CHECK: qvstfsx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0e]
qvstfsx 2, 10, 11