AVX-512: Implemented encoding, DAG lowering and intrinsics for Integer Truncate with/without saturation

Added tests for DAG lowering, encoding and intrinsics

Differential Revision: http://reviews.llvm.org/D11218

llvm-svn: 242990
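
For orientation, a minimal C++ usage sketch of the new functionality. The intrinsic names below are assumed from the usual <immintrin.h> AVX-512 surface (they are not part of this patch, which adds the underlying LLVM IR intrinsics, DAG lowering and encodings):

#include <immintrin.h>
#include <cstdint>

// Requires AVX512F (compile with e.g. -mavx512f).
void truncate_v8i64_to_v8i8(__m512i v, uint8_t *out, __mmask8 k) {
  __m128i plain = _mm512_cvtepi64_epi8(v);    // VPMOVQB: keep the low 8 bits of each lane
  __m128i sat_s = _mm512_cvtsepi64_epi8(v);   // VPMOVSQB: signed saturation to [-128, 127]
  __m128i sat_u = _mm512_cvtusepi64_epi8(v);  // VPMOVUSQB: unsigned saturation to [0, 255]
  // Masked truncating store straight to memory; lanes with a zero mask bit
  // leave the destination bytes untouched.
  _mm512_mask_cvtepi64_storeu_epi8(out, k, v);
  (void)plain; (void)sat_s; (void)sat_u;
}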
Igor Breger committed Jul 23, 2015
1 parent ac7947e commit da1b2ea
Showing 17 changed files with 4,649 additions and 125 deletions.
544 changes: 544 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td

Large diffs are not rendered by default.

53 changes: 51 additions & 2 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -493,9 +493,10 @@ def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
// Do not use mld, mst directly. Use masked_store, masked_load, masked_truncstore.
def mst : SDNode<"ISD::MSTORE", SDTMaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
def mld : SDNode<"ISD::MLOAD", SDTMaskedLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -680,6 +681,12 @@ def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}]>;

// masked load fragments.
def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(mld node:$src1, node:$src2, node:$src3), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}]>;

// extending load fragments.
def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
@@ -791,6 +798,12 @@ def store : PatFrag<(ops node:$val, node:$ptr),
return !cast<StoreSDNode>(N)->isTruncatingStore();
}]>;

// masked store fragments.
def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(mst node:$src1, node:$src2, node:$src3), [{
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;

// truncstore fragments.
def truncstore : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore node:$val, node:$ptr), [{
@@ -817,6 +830,21 @@ def truncstoref64 : PatFrag<(ops node:$val, node:$ptr),
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f64;
}]>;

def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;

def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;

def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

// indexed store fragments.
def istore : PatFrag<(ops node:$val, node:$base, node:$offset),
(ist node:$val, node:$base, node:$offset), [{
@@ -891,6 +919,27 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
}]>;

// masked truncstore fragments
def masked_truncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(mst node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;
def masked_truncstorevi8 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_truncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def masked_truncstorevi16 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_truncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def masked_truncstorevi32 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_truncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

// setcc convenience fragments.
def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETOEQ)>;
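The masked truncating-store fragments above match ISD::MSTORE nodes whose memory type is narrower than the value type. As a rough sketch of how such a node is built (hypothetical helper name; LowerINTRINSIC_TRUNCATE_TO_MEM in the X86 changes below creates essentially this node):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Hypothetical helper: store vNi32 data as vNi8 in memory under a vNi1 mask,
// i.e. the node shape that masked_truncstorevi8 matches and masked_store rejects.
static SDValue emitMaskedTruncStoreVi8(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
                                       SDValue Data, SDValue Addr, SDValue VMask) {
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8,
                               Data.getValueType().getVectorNumElements());
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOStore, MemVT.getStoreSize(),
      MemVT.getScalarSizeInBits() / 8);
  return DAG.getMaskedStore(Chain, dl, Data, Addr, VMask, MemVT, MMO,
                            /*isTruncating=*/true);
}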
132 changes: 119 additions & 13 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1348,6 +1348,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);

setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (Subtarget->hasVLX()){
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);

setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
@@ -1556,6 +1574,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);

setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
@@ -1566,6 +1585,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
setOperationAction(ISD::UMIN, MVT::v32i16, Legal);

setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
if (Subtarget->hasVLX())
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);

for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;

@@ -12485,10 +12509,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
Subtarget->hasDQI() && Subtarget->hasVLX())
return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
}
if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
if (VT.getVectorElementType().getSizeInBits() >=8)
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);

if (VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
unsigned NumElts = InVT.getVectorNumElements();
assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
@@ -12504,6 +12526,11 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
}

// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (((!InVT.is512BitVector() && Subtarget->hasVLX()) || InVT.is512BitVector()) &&
(InVT.getVectorElementType() != MVT::i16 || Subtarget->hasBWI()))
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);

if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
@@ -15220,28 +15247,37 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,

/// \brief Return (and \p Op, \p Mask) for compare instructions or
/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
/// necessary casting for \p Mask when lowering masking intrinsics.
/// necessary casting or extending for \p Mask when lowering masking intrinsics.
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
MVT::i1, VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = SDValue();
unsigned OpcodeSelect = ISD::VSELECT;
SDLoc dl(Op);

assert(MaskVT.isSimple() && "invalid mask type");

if (isAllOnes(Mask))
return Op;

// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
if (MaskVT.bitsGT(Mask.getValueType())) {
EVT newMaskVT = EVT::getIntegerVT(*DAG.getContext(),
MaskVT.getSizeInBits());
VMask = DAG.getBitcast(MaskVT,
DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT, Mask));
} else {
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
}

switch (Op.getOpcode()) {
default: break;
@@ -15250,10 +15286,18 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
case X86ISD::CMPM:
case X86ISD::CMPMU:
return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS:
// We can't use ISD::VSELECT here because it is not always "Legal"
// for the destination type. For example, vpmovqb requires only AVX512,
// while a vselect that operates on byte elements requires BWI.
OpcodeSelect = X86ISD::SELECT;
break;
}
if (PreservedSrc.getOpcode() == ISD::UNDEF)
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}

/// \brief Creates an SDNode for a predicated scalar operation.
@@ -16111,6 +16155,45 @@ static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
return Chain;
}

/// \brief Lower intrinsics for the TRUNCATE_TO_MEM case;
/// returns a truncating Store/MaskedStore node.
static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op,
SelectionDAG &DAG,
MVT ElementType) {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue DataToTruncate = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);

EVT VT = DataToTruncate.getValueType();
EVT SVT = EVT::getVectorVT(*DAG.getContext(),
ElementType, VT.getVectorNumElements());

if (isAllOnes(Mask)) // return just a truncate store
return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr,
MachinePointerInfo(), SVT, false, false,
SVT.getScalarSizeInBits()/8);

EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
MVT::i1, VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));

MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(),
MachineMemOperand::MOStore, SVT.getStoreSize(),
SVT.getScalarSizeInBits()/8);

return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr,
VMask, SVT, MMO, true);
}

static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -16244,6 +16327,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
}
case TRUNCATE_TO_MEM_VI8:
return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8);
case TRUNCATE_TO_MEM_VI16:
return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16);
case TRUNCATE_TO_MEM_VI32:
return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
@@ -18954,7 +19043,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
@@ -24093,6 +24183,15 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
// In this case we don't need any further transformations.
if (TLI.isTruncStoreLegal(VT, StVT))
return SDValue();

// From, To sizes and ElemCount must be pow of two
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for truncating masked store");
@@ -24204,6 +24303,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();

// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
// In this case we don't need any further transformations.
if (TLI.isTruncStoreLegal(VT, StVT))
return SDValue();

// From, To sizes and ElemCount must be pow of two
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
// We are going to use the original vector elt for storing.
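The two store combines above now bail out when the target already handles the truncating store natively. A small sketch of how that legality query connects to the setTruncStoreAction calls added earlier in this patch (hypothetical helper, not literal patch code):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical excerpt: a store combine leaves a truncating store alone once
// the target has marked it Legal, e.g. ValVT = v8i64, MemVT = v8i8 (VPMOVQB)
// after the AVX-512 setTruncStoreAction(..., Legal) calls above.
static bool shouldKeepTruncStore(SelectionDAG &DAG, EVT ValVT, EVT MemVT) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.isTruncStoreLegal(ValVT, MemVT);
}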
5 changes: 2 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -282,9 +282,8 @@ namespace llvm {

// Vector integer truncate.
VTRUNC,

// Vector integer truncate with mask.
VTRUNCM,
// Vector integer truncate with unsigned/signed saturation.
VTRUNCUS, VTRUNCS,

// Vector FP extend.
VFPEXT,
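As a scalar illustration of the new VTRUNCS/VTRUNCUS nodes versus plain VTRUNC (not from the patch, just the per-element arithmetic they denote):

#include <algorithm>
#include <cstdint>
#include <cstdio>

static int8_t  trunc8(int64_t x) { return (int8_t)x; }  // VTRUNC: keep the low 8 bits
static int8_t  sat_s8(int64_t x) {                       // VTRUNCS: clamp to [-128, 127]
  return (int8_t)std::min<int64_t>(127, std::max<int64_t>(-128, x));
}
static uint8_t sat_u8(int64_t x) {                       // VTRUNCUS: clamp to [0, 255]
  return (uint8_t)std::min<int64_t>(255, std::max<int64_t>(0, x));
}

int main() {
  int64_t x = 300;                                            // does not fit in 8 bits
  std::printf("%d %d %d\n", trunc8(x), sat_s8(x), sat_u8(x)); // prints: 44 127 255
  return 0;
}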
