Skip to content

Commit

Permalink
[Power9] Exploit vector absolute difference instructions on Power 9
Browse files Browse the repository at this point in the history
Power 9 has vector instructions that compute the absolute difference of
unsigned byte, halfword and word elements (VABSDUB, VABSDUH, VABSDUW).
We should take advantage of these.

Differential Revision: https://reviews.llvm.org/D34684

llvm-svn: 309876
  • Loading branch information
stefanp-ibm committed Aug 2, 2017
1 parent 1b53672 commit 873889c
Show file tree
Hide file tree
Showing 3 changed files with 410 additions and 1 deletion.
38 changes: 37 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -226,6 +226,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Expand);
}

if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
}

// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
Expand Down Expand Up @@ -8390,16 +8396,46 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

SDLoc dl(Op);

if (IntrinsicID == Intrinsic::thread_pointer) {
// Reads the thread pointer register, used for __builtin_thread_pointer.
if (Subtarget.isPPC64())
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
}

// We are looking for absolute values here.
// The idea is to try to fit one of two patterns:
// max (a, (0-a)) OR max ((0-a), a)
if (Subtarget.hasP9Vector() &&
(IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
SDValue V1 = Op.getOperand(1);
SDValue V2 = Op.getOperand(2);
if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
(V1.getSimpleValueType() == MVT::v4i32 ||
V1.getSimpleValueType() == MVT::v8i16 ||
V1.getSimpleValueType() == MVT::v16i8)) {
if ( V1.getOpcode() == ISD::SUB &&
ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
V1.getOperand(1) == V2 ) {
// Generate the abs instruction with the operands
return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
}

if ( V2.getOpcode() == ISD::SUB &&
ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
V2.getOperand(1) == V1 ) {
// Generate the abs instruction with the operands
return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
}
}
}

// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
SDLoc dl(Op);
int CompareOpc;
bool isDot;
if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrAltivec.td
Expand Up @@ -1488,4 +1488,19 @@ def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vabsduw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;

// ISD::ABS is a *signed* absolute value, whereas VABSDUB/VABSDUH/VABSDUW
// compute an *unsigned* absolute difference. Selecting (VABSDUx $vA, 0) for
// (abs $vA) would hand back negative elements unchanged (abs(-1) would come
// out as 0xFF..FF rather than 1). Lower abs as smax($vA, 0 - $vA) instead,
// which is the same max(a, 0-a) identity the intrinsic combine in
// PPCISelLowering.cpp recognises, and which keeps the wrap-around result for
// the minimum signed value.
def : Pat<(v16i8:$vD (abs v16i8:$vA)),
          (v16i8 (VMAXSB $vA, (VSUBUBM (V_SET0B), $vA)))>;
def : Pat<(v8i16:$vD (abs v8i16:$vA)),
          (v8i16 (VMAXSH $vA, (VSUBUHM (V_SET0H), $vA)))>;
def : Pat<(v4i32:$vD (abs v4i32:$vA)),
          (v4i32 (VMAXSW $vA, (VSUBUWM (V_SET0), $vA)))>;

// (abs (sub $vA, $vB)) is the signed absolute value of the wrapped
// difference, which is not the unsigned absolute difference of the operands:
// with $vA = -1 and $vB = 1 the expected result is 2, but VABSDUW yields
// 0xFFFFFFFE. Since -($vA - $vB) == ($vB - $vA) under modulo arithmetic, use
// smax($vA - $vB, $vB - $vA); this also avoids materialising a zero vector.
def : Pat<(v16i8:$vD (abs (sub v16i8:$vA, v16i8:$vB))),
          (v16i8 (VMAXSB (VSUBUBM $vA, $vB), (VSUBUBM $vB, $vA)))>;
def : Pat<(v8i16:$vD (abs (sub v8i16:$vA, v8i16:$vB))),
          (v8i16 (VMAXSH (VSUBUHM $vA, $vB), (VSUBUHM $vB, $vA)))>;
def : Pat<(v4i32:$vD (abs (sub v4i32:$vA, v4i32:$vB))),
          (v4i32 (VMAXSW (VSUBUWM $vA, $vB), (VSUBUWM $vB, $vA)))>;


} // end HasP9Altivec

0 comments on commit 873889c

Please sign in to comment.