Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

// On 64-bit ISA 3.0 targets, route vp.load/vp.store nodes to custom lowering
// (LowerVP_LOAD / LowerVP_STORE via LowerOperation). Note that the actions are
// registered on the i1 mask vector types, not on the data vector types.
// NOTE(review): presumably the illegal i1 mask operand is what hands these
// nodes to custom lowering during type legalization -- confirm.
if (Subtarget.isISA3_0() && isPPC64) {
setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
}

// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
Expand Down Expand Up @@ -11909,6 +11920,62 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}

/// Convert an element count into the length operand expected by the
/// load/store-with-length intrinsics used by LowerVP_LOAD and LowerVP_STORE.
///
/// Two adjustments are folded into a single left shift:
///  * scaling: the count is multiplied by the element size in bytes, i.e.
///    shifted left by log2(Bits / 8);
///  * justification: when \p Left is set the result is additionally shifted
///    into the most significant byte of the value (width of Val's type minus
///    8 bits), for instructions that require a left-justified length.
///
/// For example, a count of 32-bit elements held in a 64-bit value with
/// \p Left set is shifted left by 56 + 2 = 58.
///
/// \param Val  Element count, already extended to the type the length operand
///             must have.
/// \param Bits Element size in bits; must be a power of two and at least 8.
/// \param Left True if the instruction wants the length left justified.
/// \param DAG  DAG in which the shift node is created.
/// \returns Val shifted left by the combined amount, in Val's own type.
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
                            SelectionDAG &DAG) {
  // countr_zero(Bits / 8) is only log2 of the byte size for power-of-two
  // widths, and yields 32 for sub-byte elements; reject both up front rather
  // than building a shift with a nonsensical amount.
  assert(Bits >= 8 && (Bits & (Bits - 1)) == 0 &&
         "Element size must be a power-of-two number of bytes");
  SDLoc dl(Val);
  EVT VT = Val->getValueType(0);
  unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
  unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
  SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
  return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
}

/// Custom lowering of ISD::VP_LOAD to a load-with-length VSX intrinsic.
///
/// Only the explicit vector length is honoured; the mask must be a constant
/// all-ones splat (asserted). The EVL is extended to pointer width and then
/// scaled by the element size. On ISA Future the node becomes ppc_vsx_lxvrl
/// with that length as is; otherwise it becomes ppc_vsx_lxvl with the length
/// left justified.
///
/// \returns the INTRINSIC_W_CHAIN memory node, which reuses the original
/// node's memory VT and memory operand.
SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *Load = cast<VPLoadSDNode>(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Load->getMask().getNode(), true) &&
         "Mask predication not supported");

  const bool IsFuture = Subtarget.isISAFuture();
  EVT ResultVT = Op->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Build the length operand first so the DAG nodes are created in the same
  // order as before: extend, then shift, then the intrinsic ID constant.
  SDValue Length =
      DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, Load->getVectorLength());
  unsigned ElementBits = ResultVT.getScalarType().getSizeInBits();
  Length = AdjustLength(Length, ElementBits, /*Left=*/!IsFuture, DAG);

  unsigned IntrinsicID =
      IsFuture ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
  SDValue IntrinsicIDNode = DAG.getConstant(IntrinsicID, dl, MVT::i32);

  SDValue Operands[] = {Load->getChain(), IntrinsicIDNode, Load->getBasePtr(),
                        Length};
  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                 DAG.getVTList(ResultVT, MVT::Other), Operands,
                                 Load->getMemoryVT(), Load->getMemOperand());
}

/// Custom lowering of ISD::VP_STORE to a store-with-length VSX intrinsic.
///
/// Only the explicit vector length is honoured; the mask must be a constant
/// all-ones splat (asserted). The EVL is extended to pointer width and then
/// scaled by the element size of the stored value. On ISA Future the node
/// becomes ppc_vsx_stxvrl with that length as is; otherwise it becomes
/// ppc_vsx_stxvl with the length left justified. The stored value is bitcast
/// to v4i32 before being handed to the intrinsic.
///
/// \returns the INTRINSIC_VOID memory node, which reuses the original node's
/// memory VT and memory operand.
SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *Store = cast<VPStoreSDNode>(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Store->getMask().getNode(), true) &&
         "Mask predication not supported");

  const bool IsFuture = Subtarget.isISAFuture();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Length operand: extend, then scale/justify (same node order as before).
  SDValue Length =
      DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, Store->getVectorLength());
  unsigned ElementBits =
      Store->getValue().getValueType().getScalarType().getSizeInBits();
  Length = AdjustLength(Length, ElementBits, /*Left=*/!IsFuture, DAG);

  unsigned IntrinsicID =
      IsFuture ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
  SDValue IntrinsicIDNode = DAG.getConstant(IntrinsicID, dl, MVT::i32);
  SDValue StoredBits =
      DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Store->getValue());

  SDValue Operands[] = {Store->getChain(), IntrinsicIDNode, StoredBits,
                        Store->getBasePtr(), Length};
  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl,
                                 DAG.getVTList(MVT::Other), Operands,
                                 Store->getMemoryVT(), Store->getMemOperand());
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
Expand Down Expand Up @@ -12763,6 +12830,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Op->getFlags().hasNoFPExcept())
return Op;
return SDValue();
case ISD::VP_LOAD:
return LowerVP_LOAD(Op, DAG);
case ISD::VP_STORE:
return LowerVP_STORE(Op, DAG);
}
}

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,9 @@ namespace llvm {
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ using namespace llvm;

#define DEBUG_TYPE "ppctti"

// Hidden option, off by default. When set, getVPLegalizationStrategy also
// treats the pwr9 CPU directive as eligible for legal vp.load/vp.store, in
// addition to pwr10 and future.
static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
cl::desc("Allow vp.load and vp.store for pwr9"),
cl::init(false), cl::Hidden);

static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);

Expand Down Expand Up @@ -1031,3 +1035,42 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
return TLI->supportsTailCallFor(CB);
}

// Tell the vector-predication expansion whether a VP intrinsic can be kept
// as is (and so reach instruction selection as a VP node) or must take the
// base implementation's strategy.
//
// An intrinsic is reported fully legal (both EVL and operation) only when all
// of the following hold; otherwise the BaseT strategy is returned unchanged:
//  * the CPU directive is pwr10 or future, or pwr9 with -ppc-pwr9-evl set;
//  * the subtarget is 64-bit;
//  * the intrinsic is vp.load or vp.store;
//  * the loaded/stored type maps to v16i8, v8i16, v4i32 or v2i64;
//  * the mask is a splat of a constant all-ones value.
//
// NOTE(review): only the CPU directive and 64-bit mode are checked here;
// confirm the required vector features cannot be disabled independently.
TargetTransformInfo::VPLegalization
PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
  using VPLegalization = TargetTransformInfo::VPLegalization;
  VPLegalization Fallback = BaseT::getVPLegalizationStrategy(PI);

  const unsigned CPU = ST->getCPUDirective();
  const bool IsEligibleCPU = CPU == PPC::DIR_PWR10 ||
                             CPU == PPC::DIR_PWR_FUTURE ||
                             (Pwr9EVL && CPU == PPC::DIR_PWR9);
  if (!IsEligibleCPU)
    return Fallback;

  if (!ST->isPPC64())
    return Fallback;

  // Pick out the vector type that is actually moved to or from memory.
  Type *DataTy = nullptr;
  switch (PI.getIntrinsicID()) {
  case Intrinsic::vp_load:
    DataTy = PI.getType();
    break;
  case Intrinsic::vp_store:
    DataTy = PI.getMemoryDataParam()->getType();
    break;
  default:
    return Fallback;
  }

  EVT DataVT = TLI->getValueType(DL, DataTy, true);
  const bool IsSupportedVT = DataVT == MVT::v16i8 || DataVT == MVT::v8i16 ||
                             DataVT == MVT::v4i32 || DataVT == MVT::v2i64;
  if (!IsSupportedVT)
    return Fallback;

  // Only an explicit vector length is supported, so the mask has to be a
  // constant all-true splat.
  Value *Splat = getSplatValue(PI.getMaskParam());
  auto *SplatConst = Splat ? dyn_cast<Constant>(Splat) : nullptr;
  if (!SplatConst || !SplatConst->isAllOnesValue())
    return Fallback;

  return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
const ArrayRef<Type *> &Types) const override;
bool supportsTailCallFor(const CallBase *CB) const override;

TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override;

private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
Expand Down
160 changes: 160 additions & 0 deletions llvm/test/CodeGen/PowerPC/vp-ld-st.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=future \
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s

; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=future \
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s

; vp.store of <16 x i8> with an all-true mask and an EVL truncated from i64.
; Byte elements need no scaling: the default prefix expects the EVL shifted
; left by 56 before stxvl; FUTURE expects stxvrl to use the EVL register as is.
define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 3, 6, 56
; CHECK-NEXT: stxvl 34, 5, 3
; CHECK-NEXT: blr
;
; FUTURE-LABEL: stxvl1:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: stxvrl 34, 5, 6
; FUTURE-NEXT: blr
entry:
%cconv = trunc i64 %c to i32
tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
ret void
}

; vp.store of <8 x i16> with an all-true mask and an EVL truncated from i64.
; Halfword elements scale the EVL by 2: the default prefix expects a shift of
; 56 + 1 before stxvl; FUTURE expects a shift of 1 before stxvrl.
define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 3, 6, 57
; CHECK-NEXT: stxvl 34, 5, 3
; CHECK-NEXT: blr
;
; FUTURE-LABEL: stxvl2:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: sldi 3, 6, 1
; FUTURE-NEXT: stxvrl 34, 5, 3
; FUTURE-NEXT: blr
entry:
%cconv = trunc i64 %c to i32
tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
ret void
}

; vp.store of <4 x i32> with an all-true mask and an EVL truncated from i64.
; Word elements scale the EVL by 4: the default prefix expects a shift of
; 56 + 2 before stxvl; FUTURE expects a shift of 2 before stxvrl.
define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 3, 6, 58
; CHECK-NEXT: stxvl 34, 5, 3
; CHECK-NEXT: blr
;
; FUTURE-LABEL: stxvl4:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: sldi 3, 6, 2
; FUTURE-NEXT: stxvrl 34, 5, 3
; FUTURE-NEXT: blr
entry:
%cconv = trunc i64 %c to i32
tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
ret void
}

; vp.store of <2 x i64> with an all-true mask and an EVL truncated from i64.
; Doubleword elements scale the EVL by 8: the default prefix expects a shift
; of 56 + 3 before stxvl; FUTURE expects a shift of 3 before stxvrl.
define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 3, 6, 59
; CHECK-NEXT: stxvl 34, 5, 3
; CHECK-NEXT: blr
;
; FUTURE-LABEL: stxvl8:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: sldi 3, 6, 3
; FUTURE-NEXT: stxvrl 34, 5, 3
; FUTURE-NEXT: blr
entry:
%cconv = trunc i64 %c to i32
tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
ret void
}

; vp.load of <16 x i8> with an all-true mask and an EVL truncated from i64.
; Byte elements need no scaling: the default prefix expects the EVL shifted
; left by 56 before lxvl; FUTURE expects lxvrl to use the EVL register as is.
define <16 x i8> @lxvl1(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 4, 4, 56
; CHECK-NEXT: lxvl 34, 3, 4
; CHECK-NEXT: blr
;
; FUTURE-LABEL: lxvl1:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: lxvrl 34, 3, 4
; FUTURE-NEXT: blr
entry:
%bconv = trunc i64 %b to i32
%0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
ret <16 x i8> %0
}

; vp.load of <8 x i16> with an all-true mask and an EVL truncated from i64.
; Halfword elements scale the EVL by 2: the default prefix expects a shift of
; 56 + 1 before lxvl; FUTURE expects a shift of 1 before lxvrl.
define <8 x i16> @lxvl2(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 4, 4, 57
; CHECK-NEXT: lxvl 34, 3, 4
; CHECK-NEXT: blr
;
; FUTURE-LABEL: lxvl2:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: sldi 4, 4, 1
; FUTURE-NEXT: lxvrl 34, 3, 4
; FUTURE-NEXT: blr
entry:
%bconv = trunc i64 %b to i32
%0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
ret <8 x i16> %0
}

; vp.load of <4 x i32> with an all-true mask and an EVL truncated from i64.
; Word elements scale the EVL by 4: the default prefix expects a shift of
; 56 + 2 before lxvl; FUTURE expects a shift of 2 before lxvrl.
define <4 x i32> @lxvl4(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 4, 4, 58
; CHECK-NEXT: lxvl 34, 3, 4
; CHECK-NEXT: blr
;
; FUTURE-LABEL: lxvl4:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: sldi 4, 4, 2
; FUTURE-NEXT: lxvrl 34, 3, 4
; FUTURE-NEXT: blr
entry:
%bconv = trunc i64 %b to i32
%0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
ret <4 x i32> %0
}

; vp.load of <2 x i64> with an all-true mask and an EVL truncated from i64.
; Doubleword elements scale the EVL by 8: the default prefix expects a shift
; of 56 + 3 before lxvl; FUTURE expects a shift of 3 before lxvrl.
define <2 x i64> @lxvl8(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi 4, 4, 59
; CHECK-NEXT: lxvl 34, 3, 4
; CHECK-NEXT: blr
;
; FUTURE-LABEL: lxvl8:
; FUTURE: # %bb.0: # %entry
; FUTURE-NEXT: sldi 4, 4, 3
; FUTURE-NEXT: lxvrl 34, 3, 4
; FUTURE-NEXT: blr
entry:
%bconv = trunc i64 %b to i32
%0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
ret <2 x i64> %0
}