[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE #165910

RolandF77 · 2025-10-31T19:14:08Z

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.

github-actions · 2025-10-31T19:16:09Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvmbot · 2025-11-04T19:07:17Z

@llvm/pr-subscribers-backend-powerpc

Author: None (RolandF77)

Changes

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.

Full diff: https://github.com/llvm/llvm-project/pull/165910.diff

5 Files Affected:

(modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+68)
(modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+3)
(modified) llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (+40)
(modified) llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h (+3)
(added) llvm/test/CodeGen/PowerPC/vp-ld-st.ll (+160)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 17f04d0fd05e8..f303d237e5cc2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -652,6 +652,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
+  if (Subtarget.isISA3_0() && isPPC64) {
+    setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
+    setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
+    setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
+    setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11909,6 +11920,59 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
   return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
 }
 
+static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, // Returns Val shifted left by TypeAdj (+ LeftAdj when Left is set).
+                            SelectionDAG &DAG) { // Callers pass the vector element width as Bits and the length operand as Val.
+  SDLoc dl(Val);
+  EVT VT = Val->getValueType(0);
+  unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0; // Extra shift that moves the low byte of Val into the top byte of VT.
+  unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8); // Equals log2(bytes per element) when Bits / 8 is a power of two.
+  SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
+  return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt); // Val << (LeftAdj + TypeAdj)
+}
+
+SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const { // Rewrites a VP_LOAD as a chained ppc_vsx_lxvl / ppc_vsx_lxvrl intrinsic node.
+  auto VPLD = cast<VPLoadSDNode>(Op);
+  bool Future = Subtarget.isISAFuture();
+  SDLoc dl(Op);
+  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) && // NOTE(review): assert-only check of operand 3 (the mask); confirm other masks never reach here.
+         "Mask predication not supported");
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4)); // Operand 4 is used as the length, any-extended to pointer width.
+  unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl; // ISA Future selects lxvrl; otherwise lxvl.
+  unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
+  Len = AdjustLength(Len, EltBits, !Future, DAG); // Scale by element size; only the non-Future (lxvl) form also left-justifies.
+  SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+                   VPLD->getOperand(1), Len}; // Operand 1 is forwarded as the address — presumably the base pointer; confirm.
+  SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other); // Result types: the loaded vector plus the output chain.
+  SDValue VPL =
+      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
+                              VPLD->getMemoryVT(), VPLD->getMemOperand()); // Reuses the original node's memory VT and memory operand.
+  return VPL;
+}
+
+SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const { // Rewrites a VP_STORE as a ppc_vsx_stxvl / ppc_vsx_stxvrl intrinsic node.
+  auto VPST = cast<VPStoreSDNode>(Op);
+  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) && // NOTE(review): assert-only check of operand 4 (the mask); confirm other masks never reach here.
+         "Mask predication not supported");
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDLoc dl(Op);
+  SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5)); // Operand 5 is used as the length, any-extended to pointer width.
+  unsigned EltBits =
+      Op->getOperand(1).getValueType().getScalarType().getSizeInBits(); // Element width of the stored vector (operand 1).
+  bool Future = Subtarget.isISAFuture();
+  unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl; // ISA Future selects stxvrl; otherwise stxvl.
+  Len = AdjustLength(Len, EltBits, !Future, DAG); // Scale by element size; only the non-Future (stxvl) form also left-justifies.
+  SDValue Ops[] = {
+      VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+      DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)), // The stored value is always bitcast to v4i32 for the intrinsic.
+      VPST->getOperand(2), Len}; // Operand 2 is forwarded as the address — presumably the base pointer; confirm.
+  SDVTList Tys = DAG.getVTList(MVT::Other); // Only a chain is produced.
+  SDValue VPS =
+      DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+                              VPST->getMemoryVT(), VPST->getMemOperand()); // Reuses the original node's memory VT and memory operand.
+  return VPS;
+}
+
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -12763,6 +12827,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     if (Op->getFlags().hasNoFPExcept())
       return Op;
     return SDValue();
+  case ISD::VP_LOAD:
+    return LowerVP_LOAD(Op, DAG);
+  case ISD::VP_STORE:
+    return LowerVP_STORE(Op, DAG);
   }
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 880aca751d7d6..d967018982734 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1345,6 +1345,9 @@ namespace llvm {
     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2fba090f2d501..6373343f2b2e3 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppctti"
 
+static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl", // Hidden flag, off by default; read by getVPLegalizationStrategy for DIR_PWR9.
+                             cl::desc("Allow vp.load and vp.store for pwr9"),
+                             cl::init(false), cl::Hidden);
+
 static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
 cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
 
@@ -1031,3 +1035,39 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
   return TLI->supportsTailCallFor(CB);
 }
+
+TargetTransformInfo::VPLegalization // Returns Legal/Legal for the vp.load / vp.store cases accepted below; otherwise the base-class strategy.
+PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+  using VPLegalization = TargetTransformInfo::VPLegalization;
+  unsigned Directive = ST->getCPUDirective();
+  VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
+  if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE && // Accept PWR10 and Future always,
+      (!Pwr9EVL || Directive != PPC::DIR_PWR9)) // and PWR9 only when -ppc-pwr9-evl is set.
+    return DefaultLegalization;
+
+  if (!ST->isPPC64()) // 64-bit subtargets only.
+    return DefaultLegalization;
+
+  unsigned IID = PI.getIntrinsicID();
+  if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store) // Every other VP intrinsic keeps the default strategy.
+    return DefaultLegalization;
+
+  bool IsLoad = IID == Intrinsic::vp_load;
+  Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType(); // Loads: the result type; stores: the type of operand 0.
+  EVT VT = TLI->getValueType(DL, VecTy, true);
+  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && // Only these four integer vector types are accepted.
+      VT != MVT::v16i8)
+    return DefaultLegalization;
+
+  auto IsAllTrueMask = [](Value *MaskVal) { // True only when MaskVal is a splat of an all-ones Constant.
+    if (Value *SplattedVal = getSplatValue(MaskVal))
+      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+        return ConstValue->isAllOnesValue();
+    return false;
+  };
+  unsigned MaskIx = IsLoad ? 1 : 2; // The mask is operand 1 of vp.load and operand 2 of vp.store.
+  if (!IsAllTrueMask(PI.getOperand(MaskIx))) // Any other mask keeps the default strategy.
+    return DefaultLegalization;
+
+  return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 475472ac3720f..385ad89876b93 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -150,6 +150,9 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
                              const ArrayRef<Type *> &Types) const override;
   bool supportsTailCallFor(const CallBase *CB) const override;
 
+  TargetTransformInfo::VPLegalization
+  getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+
 private:
   // The following constant is used for estimating costs on power9.
   static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/CodeGen/PowerPC/vp-ld-st.ll b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
new file mode 100644
index 0000000000000..f0f9943e901ec
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s
+
+; Stores <16 x i8> %a to %b via llvm.vp.store with an all-true mask; the EVL is %c truncated to i32.
+define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 56
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl1:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    stxvrl 34, 5, 6
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Stores <8 x i16> %a to %b via llvm.vp.store with an all-true mask; the EVL is %c truncated to i32.
+define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 57
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl2:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 3, 6, 1
+; FUTURE-NEXT:    stxvrl 34, 5, 3
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Stores <4 x i32> %a to %b via llvm.vp.store with an all-true mask; the EVL is %c truncated to i32.
+define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 58
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl4:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 3, 6, 2
+; FUTURE-NEXT:    stxvrl 34, 5, 3
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Stores <2 x i64> %a to %b via llvm.vp.store with an all-true mask; the EVL is %c truncated to i32.
+define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 59
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl8:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 3, 6, 3
+; FUTURE-NEXT:    stxvrl 34, 5, 3
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Loads <16 x i8> from %a via llvm.vp.load with an all-true mask; the EVL is %b truncated to i32.
+define <16 x i8> @lxvl1(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 56
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl1:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
+  ret <16 x i8> %0
+}
+
+; Loads <8 x i16> from %a via llvm.vp.load with an all-true mask; the EVL is %b truncated to i32.
+define <8 x i16> @lxvl2(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 57
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl2:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 4, 4, 1
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
+  ret <8 x i16> %0
+}
+
+; Loads <4 x i32> from %a via llvm.vp.load with an all-true mask; the EVL is %b truncated to i32.
+define <4 x i32> @lxvl4(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 58
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl4:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 4, 4, 2
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
+  ret <4 x i32> %0
+}
+
+; Loads <2 x i64> from %a via llvm.vp.load with an all-true mask; the EVL is %b truncated to i32.
+define <2 x i64> @lxvl8(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 59
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl8:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 4, 4, 3
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
+  ret <2 x i64> %0
+}

lei137

This looks like a nice optimization of the existing codegen.
Would appreciate it if some documentation can be added for the new functions.

lei137 · 2025-11-05T21:01:14Z

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

  return TLI->supportsTailCallFor(CB);
 }
+
+TargetTransformInfo::VPLegalization


Would be good if you can add some documentation to summarize this new function.

lei137 · 2025-11-05T21:03:10Z

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

  return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
 }

+static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,


Maybe add some documentation on what this function is used for.

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.

RolandF77 added 3 commits October 29, 2025 18:46

lower vp load/store

d451b76

allow p9 testing

5d3ff20

cleanup

73496b6

RolandF77 added 2 commits November 3, 2025 19:11

formatting

7ea0aca

test

e5b55c3

RolandF77 marked this pull request as ready for review November 4, 2025 19:06

llvmbot added the backend:PowerPC label Nov 4, 2025

RolandF77 requested review from diggerlin, lei137 and maryammo November 4, 2025 23:09

lei137 approved these changes Nov 6, 2025

View reviewed changes

add comments

9e5ab11

RolandF77 merged commit 411ea8e into llvm:main Nov 7, 2025
10 checks passed

vinay-deshmukh pushed a commit to vinay-deshmukh/llvm-project that referenced this pull request Nov 8, 2025

[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE (llvm#165910)

a4554b6

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.

nigham mentioned this pull request Nov 10, 2025

[libc] Implement fchown #167286

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE #165910

[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE #165910

Uh oh!

RolandF77 commented Oct 31, 2025

Uh oh!

github-actions bot commented Oct 31, 2025 •

edited

Loading

Uh oh!

llvmbot commented Nov 4, 2025

Uh oh!

lei137 left a comment

Uh oh!

lei137 Nov 5, 2025

Uh oh!

lei137 Nov 5, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE #165910

[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE #165910

Uh oh!

Conversation

RolandF77 commented Oct 31, 2025

Uh oh!

github-actions bot commented Oct 31, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 4, 2025

Uh oh!

lei137 left a comment

Choose a reason for hiding this comment

Uh oh!

lei137 Nov 5, 2025

Choose a reason for hiding this comment

Uh oh!

lei137 Nov 5, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

github-actions bot commented Oct 31, 2025 •

edited

Loading