Skip to content

Conversation

@RolandF77
Copy link
Collaborator

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.

@github-actions
Copy link

github-actions bot commented Oct 31, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RolandF77 RolandF77 marked this pull request as ready for review November 4, 2025 19:06
@llvmbot
Copy link
Member

llvmbot commented Nov 4, 2025

@llvm/pr-subscribers-backend-powerpc

Author: None (RolandF77)

Changes

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.


Full diff: https://github.com/llvm/llvm-project/pull/165910.diff

5 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+68)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+3)
  • (modified) llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (+40)
  • (modified) llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h (+3)
  • (added) llvm/test/CodeGen/PowerPC/vp-ld-st.ll (+160)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 17f04d0fd05e8..f303d237e5cc2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -652,6 +652,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
+  if (Subtarget.isISA3_0() && isPPC64) {
+    setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
+    setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
+    setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
+    setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
+    setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11909,6 +11920,59 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
   return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
 }
 
+static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
+                            SelectionDAG &DAG) {
+  SDLoc dl(Val);
+  EVT VT = Val->getValueType(0);
+  unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
+  unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
+  SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
+  return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
+}
+
+SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
+  auto VPLD = cast<VPLoadSDNode>(Op);
+  bool Future = Subtarget.isISAFuture();
+  SDLoc dl(Op);
+  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
+         "Mask predication not supported");
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
+  unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
+  unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
+  Len = AdjustLength(Len, EltBits, !Future, DAG);
+  SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+                   VPLD->getOperand(1), Len};
+  SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
+  SDValue VPL =
+      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
+                              VPLD->getMemoryVT(), VPLD->getMemOperand());
+  return VPL;
+}
+
+SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
+  auto VPST = cast<VPStoreSDNode>(Op);
+  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
+         "Mask predication not supported");
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDLoc dl(Op);
+  SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
+  unsigned EltBits =
+      Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
+  bool Future = Subtarget.isISAFuture();
+  unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
+  Len = AdjustLength(Len, EltBits, !Future, DAG);
+  SDValue Ops[] = {
+      VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+      DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
+      VPST->getOperand(2), Len};
+  SDVTList Tys = DAG.getVTList(MVT::Other);
+  SDValue VPS =
+      DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+                              VPST->getMemoryVT(), VPST->getMemOperand());
+  return VPS;
+}
+
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -12763,6 +12827,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     if (Op->getFlags().hasNoFPExcept())
       return Op;
     return SDValue();
+  case ISD::VP_LOAD:
+    return LowerVP_LOAD(Op, DAG);
+  case ISD::VP_STORE:
+    return LowerVP_STORE(Op, DAG);
   }
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 880aca751d7d6..d967018982734 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1345,6 +1345,9 @@ namespace llvm {
     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2fba090f2d501..6373343f2b2e3 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppctti"
 
+static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
+                             cl::desc("Allow vp.load and vp.store for pwr9"),
+                             cl::init(false), cl::Hidden);
+
 static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
 cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
 
@@ -1031,3 +1035,39 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
   return TLI->supportsTailCallFor(CB);
 }
+
+TargetTransformInfo::VPLegalization
+PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+  using VPLegalization = TargetTransformInfo::VPLegalization;
+  unsigned Directive = ST->getCPUDirective();
+  VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
+  if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE &&
+      (!Pwr9EVL || Directive != PPC::DIR_PWR9))
+    return DefaultLegalization;
+
+  if (!ST->isPPC64())
+    return DefaultLegalization;
+
+  unsigned IID = PI.getIntrinsicID();
+  if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)
+    return DefaultLegalization;
+
+  bool IsLoad = IID == Intrinsic::vp_load;
+  Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();
+  EVT VT = TLI->getValueType(DL, VecTy, true);
+  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
+      VT != MVT::v16i8)
+    return DefaultLegalization;
+
+  auto IsAllTrueMask = [](Value *MaskVal) {
+    if (Value *SplattedVal = getSplatValue(MaskVal))
+      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+        return ConstValue->isAllOnesValue();
+    return false;
+  };
+  unsigned MaskIx = IsLoad ? 1 : 2;
+  if (!IsAllTrueMask(PI.getOperand(MaskIx)))
+    return DefaultLegalization;
+
+  return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 475472ac3720f..385ad89876b93 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -150,6 +150,9 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
                              const ArrayRef<Type *> &Types) const override;
   bool supportsTailCallFor(const CallBase *CB) const override;
 
+  TargetTransformInfo::VPLegalization
+  getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+
 private:
   // The following constant is used for estimating costs on power9.
   static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/CodeGen/PowerPC/vp-ld-st.ll b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
new file mode 100644
index 0000000000000..f0f9943e901ec
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s
+
+; Function Attrs: nounwind readnone
+define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 56
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl1:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    stxvrl 34, 5, 6
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 57
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl2:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 3, 6, 1
+; FUTURE-NEXT:    stxvrl 34, 5, 3
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 58
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl4:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 3, 6, 2
+; FUTURE-NEXT:    stxvrl 34, 5, 3
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 3, 6, 59
+; CHECK-NEXT:    stxvl 34, 5, 3
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: stxvl8:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 3, 6, 3
+; FUTURE-NEXT:    stxvrl 34, 5, 3
+; FUTURE-NEXT:    blr
+entry:
+  %cconv =  trunc i64 %c to i32
+  tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+define <16 x i8> @lxvl1(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 56
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl1:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
+  ret <16 x i8> %0
+}
+
+; Function Attrs: nounwind readnone
+define <8 x i16> @lxvl2(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 57
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl2:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 4, 4, 1
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
+  ret <8 x i16> %0
+}
+
+; Function Attrs: nounwind readnone
+define <4 x i32> @lxvl4(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 58
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl4:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 4, 4, 2
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
+  ret <4 x i32> %0
+}
+
+; Function Attrs: nounwind readnone
+define <2 x i64> @lxvl8(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi 4, 4, 59
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+;
+; FUTURE-LABEL: lxvl8:
+; FUTURE:       # %bb.0: # %entry
+; FUTURE-NEXT:    sldi 4, 4, 3
+; FUTURE-NEXT:    lxvrl 34, 3, 4
+; FUTURE-NEXT:    blr
+entry:
+  %bconv =  trunc i64 %b to i32
+  %0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
+  ret <2 x i64> %0
+}

Copy link
Contributor

@lei137 lei137 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This look like a nice optimization on existing codegen.
Would appreciate it if some documentation can be added for the new functions.

return TLI->supportsTailCallFor(CB);
}

TargetTransformInfo::VPLegalization
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good if you can add some documentation to summarize this new function.

return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}

static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add some doc on what this function's uses are.

@RolandF77 RolandF77 merged commit 411ea8e into llvm:main Nov 7, 2025
10 checks passed
vinay-deshmukh pushed a commit to vinay-deshmukh/llvm-project that referenced this pull request Nov 8, 2025
Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store
with length.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants