From 2fa5e1ed9a81bd86681a3284eb14d56d0d8c7c93 Mon Sep 17 00:00:00 2001 From: Roland Froese Date: Thu, 20 Nov 2025 19:40:16 +0000 Subject: [PATCH 1/3] cost modeling for EVL vp.load/vp.store --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 63 +++++++++++++++++++ .../Target/PowerPC/PPCTargetTransformInfo.h | 12 ++++ .../CostModel/PowerPC/ld-st-with-length.ll | 19 ++++++ 3 files changed, 94 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index fbed34277dbab..f17a7a87313c6 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -24,6 +24,10 @@ using namespace llvm; #define DEBUG_TYPE "ppctti" +static cl::opt<bool> PPCEVL("ppc-evl", + cl::desc("Allow EVL type vp.load/vp.store"), + cl::init(false), cl::Hidden); + static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl", cl::desc("Allow vp.load and vp.store for pwr9"), cl::init(false), cl::Hidden); @@ -1078,3 +1082,62 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const { return VPLegalization(VPLegalization::Legal, VPLegalization::Legal); } + +bool PPCTTIImpl::hasActiveVectorLength() const { + unsigned CPU = ST->getCPUDirective(); + if (!PPCEVL) + return false; + if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE || + (Pwr9EVL && CPU == PPC::DIR_PWR9)) + return true; + return false; +} + +static inline bool isLegalLoadWithLengthType(EVT VT) { + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && + VT != MVT::i8) + return false; + return true; +} + +bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment, + unsigned AddressSpace) const { + if (!hasActiveVectorLength()) + return false; + if (!isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true))) + return false; + return true; +} + +bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment, + unsigned
AddressSpace) const { + return isLegalMaskedLoad(DataType, Alignment, AddressSpace); +} + +InstructionCost +PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind) const { + InstructionCost BaseCost = + BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace, + CostKind); + + if (Opcode != Instruction::Load && Opcode != Instruction::Store) + return BaseCost; + auto VecTy = dyn_cast(DataTy); + if (!VecTy) + return BaseCost; + if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace)) + return BaseCost; + if (VecTy->getPrimitiveSizeInBits() > 128) + return BaseCost; + + // Is scalar compare + select + maybe shift + vector load + InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr); + InstructionCost Cost = 2 + Adj; + if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE || + VecTy->getScalarSizeInBits() != 8) + Cost += 1; // need shift + return Cost; +} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index f80ebdbce7f64..f7fd5da94b1cb 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -153,6 +153,18 @@ class PPCTTIImpl final : public BasicTTIImplBase { TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const override; + bool hasActiveVectorLength() const override; + + bool isLegalMaskedStore(Type *DataType, Align Alignment, + unsigned AddressSpace) const override; + bool isLegalMaskedLoad(Type *DataType, Align Alignment, + unsigned AddressSpace) const override; + + InstructionCost + getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind) const override; + private: // The following constant is used for estimating costs on power9. 
static const InstructionCost::CostType P9PipelineFlushEstimate = 80; diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll new file mode 100644 index 0000000000000..df5b5b68677a4 --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -mcpu=pwr9 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9 +; RUN: opt < %s -mcpu=pwr10 -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10 +; RUN: opt < %s -mcpu=future -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE +target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" +target triple = "powerpc64le-unknown-linux-gnu" + +define void @bar(ptr %base, <2 x ptr> %base.vec) { +; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0 +; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0 +; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0 +; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0 +; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0 +; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0 + %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef) + + call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef) + + ret void +} From 5d98a9b96ffcc808319ccc546d5b33a64a2f7eda Mon Sep 17 00:00:00 2001 From: Roland Froese Date: Fri, 21 Nov 2025 19:44:19 +0000 Subject: [PATCH 2/3] fix checks --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 22 +++++++++---------- .../Target/PowerPC/PPCTargetTransformInfo.h | 3 +-- .../CostModel/PowerPC/ld-st-with-length.ll | 6 ++--- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f17a7a87313c6..fe4c6b73900d6 100644 ---
a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1084,7 +1084,7 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const { } bool PPCTTIImpl::hasActiveVectorLength() const { - unsigned CPU = ST->getCPUDirective(); + unsigned CPU = ST->getCPUDirective(); if (!PPCEVL) return false; if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE || @@ -1094,8 +1094,7 @@ bool PPCTTIImpl::hasActiveVectorLength() const { } static inline bool isLegalLoadWithLengthType(EVT VT) { - if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && - VT != MVT::i8) + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) return false; return true; } @@ -1115,16 +1114,17 @@ bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment, } InstructionCost -PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, - Align Alignment, - unsigned AddressSpace, +PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { - InstructionCost BaseCost = - BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace, - CostKind); + Type *DataTy = MICA.getDataType(); + Align Alignment = MICA.getAlignment(); + unsigned Opcode = MICA.getID() == Intrinsic::masked_load + ? 
Instruction::Load + : Instruction::Store; + unsigned AddressSpace = MICA.getAddressSpace(); + + InstructionCost BaseCost = BaseT::getMaskedMemoryOpCost(MICA, CostKind); - if (Opcode != Instruction::Load && Opcode != Instruction::Store) - return BaseCost; auto VecTy = dyn_cast(DataTy); if (!VecTy) return BaseCost; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index f7fd5da94b1cb..ac665f99a5ddc 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -161,8 +161,7 @@ class PPCTTIImpl final : public BasicTTIImplBase { unsigned AddressSpace) const override; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override; private: diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll index df5b5b68677a4..fe0b035e062e0 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll @@ -4,16 +4,16 @@ target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64le-unknown-linux-gnu" -define void @bar(ptr %base, <2 x ptr> %base.vec) { +define void @bar(ptr %base, <2 x i8> %val) { ; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0 ; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0 ; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0 ; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0 ; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0 ; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0 - %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef) + %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> , <2 x i8> %val) - 
call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v2i8.p0(<2 x i8> %x2, ptr %base, i32 1, <2 x i1> ) ret void } From 5c7d0b1e5a6917d80f8e3b3a06853e8969d3c82d Mon Sep 17 00:00:00 2001 From: Roland Froese Date: Fri, 21 Nov 2025 20:49:30 +0000 Subject: [PATCH 3/3] formatting --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index fe4c6b73900d6..2c547eb68437e 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1118,9 +1118,8 @@ PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { Type *DataTy = MICA.getDataType(); Align Alignment = MICA.getAlignment(); - unsigned Opcode = MICA.getID() == Intrinsic::masked_load - ? Instruction::Load - : Instruction::Store; + unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load + : Instruction::Store; unsigned AddressSpace = MICA.getAddressSpace(); InstructionCost BaseCost = BaseT::getMaskedMemoryOpCost(MICA, CostKind);