-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[PowerPC] cost modeling for length type VP intrinsic load/store #168938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-powerpc Author: None (RolandF77) ChangesOverride and fill in the target hooks for PPC that allow opt to cost using length style VP intrinsics for load/store. Full diff: https://github.com/llvm/llvm-project/pull/168938.diff 3 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index fbed34277dbab..f17a7a87313c6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> PPCEVL("ppc-evl",
+ cl::desc("Allow EVL type vp.load/vp.store"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
cl::desc("Allow vp.load and vp.store for pwr9"),
cl::init(false), cl::Hidden);
@@ -1078,3 +1082,62 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
+
+bool PPCTTIImpl::hasActiveVectorLength() const {
+ unsigned CPU = ST->getCPUDirective();
+ if (!PPCEVL)
+ return false;
+ if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
+ (Pwr9EVL && CPU == PPC::DIR_PWR9))
+ return true;
+ return false;
+}
+
+static inline bool isLegalLoadWithLengthType(EVT VT) {
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8)
+ return false;
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ if (!hasActiveVectorLength())
+ return false;
+ if (!isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true)))
+ return false;
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ return isLegalMaskedLoad(DataType, Alignment, AddressSpace);
+}
+
+InstructionCost
+PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const {
+ InstructionCost BaseCost =
+ BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace,
+ CostKind);
+
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return BaseCost;
+ auto VecTy = dyn_cast<FixedVectorType>(DataTy);
+ if (!VecTy)
+ return BaseCost;
+ if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))
+ return BaseCost;
+ if (VecTy->getPrimitiveSizeInBits() > 128)
+ return BaseCost;
+
+ // Is scalar compare + select + maybe shift + vector load
+ InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+ InstructionCost Cost = 2 + Adj;
+ if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
+ VecTy->getScalarSizeInBits() != 8)
+ Cost += 1; // need shift
+ return Cost;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index f80ebdbce7f64..f7fd5da94b1cb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -153,6 +153,18 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+ bool hasActiveVectorLength() const override;
+
+ bool isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override;
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override;
+
+ InstructionCost
+ getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const override;
+
private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
new file mode 100644
index 0000000000000..df5b5b68677a4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -mcpu=pwr9 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9
+; RUN: opt < %s -mcpu=pwr10 -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10
+; RUN: opt < %s -mcpu=future -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE
+target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @bar(ptr %base, <2 x ptr> %base.vec) {
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
+ %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef)
+
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef)
+
+ ret void
+}
|
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --diff_from_common_commit
View the diff from clang-format here.diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f17a7a873..dadf84b1d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1084,7 +1084,7 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
}
bool PPCTTIImpl::hasActiveVectorLength() const {
- unsigned CPU = ST->getCPUDirective();
+ unsigned CPU = ST->getCPUDirective();
if (!PPCEVL)
return false;
if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
@@ -1094,8 +1094,7 @@ bool PPCTTIImpl::hasActiveVectorLength() const {
}
static inline bool isLegalLoadWithLengthType(EVT VT) {
- if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
- VT != MVT::i8)
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
return true;
}
@@ -1116,12 +1115,10 @@ bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
InstructionCost
PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
- Align Alignment,
- unsigned AddressSpace,
+ Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) const {
- InstructionCost BaseCost =
- BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace,
- CostKind);
+ InstructionCost BaseCost = BaseT::getMaskedMemoryOpCost(
+ Opcode, DataTy, Alignment, AddressSpace, CostKind);
if (Opcode != Instruction::Load && Opcode != Instruction::Store)
return BaseCost;
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.hThe following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}Please refer to the Undefined Behavior Manual for more information. |
🐧 Linux x64 Test Results
All tests passed but another part of the build failed. Click on a failure below to see the details. lib/Target/PowerPC/CMakeFiles/LLVMPowerPCCodeGen.dir/PPCTargetTransformInfo.cpp.olib/Target/PowerPC/CMakeFiles/LLVMPowerPCCodeGen.dir/PPCTargetMachine.cpp.oIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
Override and fill in the target hooks for PPC that allow opt to cost using length style VP intrinsics for load/store.