diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 0f17312b03827..a65e4667ab76c 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -123,6 +123,32 @@ struct HardwareLoopInfo { LLVM_ABI bool canAnalyze(LoopInfo &LI); }; +/// Information for memory intrinsic cost model. +class MemIntrinsicCostAttributes { + /// Vector type of the data to be loaded or stored. + Type *DataTy = nullptr; + + /// ID of the memory intrinsic. + Intrinsic::ID IID; + + /// Address space of the pointer. + unsigned AddressSpace = 0; + + /// Alignment of single element. + Align Alignment; + +public: + LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy, + Align Alignment, unsigned AddressSpace) + : DataTy(DataTy), IID(Id), AddressSpace(AddressSpace), + Alignment(Alignment) {} + + Intrinsic::ID getID() const { return IID; } + Type *getDataType() const { return DataTy; } + unsigned getAddressSpace() const { return AddressSpace; } + Align getAlignment() const { return Alignment; } +}; + class IntrinsicCostAttributes { const IntrinsicInst *II = nullptr; Type *RetTy = nullptr; @@ -1556,7 +1582,7 @@ class TargetTransformInfo { /// \return The cost of masked Load and Store instructions. 
LLVM_ABI InstructionCost getMaskedMemoryOpCost( - unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, + const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// \return The cost of Gather or Scatter operation diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index aacb88d2f9684..d8e35748f53e5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -842,8 +842,7 @@ class TargetTransformInfoImplBase { } virtual InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { return 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index e8dbc964a943e..b74501040298a 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1547,9 +1547,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override { + Type *DataTy = MICA.getDataType(); + Align Alignment = MICA.getAlignment(); + unsigned Opcode = MICA.getID() == Intrinsic::masked_load + ? Instruction::Load + : Instruction::Store; // TODO: Pass on AddressSpace when we have test coverage. return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false, CostKind); @@ -1606,10 +1610,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Firstly, the cost of load/store operation. 
InstructionCost Cost; - if (UseMaskForCond || UseMaskForGaps) - Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment, - AddressSpace, CostKind); - else + if (UseMaskForCond || UseMaskForGaps) { + unsigned IID = Opcode == Instruction::Load ? Intrinsic::masked_load + : Intrinsic::masked_store; + Cost = thisT()->getMaskedMemoryOpCost( + {IID, VecTy, Alignment, AddressSpace}, CostKind); + } else Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind); @@ -2408,14 +2414,12 @@ case Intrinsic::masked_store: { Type *Ty = Tys[0]; Align TyAlign = thisT()->DL.getABITypeAlign(Ty); - return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0, - CostKind); + return thisT()->getMaskedMemoryOpCost({IID, Ty, TyAlign, 0}, CostKind); } case Intrinsic::masked_load: { Type *Ty = RetTy; Align TyAlign = thisT()->DL.getABITypeAlign(Ty); - return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0, - CostKind); + return thisT()->getMaskedMemoryOpCost({IID, Ty, TyAlign, 0}, CostKind); } case Intrinsic::experimental_vp_strided_store: { auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 0426ac7e62fab..45369f0ffe137 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1183,10 +1183,9 @@ InstructionCost TargetTransformInfo::getMemoryOpCost( } InstructionCost TargetTransformInfo::getMaskedMemoryOpCost( - unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, + const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { - InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, - AddressSpace, CostKind); + InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(MICA, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git 
a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 10f2c80edc1b3..9dbca001a3ff6 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4720,12 +4720,12 @@ bool AArch64TTIImpl::prefersVectorizedAddressing() const { } InstructionCost -AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - Align Alignment, unsigned AddressSpace, +AArch64TTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { + Type *Src = MICA.getDataType(); + if (useNeonVector(Src)) - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); + return BaseT::getMaskedMemoryOpCost(MICA, CostKind); auto LT = getTypeLegalizationCost(Src); if (!LT.first.isValid()) return InstructionCost::getInvalid(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index e3b0a1bec53ec..e04a5522e392c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -188,8 +188,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase { unsigned Opcode2) const; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override; InstructionCost diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 9b250e6cac3ab..9b80c4f95ccff 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1631,20 +1631,22 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } InstructionCost -ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned 
AddressSpace, +ARMTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { + unsigned IID = MICA.getID(); + Type *Src = MICA.getDataType(); + Align Alignment = MICA.getAlignment(); + unsigned AddressSpace = MICA.getAddressSpace(); if (ST->hasMVEIntegerOps()) { - if (Opcode == Instruction::Load && + if (IID == Intrinsic::masked_load && isLegalMaskedLoad(Src, Alignment, AddressSpace)) return ST->getMVEVectorCostFactor(CostKind); - if (Opcode == Instruction::Store && + if (IID == Intrinsic::masked_store && isLegalMaskedStore(Src, Alignment, AddressSpace)) return ST->getMVEVectorCostFactor(CostKind); } if (!isa<FixedVectorType>(Src)) - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); + return BaseT::getMaskedMemoryOpCost(MICA, CostKind); // Scalar cost, which is currently very high due to the efficiency of the // generated code. return cast<FixedVectorType>(Src)->getNumElements() * 8; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 0810c5532ed91..919a6fc9fd0b0 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -275,8 +275,7 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> { const Instruction *I = nullptr) const override; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override; InstructionCost getInterleavedMemoryOpCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index e925e041eb64e..8f3f0cc8abb01 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -224,11 +224,9 @@ InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } InstructionCost 
-HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - Align Alignment, unsigned AddressSpace, +HexagonTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); + return BaseT::getMaskedMemoryOpCost(MICA, CostKind); } InstructionCost diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index cec2bf9656ffc..e95b5a10b76a7 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -120,8 +120,7 @@ class HexagonTTIImpl final : public BasicTTIImplBase { TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I = nullptr) const override; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override; InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 7bc0b5b394828..85d66392862d9 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1001,13 +1001,17 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead( } InstructionCost -RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, +RISCVTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { + unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? 
Instruction::Load + : Instruction::Store; + Type *Src = MICA.getDataType(); + Align Alignment = MICA.getAlignment(); + unsigned AddressSpace = MICA.getAddressSpace(); + if (!isLegalMaskedLoadStore(Src, Alignment) || CostKind != TTI::TCK_RecipThroughput) - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); + return BaseT::getMaskedMemoryOpCost(MICA, CostKind); return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 6886e8964e29e..39c1173e2986c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -144,8 +144,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase { bool shouldConsiderVectorizationRegPressure() const override { return true; } InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override; InstructionCost diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0b1430e373fc7..4b77bf925b2ba 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5411,9 +5411,14 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } InstructionCost -X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment, - unsigned AddressSpace, +X86TTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { + unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? 
Instruction::Load + : Instruction::Store; + Type *SrcTy = MICA.getDataType(); + Align Alignment = MICA.getAlignment(); + unsigned AddressSpace = MICA.getAddressSpace(); + bool IsLoad = (Instruction::Load == Opcode); bool IsStore = (Instruction::Store == Opcode); @@ -6647,10 +6652,12 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( LegalVT.getVectorNumElements()); InstructionCost MemOpCost; bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps; - if (UseMaskedMemOp) - MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment, - AddressSpace, CostKind); - else + if (UseMaskedMemOp) { + unsigned IID = Opcode == Instruction::Load ? Intrinsic::masked_load + : Intrinsic::masked_store; + MemOpCost = getMaskedMemoryOpCost( + {IID, SingleMemOpTy, Alignment, AddressSpace}, CostKind); + } else MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace, CostKind); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index de5e1c297b1e4..df1393ce16ca1 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -183,8 +183,7 @@ class X86TTIImpl final : public BasicTTIImplBase { TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I = nullptr) const override; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, + getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override; InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e5c3f17860103..dd984c485c1ee 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5197,8 +5197,10 @@ 
LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, const Align Alignment = getLoadStoreAlignment(I); InstructionCost Cost = 0; if (Legal->isMaskRequired(I)) { - Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, - CostKind); + unsigned IID = I->getOpcode() == Instruction::Load + ? Intrinsic::masked_load + : Intrinsic::masked_store; + Cost += TTI.getMaskedMemoryOpCost({IID, VectorTy, Alignment, AS}, CostKind); } else { TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0)); Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bf3f52c51b64c..f7a800652c551 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6848,9 +6848,10 @@ static bool isMaskedLoadCompress( ScalarLoadsCost; InstructionCost LoadCost = 0; if (IsMasked) { - LoadCost = - TTI.getMaskedMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment, - LI->getPointerAddressSpace(), CostKind); + LoadCost = TTI.getMaskedMemoryOpCost({Intrinsic::masked_load, LoadVecTy, + CommonAlignment, + LI->getPointerAddressSpace()}, + CostKind); } else { LoadCost = TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment, @@ -7249,8 +7250,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( break; case LoadsState::CompressVectorize: VecLdCost += TTI.getMaskedMemoryOpCost( - Instruction::Load, SubVecTy, CommonAlignment, - LI0->getPointerAddressSpace(), CostKind) + + {Intrinsic::masked_load, SubVecTy, CommonAlignment, + LI0->getPointerAddressSpace()}, + CostKind) + VectorGEPCost + ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy, {}, CostKind); @@ -15041,8 +15043,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, CommonAlignment, LI0->getPointerAddressSpace(), CostKind); } else if (IsMasked) { VecLdCost = TTI->getMaskedMemoryOpCost( - 
Instruction::Load, LoadVecTy, CommonAlignment, - LI0->getPointerAddressSpace(), CostKind); + {Intrinsic::masked_load, LoadVecTy, CommonAlignment, + LI0->getPointerAddressSpace()}, + CostKind); // TODO: include this cost into CommonCost. VecLdCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, LoadVecTy, CompressMask, CostKind); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1ee405a62aa68..f7604e59725b7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3566,8 +3566,10 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, InstructionCost Cost = 0; if (IsMasked) { + unsigned IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_load + : Intrinsic::masked_store; Cost += - Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind); + Ctx.TTI.getMaskedMemoryOpCost({IID, Ty, Alignment, AS}, Ctx.CostKind); } else { TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo( isa<VPWidenLoadRecipe>(this) ? getOperand(0) @@ -3685,8 +3687,10 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr())) ->getAddressSpace(); + // FIXME: getMaskedMemoryOpCost assumes masked_* intrinsics. + // After migrating to getMemIntrinsicInstrCost, switch this to vp_load. InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( - Instruction::Load, Ty, Alignment, AS, Ctx.CostKind); + {Intrinsic::masked_load, Ty, Alignment, AS}, Ctx.CostKind); if (!Reverse) return Cost; @@ -3794,8 +3798,10 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr())) ->getAddressSpace(); + // FIXME: getMaskedMemoryOpCost assumes masked_* intrinsics. + // After migrating to getMemIntrinsicInstrCost, switch this to vp_store. 
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( - Instruction::Store, Ty, Alignment, AS, Ctx.CostKind); + {Intrinsic::masked_store, Ty, Alignment, AS}, Ctx.CostKind); if (!Reverse) return Cost;