Skip to content
33 changes: 5 additions & 28 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -845,34 +845,6 @@ class TargetTransformInfoImplBase {
return 1;
}

/// Default cost for a masked load/store intrinsic: a nominal cost of 1.
/// Targets override this hook to model the real expansion cost.
virtual InstructionCost
getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                      TTI::TargetCostKind CostKind) const {
  return 1;
}

/// Default cost for a gather/scatter memory operation: a nominal cost of 1.
/// \p Opcode is Instruction::Load or Instruction::Store; \p I, when non-null,
/// is the concrete instruction being costed.
virtual InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr) const {
  return 1;
}

/// Default cost for an expand-load/compress-store intrinsic: a nominal cost
/// of 1. Targets override this hook for accurate modelling.
virtual InstructionCost
getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                              TTI::TargetCostKind CostKind) const {
  return 1;
}

/// Default cost for a strided memory operation. Strided accesses are not
/// supported by default, so the cost is reported as invalid; targets with
/// native strided loads/stores override this.
virtual InstructionCost
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr) const {
  return InstructionCost::getInvalid();
}

virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
Expand Down Expand Up @@ -932,6 +904,11 @@ class TargetTransformInfoImplBase {
/// Default cost for a memory intrinsic. Strided VP loads/stores are not
/// supported by default and report an invalid cost; every other memory
/// intrinsic gets a nominal cost of 1.
virtual InstructionCost
getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                         TTI::TargetCostKind CostKind) const {
  switch (MICA.getID()) {
  case Intrinsic::experimental_vp_strided_load:
  case Intrinsic::experimental_vp_strided_store:
    // No default lowering for strided accesses.
    return InstructionCost::getInvalid();
  default:
    return 1;
  }
}
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
Expand Down
102 changes: 36 additions & 66 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1557,55 +1557,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}

/// Cost a masked load/store by delegating to the common masked-memory
/// helper with a variable mask and no gather/scatter addressing.
InstructionCost
getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                      TTI::TargetCostKind CostKind) const override {
  const bool IsLoad = MICA.getID() == Intrinsic::masked_load;
  const unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
  // TODO: Pass on AddressSpace when we have test coverage.
  return getCommonMaskedMemoryOpCost(Opcode, MICA.getDataType(),
                                     MICA.getAlignment(),
                                     /*VariableMask=*/true,
                                     /*IsGatherScatter=*/false, CostKind);
}

/// Cost a gather/scatter by delegating to the common masked-memory helper
/// with gather/scatter addressing enabled.
InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr) const override {
  return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                     true, CostKind);
}

/// Cost an expand-load/compress-store intrinsic.
// Treat expand load/compress store as gather/scatter operation.
// TODO: implement more precise cost estimation for these intrinsics.
InstructionCost
getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                              TTI::TargetCostKind CostKind) const override {
  const bool IsExpandLoad = MICA.getID() == Intrinsic::masked_expandload;
  return getCommonMaskedMemoryOpCost(
      IsExpandLoad ? Instruction::Load : Instruction::Store,
      MICA.getDataType(), MICA.getAlignment(), MICA.getVariableMask(),
      /*IsGatherScatter*/ true, CostKind);
}

/// Fallback strided-access cost: modelled as the equivalent gather/scatter
/// via the CRTP-derived target's hook.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                       const Value *Ptr, bool VariableMask,
                                       Align Alignment,
                                       TTI::TargetCostKind CostKind,
                                       const Instruction *I) const override {
  // For a target without strided memory operations (or for an illegal
  // operation type on one which does), assume we lower to a gather/scatter
  // operation. (Which may in turn be scalarized.)
  return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);
}

InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
Expand Down Expand Up @@ -3054,8 +3005,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
TTI::TargetCostKind CostKind) const override {
unsigned Id = MICA.getID();
Type *DataTy = MICA.getDataType();
const Value *Ptr = MICA.getPointer();
const Instruction *I = MICA.getInst();
bool VariableMask = MICA.getVariableMask();
Align Alignment = MICA.getAlignment();

Expand All @@ -3065,26 +3014,47 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
unsigned Opcode = Id == Intrinsic::experimental_vp_strided_load
? Instruction::Load
: Instruction::Store;
return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
// For a target without strided memory operations (or for an illegal
// operation type on one which does), assume we lower to a gather/scatter
// operation. (Which may in turn be scalarized.)
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment,
VariableMask, true, CostKind);
}
case Intrinsic::masked_scatter:
case Intrinsic::masked_gather:
case Intrinsic::vp_scatter:
case Intrinsic::vp_gather: {
unsigned Opcode =
(Id == Intrinsic::masked_gather || Id == Intrinsic::vp_gather)
? Instruction::Load
: Instruction::Store;
return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
case Intrinsic::vp_gather:
case Intrinsic::masked_scatter:
case Intrinsic::masked_gather: {
unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
MICA.getID() == Intrinsic::vp_gather)
? Instruction::Load
: Instruction::Store;

return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment,
VariableMask, true, CostKind);
}

case Intrinsic::vp_load:
case Intrinsic::vp_store:
return InstructionCost::getInvalid();
case Intrinsic::masked_load:
case Intrinsic::masked_store:
return thisT()->getMaskedMemoryOpCost(MICA, CostKind);
case Intrinsic::masked_store: {
unsigned Opcode =
Id == Intrinsic::masked_load ? Instruction::Load : Instruction::Store;
// TODO: Pass on AddressSpace when we have test coverage.
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
CostKind);
}
case Intrinsic::masked_compressstore:
case Intrinsic::masked_expandload:
return thisT()->getExpandCompressMemoryOpCost(MICA, CostKind);
case Intrinsic::masked_expandload: {
unsigned Opcode = MICA.getID() == Intrinsic::masked_expandload
? Instruction::Load
: Instruction::Store;
// Treat expand load/compress store as gather/scatter operation.
// TODO: implement more precise cost estimation for these intrinsics.
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment,
VariableMask,
/*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::vp_load_ff:
return InstructionCost::getInvalid();
default:
Expand Down
35 changes: 29 additions & 6 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4746,13 +4746,27 @@ bool AArch64TTIImpl::prefersVectorizedAddressing() const {
return ST->hasSVE();
}

/// Dispatch memory-intrinsic costing: gathers/scatters and masked
/// loads/stores go to the AArch64-specific helpers, everything else to the
/// base implementation.
InstructionCost
AArch64TTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                                         TTI::TargetCostKind CostKind) const {
  const unsigned IID = MICA.getID();
  if (IID == Intrinsic::masked_gather || IID == Intrinsic::masked_scatter)
    return getGatherScatterOpCost(MICA, CostKind);
  if (IID == Intrinsic::masked_load || IID == Intrinsic::masked_store)
    return getMaskedMemoryOpCost(MICA, CostKind);
  return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
}

InstructionCost
AArch64TTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {
Type *Src = MICA.getDataType();

if (useNeonVector(Src))
return BaseT::getMaskedMemoryOpCost(MICA, CostKind);
return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
auto LT = getTypeLegalizationCost(Src);
if (!LT.first.isValid())
return InstructionCost::getInvalid();
Expand Down Expand Up @@ -4794,12 +4808,21 @@ static unsigned getSVEGatherScatterOverhead(unsigned Opcode,
}
}

InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
InstructionCost
AArch64TTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {

unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
MICA.getID() == Intrinsic::vp_gather)
? Instruction::Load
: Instruction::Store;

Type *DataTy = MICA.getDataType();
Align Alignment = MICA.getAlignment();
const Instruction *I = MICA.getInst();

if (useNeonVector(DataTy) || !isLegalMaskedGatherScatter(DataTy))
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
auto *VT = cast<VectorType>(DataTy);
auto LT = getTypeLegalizationCost(DataTy);
if (!LT.first.isValid())
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,14 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
unsigned Opcode2) const;

InstructionCost
getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const override;
getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const override;

InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const;

InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const;

bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
Type *Src) const;
Expand Down
32 changes: 26 additions & 6 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,20 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
CostKind, OpInfo, I);
}

/// Dispatch memory-intrinsic costing: gathers/scatters and masked
/// loads/stores go to the ARM-specific helpers, everything else to the base
/// implementation.
InstructionCost
ARMTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                                     TTI::TargetCostKind CostKind) const {
  const unsigned IID = MICA.getID();
  if (IID == Intrinsic::masked_gather || IID == Intrinsic::masked_scatter)
    return getGatherScatterOpCost(MICA, CostKind);
  if (IID == Intrinsic::masked_load || IID == Intrinsic::masked_store)
    return getMaskedMemoryOpCost(MICA, CostKind);
  return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
}

InstructionCost
ARMTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {
Expand All @@ -1647,7 +1661,7 @@ ARMTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
return ST->getMVEVectorCostFactor(CostKind);
}
if (!isa<FixedVectorType>(Src))
return BaseT::getMaskedMemoryOpCost(MICA, CostKind);
return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
// Scalar cost, which is currently very high due to the efficiency of the
// generated code.
return cast<FixedVectorType>(Src)->getNumElements() * 8;
Expand Down Expand Up @@ -1694,13 +1708,19 @@ InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
UseMaskForCond, UseMaskForGaps);
}

InstructionCost ARMTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
InstructionCost
ARMTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {

Type *DataTy = MICA.getDataType();
const Value *Ptr = MICA.getPointer();
bool VariableMask = MICA.getVariableMask();
Align Alignment = MICA.getAlignment();
const Instruction *I = MICA.getInst();

using namespace PatternMatch;
if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);

assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
auto *VTy = cast<FixedVectorType>(DataTy);
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,19 +279,19 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
const Instruction *I = nullptr) const override;

InstructionCost
getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const override;
getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const override;

InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const;

InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const;

InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Expand Down
25 changes: 19 additions & 6 deletions llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,24 @@ InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
OpInfo, I);
}

/// Dispatch memory-intrinsic costing: gathers/scatters and masked
/// loads/stores go to the Hexagon-specific helpers, everything else to the
/// base implementation.
InstructionCost
HexagonTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                                         TTI::TargetCostKind CostKind) const {
  const unsigned IID = MICA.getID();
  if (IID == Intrinsic::masked_gather || IID == Intrinsic::masked_scatter)
    return getGatherScatterOpCost(MICA, CostKind);
  if (IID == Intrinsic::masked_load || IID == Intrinsic::masked_store)
    return getMaskedMemoryOpCost(MICA, CostKind);
  return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
}

InstructionCost
HexagonTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe these overrides can just be removed? (In a separate earlier patch)

TTI::TargetCostKind CostKind) const {
return BaseT::getMaskedMemoryOpCost(MICA, CostKind);
return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
}

InstructionCost
Expand All @@ -238,11 +252,10 @@ HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
return 1;
}

InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
InstructionCost
HexagonTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you do this API change in a prior change? It would reduce the mechanical part of the diff here significantly. Happy to review if you want to put it up and tag me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized you already had this change on review as #168650, and had even mentioned it in the review description. Sorry for not noticing, you have an LGTM there and can rebase this one you land it.

TTI::TargetCostKind CostKind) const {
return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
}

InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
Expand Down
Loading