Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1332,8 +1332,8 @@ class TargetTransformInfo {
LLVM_ABI InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, PartialReductionExtendKind OpAExtend,
PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp = std::nullopt) const;
PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const;

/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
Expand Down
11 changes: 5 additions & 6 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -652,12 +652,11 @@ class TargetTransformInfoImplBase {
virtual bool enableWritePrefetching() const { return false; }
virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

virtual InstructionCost
getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
Type *AccumType, ElementCount VF,
TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp = std::nullopt) const {
virtual InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const {
return InstructionCost::getInvalid();
}

Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -871,10 +871,11 @@ bool TargetTransformInfo::shouldPrefetchAddressSpace(unsigned AS) const {
InstructionCost TargetTransformInfo::getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, PartialReductionExtendKind OpAExtend,
PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp) const {
PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const {
return TTIImpl->getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
AccumType, VF, OpAExtend, OpBExtend,
BinOp);
BinOp, CostKind);
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(ElementCount VF) const {
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5395,11 +5395,14 @@ AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index,
InstructionCost AArch64TTIImpl::getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp) const {
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const {
InstructionCost Invalid = InstructionCost::getInvalid();
InstructionCost Cost(TTI::TCC_Basic);

if (CostKind != TTI::TCK_RecipThroughput)
return Invalid;
Comment on lines +5403 to +5404
Copy link
Collaborator

@davemgreen davemgreen Jun 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ideally this could have just handled all of the cost types. The vectorizer will use CodeSize costs for minsize nowadays IIRC. This wasn't in review for very long though, so there wasn't much chance to react :) No need to do anything about it - I might look into relaxing it if I find it useful.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is exactly the type of thing which makes a reasonable follow up by a backend owner. I have no context on the aarch64 instruction expansions and the prior code wasn't a code size estimate at all.


// Sub opcodes currently only occur in chained cases.
// Independent partial reduction subtractions are still costed as an add
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
Expand Down
11 changes: 5 additions & 6 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,12 +382,11 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
return BaseT::isLegalNTLoad(DataType, Alignment);
}

InstructionCost
getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
Type *AccumType, ElementCount VF,
TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp) const override;
InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const override;

bool enableOrderedReductions() const override { return true; }

Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,8 @@ RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) const {
InstructionCost RISCVTTIImpl::getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp) const {
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const {

// zve32x is broken for partial_reduce_umla, but let's make sure we
// don't generate them.
Expand All @@ -311,9 +311,8 @@ InstructionCost RISCVTTIImpl::getPartialReductionCost(
Type *Tp = VectorType::get(AccumType, VF.divideCoefficientBy(4));
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
// Note: Asuming all vqdot* variants are equal cost
// TODO: Thread CostKind through this API
return LT.first * getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second,
TTI::TCK_RecipThroughput);
return LT.first *
getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
}

bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
Expand Down
11 changes: 5 additions & 6 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,11 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
TargetTransformInfo::PopcntSupportKind
getPopcntSupport(unsigned TyWidth) const override;

InstructionCost
getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
Type *AccumType, ElementCount VF,
TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp) const override;
InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const override;

bool shouldExpandReduction(const IntrinsicInst *II) const override;
bool supportsScalableVectors() const override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,15 @@ InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(
InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp) const {
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const {
InstructionCost Invalid = InstructionCost::getInvalid();
if (!VF.isFixed() || !ST->hasSIMD128())
return Invalid;

if (CostKind != TTI::TCK_RecipThroughput)
return Invalid;

InstructionCost Cost(TTI::TCC_Basic);

// Possible options:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend,
std::optional<unsigned> BinOp = std::nullopt) const override;
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const override;
TTI::ReductionShuffle
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8240,7 +8240,7 @@ bool VPRecipeBuilder::getScaledReductions(
[&](ElementCount VF) {
InstructionCost Cost = TTI->getPartialReductionCost(
Update->getOpcode(), A->getType(), B->getType(), PHI->getType(),
VF, OpAExtend, OpBExtend, BinOp->getOpcode());
VF, OpAExtend, OpBExtend, BinOp->getOpcode(), CM.CostKind);
return Cost.isValid();
},
Range)) {
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,9 +336,9 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
return TargetTransformInfo::PR_None;
};

return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
PhiType, VF, GetExtendKind(ExtAR),
GetExtendKind(ExtBR), Opcode);
return Ctx.TTI.getPartialReductionCost(
getOpcode(), InputTypeA, InputTypeB, PhiType, VF, GetExtendKind(ExtAR),
GetExtendKind(ExtBR), Opcode, Ctx.CostKind);
}

void VPPartialReductionRecipe::execute(VPTransformState &State) {
Expand Down
Loading