Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV][CostModel] Add getRISCVInstructionCost() to TTI for Cost… #73651

Merged
merged 9 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2676,11 +2676,19 @@ InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
return getLMULCost(VT);
}

/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
/// Return the cost of a vslidedown.vx or vslideup.vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
return getLMULCost(VT);
}

/// Return the cost of a vslidedown.vi or vslideup.vi instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
/// The current model returns getLMULCost(VT), which is the same value that
/// getVSlideVXCost returns. NOTE(review): the immediate form is presumably
/// kept as a separate hook so that .vi and .vx slides can be costed
/// differently later -- confirm.
InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
return getLMULCost(VT);
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,8 @@ class RISCVTargetLowering : public TargetLowering {

InstructionCost getVRGatherVVCost(MVT VT) const;
InstructionCost getVRGatherVICost(MVT VT) const;
InstructionCost getVSlideCost(MVT VT) const;
InstructionCost getVSlideVXCost(MVT VT) const;
InstructionCost getVSlideVICost(MVT VT) const;

// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
Expand Down
116 changes: 103 additions & 13 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,65 @@ static cl::opt<unsigned> SLPMaxVF(
"exclusively by SLP vectorizer."),
cl::Hidden);

InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
TTI::TargetCostKind CostKind) {
size_t NumInstr = OpCodes.size();
if (CostKind == TTI::TCK_CodeSize)
topperc marked this conversation as resolved.
Show resolved Hide resolved
return NumInstr;
InstructionCost LMULCost = TLI->getLMULCost(VT);
if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
return LMULCost * NumInstr;
InstructionCost Cost = 0;
for (auto Op : OpCodes) {
switch (Op) {
case RISCV::VRGATHER_VI:
Cost += TLI->getVRGatherVICost(VT);
break;
case RISCV::VRGATHER_VV:
Cost += TLI->getVRGatherVVCost(VT);
break;
case RISCV::VSLIDEUP_VI:
case RISCV::VSLIDEDOWN_VI:
Cost += TLI->getVSlideVICost(VT);
break;
case RISCV::VSLIDEUP_VX:
Copy link
Collaborator

@topperc topperc Dec 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.vx and .vi slides are somewhat different. The .vi instructions know at decode time how far to slide so hardware can know early which source DLEN pieces are needed for each DLEN piece of the result.

.vx requires the sources to be determined after looking at the scalar register.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think it would be good to expand getVSlideCost to getVSlideVXCost and getVSlideVICost. Fixed.

case RISCV::VSLIDEDOWN_VX:
Cost += TLI->getVSlideVXCost(VT);
break;
case RISCV::VREDMAX_VS:
case RISCV::VREDMIN_VS:
case RISCV::VREDMAXU_VS:
case RISCV::VREDMINU_VS:
case RISCV::VREDSUM_VS:
case RISCV::VREDAND_VS:
case RISCV::VREDOR_VS:
case RISCV::VREDXOR_VS:
case RISCV::VFREDMAX_VS:
case RISCV::VFREDMIN_VS:
case RISCV::VFREDUSUM_VS: {
unsigned VL = VT.getVectorMinNumElements();
if (!VT.isFixedLengthVector())
VL *= *getVScaleForTuning();
Cost += Log2_32_Ceil(VL);
break;
}
case RISCV::VFREDOSUM_VS: {
unsigned VL = VT.getVectorMinNumElements();
if (!VT.isFixedLengthVector())
VL *= *getVScaleForTuning();
Cost += VL;
break;
}
case RISCV::VMV_S_X:
// FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
default:
Cost += LMULCost;
}
}
return Cost;
}

InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
Expand Down Expand Up @@ -279,7 +338,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Example sequence:
// vnsrl.wi v10, v8, 0
if (equal(DeinterleaveMask, Mask))
return LT.first * TLI->getLMULCost(LT.second);
return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
LT.second, CostKind);
}
}
}
Expand All @@ -290,7 +350,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second.getVectorNumElements() <= 256)) {
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
return IndexCost + TLI->getVRGatherVVCost(LT.second);
return IndexCost +
getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
}
[[fallthrough]];
}
Expand All @@ -308,7 +369,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
return 2 * IndexCost +
getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
LT.second, CostKind) +
MaskCost;
}
[[fallthrough]];
}
Expand Down Expand Up @@ -363,19 +427,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslidedown.vi v8, v9, 2
return LT.first * TLI->getVSlideCost(LT.second);
return LT.first *
getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
case TTI::SK_InsertSubvector:
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslideup.vi v8, v9, 2
return LT.first * TLI->getVSlideCost(LT.second);
return LT.first *
getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
case TTI::SK_Select: {
// Example sequence:
// li a0, 90
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
// vmv.s.x v0, a0
// vmerge.vvm v8, v9, v8, v0
return LT.first * 3 * TLI->getLMULCost(LT.second);
return LT.first *
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
LT.second, CostKind));
}
case TTI::SK_Broadcast: {
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
Expand All @@ -387,7 +456,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
return LT.first * TLI->getLMULCost(LT.second) * 3;
return LT.first *
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
LT.second, CostKind));
}
// Example sequence:
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
Expand All @@ -398,24 +470,40 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0

return LT.first * TLI->getLMULCost(LT.second) * 6;
return LT.first *
(TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
TLI->getLMULCost(
LT.second) + // FIXME: vmv.x.s is the same as extractelement
getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
RISCV::VMV_V_X, RISCV::VMSNE_VI},
LT.second, CostKind));
}

if (HasScalar) {
// Example sequence:
// vmv.v.x v8, a0
return LT.first * TLI->getLMULCost(LT.second);
return LT.first *
getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
}

// Example sequence:
// vrgather.vi v9, v8, 0
return LT.first * TLI->getVRGatherVICost(LT.second);
return LT.first *
getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
}
case TTI::SK_Splice:
case TTI::SK_Splice: {
// vslidedown+vslideup.
// TODO: Multiplying by LT.first implies this legalizes into multiple copies
// of similar code, but I think we expand through memory.
return 2 * LT.first * TLI->getVSlideCost(LT.second);
ArrayRef<unsigned> Opcodes;
if (Index >= 0 && Index < 32)
Opcodes = {RISCV::VSLIDEDOWN_VI, RISCV::VSLIDEUP_VX};
else if (Index < 0 && Index > -32)
Opcodes = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VI};
else
Opcodes = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
case TTI::SK_Reverse: {
// TODO: Cases to improve here:
// * Illegal vector types
Expand All @@ -435,7 +523,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (LT.second.isFixedLengthVector())
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
// FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX}
InstructionCost GatherCost =
2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
// Mask operation additionally required extend and truncate
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
return LT.first * (LenCost + GatherCost + ExtendCost);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// actual target hardware.
unsigned getEstimatedVLFor(VectorType *Ty);

/// Return the estimated cost of the RISC-V instruction sequence OpCodes
/// operating on type VT for the given CostKind. For TCK_CodeSize this is
/// the number of opcodes. For TCK_RecipThroughput and TCK_Latency it is
/// the sum of the per-opcode costs. For any other cost kind it is the LMUL
/// cost of VT multiplied by the number of opcodes.
InstructionCost getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
TTI::TargetCostKind CostKind);

/// Return the cost of accessing a constant pool entry of the specified
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
Expand Down
Loading