-
Notifications
You must be signed in to change notification settings - Fork 11.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][CostModel] Add getRISCVInstructionCost() to TTI for Cost… #73651
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-risc-v Author: Shih-Po Hung (arcbbb) Changes…Kind Instruction cost for CodeSize and Latency/RecipThroughput can be very differnet. Considering the diversity of CostKind and vendor-specific cost, and how they are spread across various TTI functions, it's becoming quite a challenge to handle. This patch adds an interface getRISCVInstructionCost to address it. Full diff: https://github.com/llvm/llvm-project/pull/73651.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9b0..e48f51c751e245d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,6 +34,50 @@ static cl::opt<unsigned> SLPMaxVF(
"exclusively by SLP vectorizer."),
cl::Hidden);
+InstructionCost RISCVTTIImpl::getRISCVInstructionCost(
+ RISCVInstruction Inst, MVT VT, unsigned NumInstr,
+ TTI::TargetCostKind CostKind) {
+ if (CostKind == TTI::TCK_CodeSize)
+ return NumInstr;
+
+ InstructionCost LMUL = TLI->getLMULCost(VT);
+ InstructionCost Cost = LMUL * NumInstr;
+
+ if ((CostKind == TTI::TCK_RecipThroughput) ||
+ (CostKind == TTI::TCK_Latency)) {
+ switch (Inst) {
+ case RISCVInstruction::VRGATHER_VI:
+ return NumInstr * TLI->getVRGatherVICost(VT);
+ case RISCVInstruction::VRGATHER_VV:
+ return NumInstr * TLI->getVRGatherVVCost(VT);
+ case RISCVInstruction::VSLIDE:
+ return NumInstr * TLI->getVSlideCost(VT);
+ case RISCVInstruction::VSIMPLE_INT_RED:
+ case RISCVInstruction::VMINMAX_INTFP_RED:
+ case RISCVInstruction::VUNORD_FP_RED: {
+ unsigned VL = VT.getVectorMinNumElements();
+ if (!VT.isFixedLengthVector()) {
+ VL *= *getVScaleForTuning();
+ }
+ return Log2_32_Ceil(VL);
+ }
+ case RISCVInstruction::VORD_FP_RED: {
+ unsigned VL = VT.getVectorMinNumElements();
+ if (!VT.isFixedLengthVector()) {
+ VL *= *getVScaleForTuning();
+ }
+ return VL;
+ }
+ case RISCVInstruction::VMERGE:
+ case RISCVInstruction::VMV:
+ case RISCVInstruction::VSIMPLE_INT:
+ case RISCVInstruction::VNARROWING:
+ return Cost;
+ }
+ }
+ return Cost;
+}
+
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
@@ -279,7 +323,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Example sequence:
// vnsrl.wi v10, v8, 0
if (equal(DeinterleaveMask, Mask))
- return LT.first * TLI->getLMULCost(LT.second);
+ return LT.first *
+ getRISCVInstructionCost(RISCVInstruction::VNARROWING,
+ LT.second, 1, CostKind);
}
}
}
@@ -290,7 +336,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second.getVectorNumElements() <= 256)) {
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
- return IndexCost + TLI->getVRGatherVVCost(LT.second);
+ return IndexCost +
+ getRISCVInstructionCost(RISCVInstruction::VRGATHER_VV,
+ LT.second, 1, CostKind);
}
[[fallthrough]];
}
@@ -308,7 +356,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
- return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
+ return 2 * IndexCost +
+ getRISCVInstructionCost(RISCVInstruction::VRGATHER_VV,
+ LT.second, 2, CostKind) +
+ MaskCost;
}
[[fallthrough]];
}
@@ -363,19 +414,26 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslidedown.vi v8, v9, 2
- return LT.first * TLI->getVSlideCost(LT.second);
+ return LT.first * getRISCVInstructionCost(RISCVInstruction::VSLIDE,
+ LT.second, 1, CostKind);
case TTI::SK_InsertSubvector:
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslideup.vi v8, v9, 2
- return LT.first * TLI->getVSlideCost(LT.second);
+ return LT.first * getRISCVInstructionCost(RISCVInstruction::VSLIDE,
+ LT.second, 1, CostKind);
case TTI::SK_Select: {
// Example sequence:
// li a0, 90
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
// vmv.s.x v0, a0
// vmerge.vvm v8, v9, v8, v0
- return LT.first * 3 * TLI->getLMULCost(LT.second);
+ return LT.first *
+ (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
+ getRISCVInstructionCost(RISCVInstruction::VMV, LT.second, 1,
+ CostKind) +
+ getRISCVInstructionCost(RISCVInstruction::VMERGE, LT.second, 1,
+ CostKind));
}
case TTI::SK_Broadcast: {
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -387,7 +445,12 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
- return LT.first * TLI->getLMULCost(LT.second) * 3;
+ return LT.first *
+ (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
+ getRISCVInstructionCost(RISCVInstruction::VMV, LT.second,
+ 1, CostKind) +
+ getRISCVInstructionCost(RISCVInstruction::VSIMPLE_INT,
+ LT.second, 1, CostKind));
}
// Example sequence:
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -398,24 +461,34 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
- return LT.first * TLI->getLMULCost(LT.second) * 6;
+ return LT.first *
+ (TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
+ getRISCVInstructionCost(RISCVInstruction::VMV, LT.second, 3,
+ CostKind) +
+ getRISCVInstructionCost(RISCVInstruction::VMERGE, LT.second,
+ 1, CostKind) +
+ getRISCVInstructionCost(RISCVInstruction::VSIMPLE_INT,
+ LT.second, 1, CostKind));
}
if (HasScalar) {
// Example sequence:
// vmv.v.x v8, a0
- return LT.first * TLI->getLMULCost(LT.second);
+ return LT.first * getRISCVInstructionCost(RISCVInstruction::VMV,
+ LT.second, 1, CostKind);
}
// Example sequence:
// vrgather.vi v9, v8, 0
- return LT.first * TLI->getVRGatherVICost(LT.second);
+ return LT.first * getRISCVInstructionCost(RISCVInstruction::VRGATHER_VI,
+ LT.second, 1, CostKind);
}
case TTI::SK_Splice:
// vslidedown+vslideup.
// TODO: Multiplying by LT.first implies this legalizes into multiple copies
// of similar code, but I think we expand through memory.
- return 2 * LT.first * TLI->getVSlideCost(LT.second);
+ return LT.first * getRISCVInstructionCost(RISCVInstruction::VSLIDE,
+ LT.second, 2, CostKind);
case TTI::SK_Reverse: {
// TODO: Cases to improve here:
// * Illegal vector types
@@ -435,7 +508,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (LT.second.isFixedLengthVector())
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
- InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
+ // FIXME: replace the constant `2` below with cost of VSIMPLE_INT (vid.v &
+ // vrsub.vx)
+ InstructionCost GatherCost =
+ 2 + getRISCVInstructionCost(RISCVInstruction::VRGATHER_VV,
+ LT.second, 1, CostKind);
// Mask operation additionally required extend and truncate
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
return LT.first * (LenCost + GatherCost + ExtendCost);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 81a45f623d294e8..829eace9d1236d2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -48,6 +48,24 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// actual target hardware.
unsigned getEstimatedVLFor(VectorType *Ty);
+ enum class RISCVInstruction {
+ VRGATHER_VV,
+ VRGATHER_VI,
+ VSLIDE,
+ VMERGE,
+ VMV,
+ VSIMPLE_INT, // ICMP
+ VNARROWING, // VNSRL
+ VSIMPLE_INT_RED, // VREDSUM, VREDAND, VREDOR, VREDXOR
+ VMINMAX_INTFP_RED, // VREDMAX, VREDMAXU, VREDMIN, VREDMINU
+ VUNORD_FP_RED, // VFREDUSUM, VFREDMAX, VFREDMIN
+ VORD_FP_RED, // VFREDOSUM
+ };
+
+ InstructionCost getRISCVInstructionCost(RISCVInstruction Inst, MVT VT,
+ unsigned NumInstr,
+ TTI::TargetCostKind CostKind);
+
/// Return the cost of accessing a constant pool entry of the specified
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
…Kind Instruction cost for CodeSize and Latency/RecipThroughput can be very differnet. Considering the diversity of CostKind and vendor-specific cost, and how they are spread across various TTI functions, it's becoming quite a challenge to handle. This patch adds an interface getRISCVInstructionCost to address it.
What's the actual problem you're hoping to solve here? Where are you going with this direction? |
return LT.first * TLI->getLMULCost(LT.second) * 3; | ||
return LT.first * | ||
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi | ||
getRISCVInstructionCost(RISCVInstruction::VMV, LT.second, 1, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RISCVInstruction::VMV seems to be used for different instruction here and on line 434. Line 434 uses it for vmv.s.x which is an operation that doesn't use LMUL. This code uses it for vmv.v.v which does use LMUL.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this makes how the cost is calculated much more consistent and clearer.
@@ -48,6 +48,25 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> { | |||
/// actual target hardware. | |||
unsigned getEstimatedVLFor(VectorType *Ty); | |||
|
|||
enum class RISCVInstruction { | |||
VRGATHER_VV, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are a lot like enums in RISCV::XXX
namespace, which is generated by TableGen and defined in RISCVMCTargetDesc.h
via GET_INSTRINFO_ENUM
. I don't know if we can reuse them?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good, I have updated based on it.
@@ -34,6 +34,53 @@ static cl::opt<unsigned> SLPMaxVF( | |||
"exclusively by SLP vectorizer."), | |||
cl::Hidden); | |||
|
|||
InstructionCost | |||
RISCVTTIImpl::getRISCVInstructionCost(RISCVInstruction Inst, MVT VT, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about making it getRISCVInstructionCost(ArrayRef<RISCVInstruction> Insts, MVT VT, TTI::TargetCostKind CostKind)
and calculate the cost sum of Insts
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated!
case RISCVInstruction::VSIMPLE_INT_RED: | ||
case RISCVInstruction::VMINMAX_INTFP_RED: | ||
case RISCVInstruction::VUNORD_FP_RED: { | ||
unsigned VL = VT.getVectorMinNumElements(); | ||
if (!VT.isFixedLengthVector()) { | ||
VL *= *getVScaleForTuning(); | ||
} | ||
return Log2_32_Ceil(VL); | ||
} | ||
case RISCVInstruction::VORD_FP_RED: { | ||
unsigned VL = VT.getVectorMinNumElements(); | ||
if (!VT.isFixedLengthVector()) { | ||
VL *= *getVScaleForTuning(); | ||
} | ||
return VL; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the plan to use these in a later patch?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that is the plan. I intend to make this an NFC patch so that it is easier to review.
@preames Two problem I would like to solve in this way:
And in this way I would like to update TTI cost functions of shuffle, cast, reduction, and arithmetic instruction in follow-up patches. |
@@ -279,7 +336,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |||
// Example sequence: | |||
// vnsrl.wi v10, v8, 0 | |||
if (equal(DeinterleaveMask, Mask)) | |||
return LT.first * TLI->getLMULCost(LT.second); | |||
return LT.first * getRISCVInstructionCost({RISCV::VNSRL_WI}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there is only one element, we can drop the braces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Thanks!
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm OK with this change, but please wait for others' approval.
@@ -435,7 +515,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |||
if (LT.second.isFixedLengthVector()) | |||
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices | |||
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1; | |||
InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second); | |||
// FIXME: replace the constant `2` below with cost of VSIMPLE_INT (vid.v & |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This FIXME is stale.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Thanks!
Remove NFC from the title since the cost for CodeSize changed in this patch. |
ping |
} | ||
case TTI::SK_Splice: | ||
// vslidedown+vslideup. | ||
// TODO: Multiplying by LT.first implies this legalizes into multiple copies | ||
// of similar code, but I think we expand through memory. | ||
return 2 * LT.first * TLI->getVSlideCost(LT.second); | ||
return LT.first * | ||
getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this slidedown.vx or slidedown.vi? It's a shuffle so the mask and thus the index should be know at compile time right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the index should be known and I can check if it is a 5-bit imm for .vi.
break; | ||
case RISCV::VSLIDEUP_VI: | ||
case RISCV::VSLIDEDOWN_VI: | ||
case RISCV::VSLIDEUP_VX: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.vx and .vi slides are somewhat different. The .vi instructions know at decode time how far to slide so hardware can know early which source DLEN pieces are needed for each DLEN piece of the result.
.vx requires the sources to determined after looking at the scalar register.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think it would be good to expand getVSlideCost to getVSlideVXCost and getVSlideVICost. Fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This breaks a few bots with https://lab.llvm.org/buildbot/#/builders/168/builds/17710 Should we revert it? |
Please revert it |
…ost… (#73651)" (#76536) Fails on bots https://lab.llvm.org/buildbot/#/builders/5/builds/39629 Issue #76535 This reverts commit 3e75dec.
@vitalybuka Thanks! I am still trying to reproduce this in my local. |
…vm#73651) …Kind Instruction cost for CodeSize and Latency/RecipThroughput can be very different. Considering the diversity of CostKind and vendor-specific cost, and how they are spread across various TTI functions, it's becoming quite a challenge to handle. This patch adds an interface getRISCVInstructionCost to address it.
…Kind
Instruction cost for CodeSize and Latency/RecipThroughput can be very differnet. Considering the diversity of CostKind and vendor-specific cost, and how they are spread across various TTI functions, it's becoming quite a challenge to handle. This patch adds an interface getRISCVInstructionCost to address it.