-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Support live-ins without underlying IR in type analysis. #80723
Conversation
A VPlan contains multiple live-ins without underlying IR, like VFxUF or VectorTripCount. Trying to infer the scalar type of those causes a crash at the moment. Update VPTypeAnalysis to take a VPlan in its constructor and assign types to those live-ins up front. All those live-ins share the type of the canonical IV.
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesA VPlan contains multiple live-ins without underlying IR, like VFxUF or VectorTripCount. Trying to infer the scalar type of those causes a crash at the moment. Update VPTypeAnalysis to take a VPlan in its constructor and assign types to those live-ins up front. All those live-ins share the type of the canonical IV. Full diff: https://github.com/llvm/llvm-project/pull/80723.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 162a3c4b195e5..43f10c315f2e9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -238,7 +238,7 @@ struct VPTransformState {
DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx)
: VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
- LVer(nullptr), TypeAnalysis(Ctx) {}
+ LVer(nullptr), TypeAnalysis(*Plan, Ctx) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
ElementCount VF;
@@ -2932,6 +2932,9 @@ class VPlan {
return BackedgeTakenCount;
}
+ /// Return the backedge taken count of the original loop, if set.
+ VPValue *getBackedgeTakenCount() { return BackedgeTakenCount; }
+
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index b9ffe7e5b7af7..b71bac1546332 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -35,12 +35,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
CachedTypes[OtherV] = ResTy;
return ResTy;
}
- case Instruction::ICmp: {
- // TODO: Check if types for both operands agree. This also requires
- // type-inference for the vector-trip-count, which is missing at the moment.
- Type *ResTy = inferScalarType(R->getOperand(0));
- return ResTy;
- }
+ case Instruction::ICmp:
case VPInstruction::FirstOrderRecurrenceSplice: {
Type *ResTy = inferScalarType(R->getOperand(0));
VPValue *OtherV = R->getOperand(1);
@@ -203,6 +198,16 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
llvm_unreachable("Unhandled opcode");
}
+VPTypeAnalysis::VPTypeAnalysis(VPlan &Plan, LLVMContext &Ctx) : Ctx(Ctx) {
+ auto *CanIV = Plan.getCanonicalIV();
+ Type *CanIVTy = inferScalarType(CanIV);
+ CachedTypes[&Plan.getVectorTripCount()] = CanIVTy;
+ CachedTypes[&Plan.getVFxUF()] = CanIVTy;
+ CachedTypes[Plan.getTripCount()] = CanIVTy;
+ if (auto *BTC = Plan.getBackedgeTakenCount())
+ CachedTypes[BTC] = CanIVTy;
+}
+
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
if (Type *CachedTy = CachedTypes.lookup(V))
return CachedTy;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index 7276641551ae8..6b218a39f605c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -15,6 +15,7 @@ namespace llvm {
class LLVMContext;
class VPValue;
+class VPlan;
class VPBlendRecipe;
class VPInstruction;
class VPWidenRecipe;
@@ -47,7 +48,7 @@ class VPTypeAnalysis {
Type *inferScalarTypeForRecipe(const VPReplicateRecipe *R);
public:
- VPTypeAnalysis(LLVMContext &Ctx) : Ctx(Ctx) {}
+ VPTypeAnalysis(VPlan &Plan, LLVMContext &Ctx);
/// Infer the type of \p V. Returns the scalar type of \p V.
Type *inferScalarType(const VPValue *V);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 71f5285f90236..4f4febcd1403f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -502,7 +502,7 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
}
// Truncate base induction if needed.
- VPTypeAnalysis TypeInfo(SE.getContext());
+ VPTypeAnalysis TypeInfo(Plan, SE.getContext());
Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
if (TruncI) {
Type *TruncTy = TruncI->getType();
@@ -880,7 +880,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
#ifndef NDEBUG
// Verify that the cached type info is for both A and its users is still
// accurate by comparing it to freshly computed types.
- VPTypeAnalysis TypeInfo2(TypeInfo.getContext());
+ VPTypeAnalysis TypeInfo2(*R.getParent()->getPlan(), TypeInfo.getContext());
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
for (VPUser *U : A->users()) {
auto *R = dyn_cast<VPRecipeBase>(U);
@@ -901,7 +901,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
- VPTypeAnalysis TypeInfo(Ctx);
+ VPTypeAnalysis TypeInfo(Plan, Ctx);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
simplifyRecipe(R, TypeInfo);
@@ -922,7 +922,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
// other uses have different types for their operands, making them invalidly
// typed.
DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
- VPTypeAnalysis TypeInfo(Ctx);
+ VPTypeAnalysis TypeInfo(Plan, Ctx);
VPBasicBlock *PH = Plan.getEntry();
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
|
ping :) |
CachedTypes[&Plan.getVFxUF()] = CanIVTy; | ||
CachedTypes[Plan.getTripCount()] = CanIVTy; | ||
if (auto *BTC = Plan.getBackedgeTakenCount()) | ||
CachedTypes[BTC] = CanIVTy; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These live-ins-w/o-an-underlying-value VPValues are temporary place-holders that should be replaced by recipes, as the boundary of VPlan's scope expands; or folded into constants (UF, times non-scalable VF). So fixing their type for now seems ok. Still, it may be better to pass and record the "Canonical Type" of (the start value - a zero that may be replaced - of) the Canonical IV directly, rather than call the recursive inferScalarType() during construction of VPTypeAnalsis.
VPTypeAnalysis::VPTypeAnalysis(Type *CanTy, LLVMContext &Ctx) : CanTy(CanTy), Ctx(Ctx) {}
Perhaps (also) record it in VPlan, as part of addCanonicalIVRecipes(); or at-least have VPlan provide a method to retrieve it.
Plus, have these VPValues lookup this type when needed, rather than listing them all here upfront, and maintaining this list as it changes?
(BTW, is Ctx needed?)
(Another option may be to record their type upon construction, using another subclass of VPValue, is better avoided being place-holders.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, updated to take the CanonicalIVType as argument, instead of the whole Plan and avoid caching it as suggested!
The canonical IV type is available once it is added via the type of its start value. Updated patch uses getCanonicalIV()->getScalarType()
to retrieve that type.
(BTW, is Ctx needed?)
It is used to construct i1
types for the results of compares at the moment. Now that we have access to a type, we could in theory use that type to retrieve a context, but it seems cleaner to store it explicitly.
if (auto *BTC = Plan.getBackedgeTakenCount()) | ||
CachedTypes[BTC] = CanIVTy; | ||
} | ||
|
||
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { | ||
if (Type *CachedTy = CachedTypes.lookup(V)) | ||
return CachedTy; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An alternative to caching the pre-computed type for all live-ins-w/o-underlying-value, as raised above, may be to change below into:
if (V->isLiveIn()) {
if (auto *IRValue = V->getLiveInIRValue())
return IRValue->getType();
return CanonicalType;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated as suggested, thanks!
@@ -35,12 +35,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { | |||
CachedTypes[OtherV] = ResTy; | |||
return ResTy; | |||
} | |||
case Instruction::ICmp: { | |||
// TODO: Check if types for both operands agree. This also requires | |||
// type-inference for the vector-trip-count, which is missing at the moment. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: should have probably referred to backedge-taken-count
, used by an ICmp to form the header mask, rather than vector-trip-count
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for accommodating, this looks good to me, adding a couple of nits, worth explaining what CanonicalIVTy is used for, possibly along with a TODO to promote the temporary live-ins-w/o-underlying-value (which need it) into recipes.
/// Return the backedge taken count of the original loop, if set. | ||
VPValue *getBackedgeTakenCount() { return BackedgeTakenCount; } | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// Return the backedge taken count of the original loop, if set. | |
VPValue *getBackedgeTakenCount() { return BackedgeTakenCount; } |
is this still needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed, thanks!
return V->getLiveInIRValue()->getType(); | ||
if (V->isLiveIn()) { | ||
if (auto *IRValue = V->getLiveInIRValue()) | ||
return IRValue->getType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: worth a comment to explain who's expected to use CanonicalIVTy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment ,thanks!
@@ -15,6 +15,7 @@ namespace llvm { | |||
|
|||
class LLVMContext; | |||
class VPValue; | |||
class VPlan; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
class VPlan; |
is this still needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dropped thanks.
@@ -35,6 +36,7 @@ class Type; | |||
/// of the previously inferred types. | |||
class VPTypeAnalysis { | |||
DenseMap<const VPValue *, Type *> CachedTypes; | |||
Type *CanonicalIVTy; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: worth a comment what this is for.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added ,thanks!
@@ -47,7 +49,8 @@ class VPTypeAnalysis { | |||
Type *inferScalarTypeForRecipe(const VPReplicateRecipe *R); | |||
|
|||
public: | |||
VPTypeAnalysis(LLVMContext &Ctx) : Ctx(Ctx) {} | |||
VPTypeAnalysis(Type *CanonicalIVTy, LLVMContext &Ctx) | |||
: CanonicalIVTy(CanonicalIVTy), Ctx(Ctx) {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit (unrelated to this patch): does VPTypeAnalysis really need Ctx
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could retrieve the context from a type separately when needed. Can do separately if preferred?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any current use of Ctx
? Sure, if this is indeed dead, should be eliminated separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current uses are to create i1
& void
types:
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp#L63
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp#L163
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp#L189
A VPlan contains multiple live-ins without underlying IR, like VFxUF or VectorTripCount. Trying to infer the scalar type of those causes a crash at the moment.
Update VPTypeAnalysis to take a VPlan in its constructor and assign types to those live-ins up front. All those live-ins share the type of the canonical IV.