[VPlan] Expand VPExpandSCEVRecipes to VPInstructions before CSE. #197643
Conversation
|
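@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-risc-v Author: Florian Hahn (fhahn) Changes: Add expandSCEVExpressions transform that converts VPExpandSCEVRecipes to VPInstructions where possible, running before CSE so duplicates with other SCEV expansions (e.g., from addMinimumIterationCheck) are eliminated. This also reuses existing loop-invariant IR values via ScalarEvolution::getSCEVValues to avoid redundant computation. Currently limited to SCEVMulExpr (along with constants, unknowns, and vscale). Support for SCEVAddExpr and SCEVUDivExpr will follow in subsequent patches. Depends on #189455 (included in PR). Patch is 297.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/197643.diff 108 Files Affected: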
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index fd3a7fab1fd66..96f09b191c21e 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -63,6 +63,7 @@ class SCEVUnknown;
class StructType;
class TargetLibraryInfo;
class Type;
+class VPSCEVExpander;
enum SCEVTypes : unsigned short;
LLVM_ABI extern bool VerifySCEV;
@@ -1636,6 +1637,7 @@ class ScalarEvolution {
friend class SCEVCallbackVH;
friend class SCEVExpander;
friend class SCEVUnknown;
+ friend class VPSCEVExpander;
/// The function we are analyzing.
Function &F;
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 42355f5841eab..5ff730b3755d0 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -311,6 +311,11 @@ class SCEVExpander : public SCEVUseVisitor<SCEVExpander, Value *> {
LLVM_ABI bool isSafeToExpandAt(const SCEV *S,
const Instruction *InsertionPoint) const;
+ /// Drop poison-generating flags from \p I, then try re-infer via SCEV.
+ LLVM_ABI static void
+ dropPoisonGeneratingAnnotationsAndReinfer(ScalarEvolution &SE,
+ Instruction *I);
+
/// Insert code to directly compute the specified SCEV expression into the
/// program. The code is inserted into the specified block.
LLVM_ABI Value *expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 8877688207548..3a65a0405a05b 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1684,24 +1684,7 @@ Value *SCEVExpander::expand(SCEVUse S) {
} else {
for (Instruction *I : DropPoisonGeneratingInsts) {
rememberFlags(I);
- I->dropPoisonGeneratingAnnotations();
- // See if we can re-infer from first principles any of the flags we just
- // dropped.
- if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
- if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
- auto *BO = cast<BinaryOperator>(I);
- BO->setHasNoUnsignedWrap(
- ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
- BO->setHasNoSignedWrap(
- ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
- }
- if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
- auto *Src = NNI->getOperand(0);
- if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
- Constant::getNullValue(Src->getType()), I,
- DL).value_or(false))
- NNI->setNonNeg(true);
- }
+ dropPoisonGeneratingAnnotationsAndReinfer(SE, I);
}
}
// Remember the expanded value for this SCEV at this location.
@@ -1729,6 +1712,29 @@ void SCEVExpander::rememberFlags(Instruction *I) {
OrigFlags.try_emplace(I, PoisonFlags(I));
}
+void SCEVExpander::dropPoisonGeneratingAnnotationsAndReinfer(
+ ScalarEvolution &SE, Instruction *I) {
+ I->dropPoisonGeneratingAnnotations();
+ // See if we can re-infer from first principles any of the flags we just
+ // dropped.
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
+ if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
+ auto *BO = cast<BinaryOperator>(I);
+ BO->setHasNoUnsignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
+ BO->setHasNoSignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
+ }
+ if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
+ auto *Src = NNI->getOperand(0);
+ if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+ Constant::getNullValue(Src->getType()), I,
+ SE.getDataLayout())
+ .value_or(false))
+ NNI->setNonNeg(true);
+ }
+}
+
void SCEVExpander::replaceCongruentIVInc(
PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7213d4ae795ec..be1e6d7a17023 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6163,6 +6163,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
CM.requiresScalarEpilogue(BestVF.isVector()), &BestVPlan.getVFxUF(),
MaxRuntimeStep);
VPlanTransforms::materializeFactors(BestVPlan, VectorPH, BestVF);
+ // Limit expansions to VPInstruction to when not vectorizing the main epilogue loop.
+ if (EpilogueVecKind == EpilogueVectorizationKind::None)
+ VPlanTransforms::expandSCEVExpressions(BestVPlan, *PSE.getSE(), *OrigLoop);
VPlanTransforms::cse(BestVPlan);
VPlanTransforms::simplifyRecipes(BestVPlan);
VPlanTransforms::simplifyKnownEVL(BestVPlan, BestVF, PSE);
@@ -7324,7 +7327,7 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
CM.requiresScalarEpilogue(VF.isVector()),
CM.foldTailByMasking(), OrigLoop, BranchWeights,
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(),
- PSE, /*CheckBlock=*/nullptr);
+ PSE, Plan.getEntry());
}
// Determine how to lower the epilogue, which depends on 1) optimising
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 7c54a223f9793..4587856e9b9cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1448,16 +1448,6 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
}
-/// Return an insert point in \p EntryVPBB after existing VPIRPhi,
-/// VPIRInstruction and VPExpandSCEVRecipe recipes.
-static VPBasicBlock::iterator getExpandSCEVInsertPt(VPBasicBlock *EntryVPBB) {
- auto InsertPt = EntryVPBB->begin();
- while (InsertPt != EntryVPBB->end() &&
- isa<VPExpandSCEVRecipe, VPIRPhi, VPIRInstruction>(&*InsertPt))
- ++InsertPt;
- return InsertPt;
-}
-
void VPlanTransforms::addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
@@ -1489,10 +1479,7 @@ void VPlanTransforms::addMinimumIterationCheck(
return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
};
- VPBasicBlock *EntryVPBB = Plan.getEntry();
- // Place compare and branch in CheckBlock if given, ExpandSCEVs in Entry.
- VPBasicBlock *CheckVPBB = CheckBlock ? CheckBlock : EntryVPBB;
- VPBuilder Builder(CheckVPBB);
+ VPBuilder Builder(CheckBlock);
VPValue *TripCountCheck = Plan.getFalse();
const SCEV *Step = GetMinTripCount();
// TripCountCheck = false, folding tail implies positive vector trip
@@ -1510,9 +1497,9 @@ void VPlanTransforms::addMinimumIterationCheck(
TripCount, Step)) {
// Generate the minimum iteration check only if we cannot prove the
// check is known to be true, or known to be false.
- // ExpandSCEV must be placed in Entry.
- VPBuilder SCEVBuilder(EntryVPBB, getExpandSCEVInsertPt(EntryVPBB));
- VPValue *MinTripCountVPV = SCEVBuilder.createExpandSCEV(Step);
+ VPValue *MinTripCountVPV =
+ VPSCEVExpander(Builder, Plan, *PSE.getSE(), *OrigLoop, DL)
+ .expand(Step);
TripCountCheck = Builder.createICmp(
CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
} // else step known to be < trip count, use TripCountCheck preset to false.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 673355ffb1c96..e73a44740902f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -5016,6 +5016,28 @@ void VPlanTransforms::materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
VFxUF.replaceAllUsesWith(MulByUF);
}
+void VPlanTransforms::expandSCEVExpressions(VPlan &Plan, ScalarEvolution &SE,
+ Loop &OrigLoop) {
+ auto *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
+ VPBuilder Builder(Entry, Entry->begin());
+ VPSCEVExpander Expander(Builder, Plan, SE, OrigLoop);
+
+ // Expand VPExpandSCEVRecipes to VPInstructions using VPSCEVExpander. During
+ // the transition, unsupported SCEV expressions are still expanded to
+ // VPExpandSCEVRecipes.
+ for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
+ auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
+ if (!ExpSCEV)
+ continue;
+ Builder.setInsertPoint(ExpSCEV);
+ VPValue *Expanded = Expander.expand(ExpSCEV->getSCEV());
+ ExpSCEV->replaceAllUsesWith(Expanded);
+ if (Plan.getTripCount() == ExpSCEV)
+ Plan.resetTripCount(Expanded);
+ ExpSCEV->eraseFromParent();
+ }
+}
+
DenseMap<const SCEV *, Value *>
VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
SCEVExpander Expander(SE, "induction", /*PreserveLCSSA=*/false);
@@ -5023,16 +5045,15 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
auto *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
BasicBlock *EntryBB = Entry->getIRBasicBlock();
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
+ // Expand remaining VPExpandSCEVRecipes to IR instructions using SCEVExpander.
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
- if (isa<VPIRInstruction, VPIRPhi>(&R))
- continue;
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpSCEV)
- break;
+ continue;
const SCEV *Expr = ExpSCEV->getSCEV();
Value *Res =
Expander.expandCodeFor(Expr, Expr->getType(), EntryBB->getTerminator());
- ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
+ ExpandedSCEVs[Expr] = Res;
VPValue *Exp = Plan.getOrAddLiveIn(Res);
ExpSCEV->replaceAllUsesWith(Exp);
if (Plan.getTripCount() == ExpSCEV)
@@ -5040,8 +5061,7 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
ExpSCEV->eraseFromParent();
}
assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) &&
- "VPExpandSCEVRecipes must be at the beginning of the entry block, "
- "before any VPIRInstructions");
+ "all VPExpandSCEVRecipes must have been expanded");
// Add IR instructions in the entry basic block but not in the VPIRBasicBlock
// to the VPIRBasicBlock.
auto EI = Entry->begin();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c66d83d3177d3..bc8d5ba879f39 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -164,15 +164,13 @@ struct VPlanTransforms {
/// be added to the middle block.
LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
- // Create a check to \p Plan to see if the vector loop should be executed.
- // If \p CheckBlock is non-null, the compare and branch are placed there;
- // ExpandSCEV recipes are always placed in Entry.
+ // Create a check in \p CheckBlock to see if the vector loop should be
+ // executed.
static void addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
- DebugLoc DL, PredicatedScalarEvolution &PSE,
- VPBasicBlock *CheckBlock = nullptr);
+ DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock);
/// Add a new check block before the vector preheader to \p Plan to check if
/// the main vector loop should be executed (TC >= VF * UF).
@@ -429,10 +427,18 @@ struct VPlanTransforms {
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
ElementCount VF);
- /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
- /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
- /// value. A mapping from SCEV expressions to their expanded IR value is
- /// returned.
+ /// Try to expand VPExpandSCEVRecipes in \p Plan's entry block to
+ /// VPInstructions. Recipes that cannot be expanded (casts, min/max) are kept
+ /// for later IR-level expansion by expandSCEVs. Should run before CSE so
+ /// that duplicate expansions are eliminated. Existing loop-invariant IR
+ /// values are reused as live-ins.
+ static void expandSCEVExpressions(VPlan &Plan, ScalarEvolution &SE,
+ Loop &OrigLoop);
+
+ /// Expand remaining VPExpandSCEVRecipes in \p Plan's entry block using
+ /// SCEVExpander. Each VPExpandSCEVRecipe is replaced with a live-in wrapping
+ /// the expanded IR value. A mapping from SCEV expressions to their expanded
+ /// IR value is returned.
static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
ScalarEvolution &SE);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 24adcea1040b5..38e0f18284899 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -7,14 +7,17 @@
//===----------------------------------------------------------------------===//
#include "VPlanUtils.h"
+#include "LoopVectorizationPlanner.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;
using namespace llvm::VPlanPatternMatch;
@@ -839,3 +842,62 @@ bool vputils::isUsedByLoadStoreAddress(const VPValue *V) {
}
return false;
}
+
+/// Try to find a loop-invariant IR value for \p S in \p OrigLoop's preheader
+/// that can be reused. Returns the corresponding live-in VPValue, or nullptr
+/// if no reusable IR value is found.
+VPValue *VPSCEVExpander::tryToReuseIRValue(const SCEV *S) {
+ if (isa<SCEVConstant, SCEVUnknown>(S))
+ return nullptr;
+ BasicBlock *PH = OrigLoop.getLoopPreheader();
+ if (!PH)
+ return nullptr;
+ for (Value *V : SE.getSCEVValues(S)) {
+ if (V->getType() != S->getType())
+ continue;
+ // Non-instruction values (arguments, globals) are always reusable.
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return Plan.getOrAddLiveIn(V);
+ // Only reuse instructions in the loop preheader, as instructions in
+ // sibling branches may not dominate this loop's preheader.
+ if (I->getParent() != PH)
+ continue;
+ SmallVector<Instruction *> DropPoisonGeneratingInsts;
+ if (!SE.canReuseInstruction(S, I, DropPoisonGeneratingInsts))
+ continue;
+ for (Instruction *DropI : DropPoisonGeneratingInsts)
+ SCEVExpander::dropPoisonGeneratingAnnotationsAndReinfer(SE, DropI);
+ return Plan.getOrAddLiveIn(V);
+ }
+ return nullptr;
+}
+
+VPValue *VPSCEVExpander::expand(const SCEV *S) {
+ if (VPValue *V = tryToReuseIRValue(S))
+ return V;
+
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return Plan.getOrAddLiveIn(cast<SCEVConstant>(S)->getValue());
+ case scUnknown:
+ return Plan.getOrAddLiveIn(cast<SCEVUnknown>(S)->getValue());
+ case scVScale:
+ return Builder.createNaryOp(VPInstruction::VScale, {}, S->getType());
+ case scMulExpr: {
+ auto *Mul = cast<SCEVMulExpr>(S);
+ VPIRFlags::WrapFlagsTy WrapFlags(Mul->hasNoUnsignedWrap(),
+ Mul->hasNoSignedWrap());
+ VPValue *Result = expand(Mul->getOperand(0));
+ for (const SCEVUse &Op : drop_begin(Mul->operands()))
+ Result = Builder.createOverflowingOp(Instruction::Mul,
+ {Result, expand(Op)}, WrapFlags, DL);
+ return Result;
+ }
+ default:
+ // Unsupported SCEV kind; fall back to VPExpandSCEVRecipe.
+ assert(Builder.getInsertBlock() == Plan.getEntry() &&
+ "VPExpandSCEVRecipe fallback requires insertion in the entry block");
+ return Builder.createExpandSCEV(S);
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 21da1864d5d6a..fbcb972370c36 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -176,6 +176,29 @@ VPSingleDefRecipe *findHeaderMask(VPlan &Plan);
} // namespace vputils
+/// Lightweight SCEV-to-VPlan expander. Converts SCEV expressions into
+/// VPInstructions where possible, falling back to VPExpandSCEVRecipe for
+/// unsupported expressions (casts, min/max).
+class VPSCEVExpander {
+ VPBuilder &Builder;
+ VPlan &Plan;
+ ScalarEvolution &SE;
+ Loop &OrigLoop;
+ DebugLoc DL;
+
+ /// Try to find a loop-invariant IR value in OrigLoop's preheader whose
+ /// SCEV matches \p S. Returns the corresponding live-in VPValue, or nullptr
+ /// if none is found.
+ VPValue *tryToReuseIRValue(const SCEV *S);
+
+public:
+ VPSCEVExpander(VPBuilder &Builder, VPlan &Plan, ScalarEvolution &SE,
+ Loop &OrigLoop, DebugLoc DL = DebugLoc())
+ : Builder(Builder), Plan(Plan), SE(SE), OrigLoop(OrigLoop), DL(DL) {}
+
+ /// Expand \p S into VPlan recipes using the builder.
+ VPValue *expand(const SCEV *S);
+};
//===----------------------------------------------------------------------===//
// Utilities for modifying predecessors and successors of VPlan blocks.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 690a61e3e05c2..f877f0ba9cfee 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -542,8 +542,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 257, [[TMP3]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
-; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2
+; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP2]], 2
; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-scalar-assignment.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-scalar-assignment.ll
index 9fc9f03461b69..fcc646cb49137 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-scalar-assignment.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-scalar-assignment.ll
@@ -31,8 +31,7 @@ define i32 @simple_csa_int_select(i64 %N, ptr %data, i32 %a) {
; SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; SVE: [[VECTOR_PH]]:
-; SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
+; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
@@ -120,8 +119,7 @@ define ptr @simple_csa_ptr_select(i64 %N, ptr %data, i64 %a, ptr %init) ...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
9b7eeb8 to
4e13a7d
Compare
| // Expand VPExpandSCEVRecipes to VPInstructions using VPSCEVExpander. During | ||
| // the transition, unsupported SCEV expressions are still expanded to | ||
| // VPExpandSCEVRecipes. |
There was a problem hiding this comment.
Hm, this is highly confusing! We don't "expand" to ExpandSCEVRecipes; we simply fall back to not expanding the ExpandSCEVRecipe to VPInstructions, and use SCEVExpander to expand to Instructions?
There was a problem hiding this comment.
The early transform now only expands VPExpandSCEV to VPInstructions (and skips if it cannot be expanded).
There was a problem hiding this comment.
Would be good to update this comment?
| // Unsupported SCEV kind; fall back to VPExpandSCEVRecipe. | ||
| assert(Builder.getInsertBlock() == Plan.getEntry() && | ||
| "VPExpandSCEVRecipe fallback requires insertion in the entry block"); | ||
| return Builder.createExpandSCEV(S); |
There was a problem hiding this comment.
Just return nullptr here?
There was a problem hiding this comment.
Updated to do that for now; this also requires adding a bailout above if any operand is nullptr, though. Updated in #189455.
| VPValue *VPSCEVExpander::tryToReuseIRValue(const SCEV *S) { | ||
| if (isa<SCEVConstant, SCEVUnknown>(S)) | ||
| return nullptr; | ||
| BasicBlock *PH = OrigLoop.getLoopPreheader(); |
There was a problem hiding this comment.
Hm, can we not use Plan.getScalarPreheader()? I'm confused about why we need the OrigLoop here.
There was a problem hiding this comment.
We can use the entry block, which is a VPIRBasicBlock at this point. The scalar preheader retrieved from VPlan only dominates the scalar loop, but not the vector loop.
49bb901 to
c3f77d5
Compare
artagnon
left a comment
There was a problem hiding this comment.
Let's attack the patch this is dependent on for now -- I've left a review.
| // Expand VPExpandSCEVRecipes to VPInstructions using VPSCEVExpander. During | ||
| // the transition, unsupported SCEV expressions are still expanded to | ||
| // VPExpandSCEVRecipes. |
There was a problem hiding this comment.
Would be good to update this comment?
c706cae to
c726b5e
Compare
Add expandSCEVExpressions transform that converts VPExpandSCEVRecipes to VPInstructions where possible, running before CSE so duplicates with other SCEV expansions (e.g., from addMinimumIterationCheck) are eliminated. This also reuses existing loop-invariant IR values via ScalarEvolution::getSCEVValues to avoid redundant computation. Currently limited to SCEVMulExpr (along with constants, unknowns, and vscale). Support for SCEVAddExpr and SCEVUDivExpr will follow in subsequent patches.
a718890 to
9d14f49
Compare
9d14f49 to
7229fd0
Compare
| // Expand VPExpandSCEVRecipes to VPInstructions using VPSCEVExpander. During | ||
| // the transition, unsupported SCEV expressions are still expanded to | ||
| // VPExpandSCEVRecipes. |
| class VPSCEVExpander { | ||
| VPBuilder &Builder; | ||
| ScalarEvolution &SE; | ||
| DebugLoc DL; |
There was a problem hiding this comment.
| DebugLoc DL; | |
| const DebugLoc &DL; |
There was a problem hiding this comment.
DebugLoc is already a wrapper around a pointer; IIUC it is usually used directly by value to avoid another level of indirection.
…vpinstructions-before-cse
artagnon
left a comment
There was a problem hiding this comment.
LGTM, thanks! DL as a non-const-ref is also fine, in case there is some issue changing it?
fhahn
left a comment
There was a problem hiding this comment.
LGTM, thanks! DL as a non-const-ref is also fine, in case there is some issue changing it?
responded inline, but forgot to submit comments....
DebugLoc is already a wrapper around a pointer; IIUC it is usually used directly by value to avoid another level of indirection.
| class VPSCEVExpander { | ||
| VPBuilder &Builder; | ||
| ScalarEvolution &SE; | ||
| DebugLoc DL; |
There was a problem hiding this comment.
DebugLoc is already a wrapper around a pointer; IIUC it is usually used directly by value to avoid another level of indirection.
…e CSE. (#197643) Add expandSCEVExpressions transform that converts VPExpandSCEVRecipes to VPInstructions where possible, running before CSE so duplicates with other SCEV expansions (e.g., from addMinimumIterationCheck) are eliminated. This also reuses existing loop-invariant IR values via ScalarEvolution::getSCEVValues to avoid redundant computation. Currently limited to SCEVMulExpr (along with constants, unknowns, and vscale). Support for SCEVAddExpr and SCEVUDivExpr will follow in subsequent patches. Depends on llvm/llvm-project#189455 PR: llvm/llvm-project#197643
…e CSE. (#197643) Add expandSCEVExpressions transform that converts VPExpandSCEVRecipes to VPInstructions where possible, running before CSE so duplicates with other SCEV expansions (e.g., from addMinimumIterationCheck) are eliminated. This also reuses existing loop-invariant IR values via ScalarEvolution::getSCEVValues to avoid redundant computation. Currently limited to SCEVMulExpr (along with constants, unknowns, and vscale). Support for SCEVAddExpr and SCEVUDivExpr will follow in subsequent patches. Depends on llvm/llvm-project#189455 PR: llvm/llvm-project#197643
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/35076 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/29148 Here is the relevant piece of the build log for the reference |
Add expandSCEVExpressions transform that converts VPExpandSCEVRecipes
to VPInstructions where possible, running before CSE so duplicates with
other SCEV expansions (e.g., from addMinimumIterationCheck) are
eliminated. This also reuses existing loop-invariant IR values via
ScalarEvolution::getSCEVValues to avoid redundant computation.
Currently limited to SCEVMulExpr (along with constants, unknowns, and
vscale). Support for SCEVAddExpr and SCEVUDivExpr will follow in
subsequent patches.
Depends on #189455