-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV] Clang-format some loop vectorizer files #158507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-vectorizers Author: Tibor Győri (TiborGY) Changes: Some loop vectorizer files do not appear to conform to the LLVM style. I intend to work on these files, and formatting my changes with clang-format has resulted in diffs polluted with formatting changes to code I have not touched. Patch is 24.65 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/158507.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ff35db14f7094..636607d57248a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -14,7 +14,6 @@
// is a need (but D45420 needs to happen first).
//
-#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -28,6 +27,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
using namespace llvm;
@@ -40,10 +40,10 @@ static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
-static cl::opt<bool>
-AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
- cl::desc("Enable recognition of non-constant strided "
- "pointer induction variables."));
+static cl::opt<bool> AllowStridedPointerIVs(
+ "lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
+ cl::desc("Enable recognition of non-constant strided "
+ "pointer induction variables."));
static cl::opt<bool>
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
@@ -420,8 +420,8 @@ static IntegerType *getWiderInductionTy(const DataLayout &DL, Type *Ty0,
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
SmallPtrSetImpl<Value *> &AllowedExit) {
- // Reductions, Inductions and non-header phis are allowed to have exit users. All
- // other instructions must not have external users.
+ // Reductions, Inductions and non-header phis are allowed to have exit users.
+ // All other instructions must not have external users.
if (!AllowedExit.count(Inst))
// Check that all of the users of the loop are inside the BB.
for (User *U : Inst->users()) {
@@ -459,12 +459,13 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
// pointer is checked to reference consecutive elements suitable for a
// masked access.
const auto &Strides =
- LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
+ LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
bool CanAddPredicate = !llvm::shouldOptimizeForSize(
TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
- int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
- CanAddPredicate, false).value_or(0);
+ int Stride =
+ getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides, CanAddPredicate, false)
+ .value_or(0);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
@@ -622,7 +623,8 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// not supported yet.
auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
if (!Br) {
- reportVectorizationFailure("Unsupported basic block terminator",
+ reportVectorizationFailure(
+ "Unsupported basic block terminator",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
@@ -641,7 +643,8 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
!TheLoop->isLoopInvariant(Br->getCondition()) &&
!LI->isLoopHeader(Br->getSuccessor(0)) &&
!LI->isLoopHeader(Br->getSuccessor(1))) {
- reportVectorizationFailure("Unsupported conditional branch",
+ reportVectorizationFailure(
+ "Unsupported conditional branch",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
@@ -655,9 +658,10 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// simple outer loops scenarios with uniform nested loops.
if (!isUniformLoopNest(TheLoop /*loop nest*/,
TheLoop /*context outer loop*/)) {
- reportVectorizationFailure("Outer loop contains divergent loops",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
+ reportVectorizationFailure(
+ "Outer loop contains divergent loops",
+ "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
+ ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1623,9 +1627,10 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!Lp->getLoopPreheader()) {
- reportVectorizationFailure("Loop doesn't have a legal pre-header",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
+ reportVectorizationFailure(
+ "Loop doesn't have a legal pre-header",
+ "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
+ ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1634,9 +1639,10 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a single backedge.
if (Lp->getNumBackEdges() != 1) {
- reportVectorizationFailure("The loop must have a single backedge",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
+ reportVectorizationFailure(
+ "The loop must have a single backedge",
+ "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
+ ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -2049,7 +2055,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
"due to SCEVThreshold");
- reportVectorizationFailure("Too many SCEV checks needed",
+ reportVectorizationFailure(
+ "Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
"TooManySCEVRunTimeChecks", ORE, TheLoop);
if (DoExtraAnalysis)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c04b5cb10eac2..ba5ac8465da0c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -53,7 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
@@ -144,6 +143,7 @@
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -197,36 +197,35 @@ static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
"vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
cl::desc("The maximum allowed number of runtime memory checks"));
-// Option prefer-predicate-over-epilogue indicates that an epilogue is undesired,
-// that predication is preferred, and this lists all options. I.e., the
-// vectorizer will try to fold the tail-loop (epilogue) into the vector body
+// Option prefer-predicate-over-epilogue indicates that an epilogue is
+// undesired, that predication is preferred, and this lists all options. I.e.,
+// the vectorizer will try to fold the tail-loop (epilogue) into the vector body
// and predicate the instructions accordingly. If tail-folding fails, there are
// different fallback strategies depending on these values:
namespace PreferPredicateTy {
- enum Option {
- ScalarEpilogue = 0,
- PredicateElseScalarEpilogue,
- PredicateOrDontVectorize
- };
+enum Option {
+ ScalarEpilogue = 0,
+ PredicateElseScalarEpilogue,
+ PredicateOrDontVectorize
+};
} // namespace PreferPredicateTy
static cl::opt<PreferPredicateTy::Option> PreferPredicateOverEpilogue(
"prefer-predicate-over-epilogue",
- cl::init(PreferPredicateTy::ScalarEpilogue),
- cl::Hidden,
+ cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden,
cl::desc("Tail-folding and predication preferences over creating a scalar "
"epilogue loop."),
- cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue,
- "scalar-epilogue",
- "Don't tail-predicate loops, create scalar epilogue"),
- clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
- "predicate-else-scalar-epilogue",
- "prefer tail-folding, create scalar epilogue if tail "
- "folding fails."),
- clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
- "predicate-dont-vectorize",
- "prefers tail-folding, don't attempt vectorization if "
- "tail-folding fails.")));
+ cl::values(
+ clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue",
+ "Don't tail-predicate loops, create scalar epilogue"),
+ clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
+ "predicate-else-scalar-epilogue",
+ "prefer tail-folding, create scalar epilogue if tail "
+ "folding fails."),
+ clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
+ "predicate-dont-vectorize",
+ "prefers tail-folding, don't attempt vectorization if "
+ "tail-folding fails.")));
static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
"force-tail-folding-style", cl::desc("Force the tail folding style"),
@@ -262,7 +261,8 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
/// predication, or in order to mask away gaps.
static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
- cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
+ cl::desc("Enable vectorization on masked interleaved memory accesses in a "
+ "loop"));
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
@@ -1254,8 +1254,7 @@ class LoopVectorizationCostModel {
/// First result is for scalarization (will be invalid for scalable
/// vectors); second is for the safe-divisor strategy.
std::pair<InstructionCost, InstructionCost>
- getDivRemSpeculationCost(Instruction *I,
- ElementCount VF) const;
+ getDivRemSpeculationCost(Instruction *I, ElementCount VF) const;
/// Returns true if \p I is a memory instruction with consecutive memory
/// access that can be widened.
@@ -1528,10 +1527,9 @@ class LoopVectorizationCostModel {
/// memory access.
InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF);
- /// The cost calculation for Load/Store instruction \p I with uniform pointer -
- /// Load: scalar load + broadcast.
- /// Store: scalar store + (loop invariant value stored? 0 : extract of last
- /// element)
+ /// The cost calculation for Load/Store instruction \p I with uniform pointer
+ /// - Load: scalar load + broadcast. Store: scalar store + (loop invariant
+ /// value stored? 0 : extract of last element)
InstructionCost getUniformMemOpCost(Instruction *I, ElementCount VF);
/// Estimate the overhead of scalarizing an instruction. This is a
@@ -2208,12 +2206,13 @@ static std::optional<unsigned> getMaxVScale(const Function &F,
}
/// For the given VF and UF and maximum trip count computed for the loop, return
-/// whether the induction variable might overflow in the vectorized loop. If not,
-/// then we know a runtime overflow check always evaluates to false and can be
-/// removed.
-static bool isIndvarOverflowCheckKnownFalse(
- const LoopVectorizationCostModel *Cost,
- ElementCount VF, std::optional<unsigned> UF = std::nullopt) {
+/// whether the induction variable might overflow in the vectorized loop. If
+/// not, then we know a runtime overflow check always evaluates to false and can
+/// be removed.
+static bool
+isIndvarOverflowCheckKnownFalse(const LoopVectorizationCostModel *Cost,
+ ElementCount VF,
+ std::optional<unsigned> UF = std::nullopt) {
// Always be conservative if we don't know the exact unroll factor.
unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF);
@@ -2456,7 +2455,7 @@ struct CSEDenseMapInfo {
} // end anonymous namespace
-///Perform cse of induction variable instructions.
+/// Perform cse of induction variable instructions.
static void cse(BasicBlock *BB) {
// Perform simple cse.
SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
@@ -2685,7 +2684,8 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
auto ForcedScalar = ForcedScalars.find(VF);
if (ForcedScalar != ForcedScalars.end())
for (auto *I : ForcedScalar->second) {
- LLVM_DEBUG(dbgs() << "LV: Found (forced) scalar instruction: " << *I << "\n");
+ LLVM_DEBUG(dbgs() << "LV: Found (forced) scalar instruction: " << *I
+ << "\n");
Worklist.insert(I);
}
@@ -2776,7 +2776,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
// Do we have a non-scalar lowering for this predicated
// instruction? No - it is scalar with predication.
- switch(I->getOpcode()) {
+ switch (I->getOpcode()) {
default:
return true;
case Instruction::Call:
@@ -2833,7 +2833,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
- switch(I->getOpcode()) {
+ switch (I->getOpcode()) {
default:
llvm_unreachable(
"instruction should have been considered by earlier checks");
@@ -2865,7 +2865,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
std::pair<InstructionCost, InstructionCost>
LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
- ElementCount VF) const {
+ ElementCount VF) const {
assert(I->getOpcode() == Instruction::UDiv ||
I->getOpcode() == Instruction::SDiv ||
I->getOpcode() == Instruction::SRem ||
@@ -3060,8 +3060,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// where only a single instance out of VF should be formed.
auto AddToWorklistIfAllowed = [&](Instruction *I) -> void {
if (IsOutOfScope(I)) {
- LLVM_DEBUG(dbgs() << "LV: Found not uniform due to scope: "
- << *I << "\n");
+ LLVM_DEBUG(dbgs() << "LV: Found not uniform due to scope: " << *I
+ << "\n");
return;
}
if (isPredicatedInst(I)) {
@@ -3266,7 +3266,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
if (Legal->getRuntimePointerChecking()->Need) {
- reportVectorizationFailure("Runtime ptr check is required with -Os/-Oz",
+ reportVectorizationFailure(
+ "Runtime ptr check is required with -Os/-Oz",
"runtime pointer checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
"compiling with -Os/-Oz",
@@ -3275,7 +3276,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
}
if (!PSE.getPredicate().isAlwaysTrue()) {
- reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz",
+ reportVectorizationFailure(
+ "Runtime SCEV check is required with -Os/-Oz",
"runtime SCEV checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
"compiling with -Os/-Oz",
@@ -3285,7 +3287,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
// FIXME: Avoid specializing for stride==1 instead of bailing out.
if (!Legal->getLAI()->getSymbolicStrides().empty()) {
- reportVectorizationFailure("Runtime stride check for small trip count",
+ reportVectorizationFailure(
+ "Runtime stride check for small trip count",
"runtime stride == 1 checks needed. Enable vectorization of "
"this loop without such check by compiling with -Os/-Oz",
"CantVersionLoopWithOptForSize", ORE, TheLoop);
@@ -3506,7 +3509,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
if (TC != ElementCount::getFixed(MaxTC))
LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n');
if (TC.isScalar()) {
- reportVectorizationFailure("Single iteration (non) loop",
+ reportVectorizationFailure(
+ "Single iteration (non) loop",
"loop trip count is one, irrelevant for vectorization",
"SingleIterationLoop", ORE, TheLoop);
return FixedScalableVFPair::getNone();
@@ -4356,7 +4360,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
if (EpilogueVectorizationForceVF > 1) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n");
- ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
+ ElementCount ForcedEC =
+ ElementCount::getFixed(EpilogueVectorizationForceVF);
if (hasPlanWithVF(ForcedEC))
return {ForcedEC, 0, 0};
@@ -4565,7 +4570,8 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
LoopCost = CM.expectedCost(VF);
else
LoopCost = cost(Plan, VF);
- assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
+ assert(LoopCost.isValid() &&
+ "Expected to have chosen a VF with valid cost");
// Loop body is free and there is no need for interleaving.
if (LoopCost == 0)
@@ -4858,11 +4864,9 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert((isPredicatedInst(I)) &&
- "Expecting a scalar emulated instruction");
+ assert((isPredicatedInst(I)) && "Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
- (isa<StoreInst>(I) &&
- NumPredStores > NumberOfStoresToPredicate);
+ (isa<StoreInst>(I) && NumPredStores > NumberOfStoresToPredicate);
}
void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
@@ -5097,11 +5101,10 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
///
/// This SCEV can be sent to the Target in order to estimate the address
/// calculation cost.
-static const SCEV *getAddressAccessSCEV(
- Value *Ptr,
- LoopVectorizationLegality *Legal,
- PredicatedScalarEvolution &PSE,
- const Loop *TheLoop) {
+static const SCEV *getAddressAccessSCEV(Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ PredicatedScalarEvolution &PSE,
+ const Loop *TheLoop) {
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
@@ -5557,7 +5560,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
for (BasicBlock *BB : TheLoop->blocks()) {
// For each instruction in the...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Tibor Győri (TiborGY) Changes: Some loop vectorizer files do not appear to conform to the LLVM style. I intend to work on these files, and formatting my changes with clang-format has resulted in diffs polluted with formatting changes to code I have not touched. Patch is 24.65 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/158507.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ff35db14f7094..636607d57248a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -14,7 +14,6 @@
// is a need (but D45420 needs to happen first).
//
-#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -28,6 +27,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
using namespace llvm;
@@ -40,10 +40,10 @@ static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
-static cl::opt<bool>
-AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
- cl::desc("Enable recognition of non-constant strided "
- "pointer induction variables."));
+static cl::opt<bool> AllowStridedPointerIVs(
+ "lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
+ cl::desc("Enable recognition of non-constant strided "
+ "pointer induction variables."));
static cl::opt<bool>
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
@@ -420,8 +420,8 @@ static IntegerType *getWiderInductionTy(const DataLayout &DL, Type *Ty0,
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
SmallPtrSetImpl<Value *> &AllowedExit) {
- // Reductions, Inductions and non-header phis are allowed to have exit users. All
- // other instructions must not have external users.
+ // Reductions, Inductions and non-header phis are allowed to have exit users.
+ // All other instructions must not have external users.
if (!AllowedExit.count(Inst))
// Check that all of the users of the loop are inside the BB.
for (User *U : Inst->users()) {
@@ -459,12 +459,13 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
// pointer is checked to reference consecutive elements suitable for a
// masked access.
const auto &Strides =
- LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
+ LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
bool CanAddPredicate = !llvm::shouldOptimizeForSize(
TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
- int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
- CanAddPredicate, false).value_or(0);
+ int Stride =
+ getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides, CanAddPredicate, false)
+ .value_or(0);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
@@ -622,7 +623,8 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// not supported yet.
auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
if (!Br) {
- reportVectorizationFailure("Unsupported basic block terminator",
+ reportVectorizationFailure(
+ "Unsupported basic block terminator",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
@@ -641,7 +643,8 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
!TheLoop->isLoopInvariant(Br->getCondition()) &&
!LI->isLoopHeader(Br->getSuccessor(0)) &&
!LI->isLoopHeader(Br->getSuccessor(1))) {
- reportVectorizationFailure("Unsupported conditional branch",
+ reportVectorizationFailure(
+ "Unsupported conditional branch",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
@@ -655,9 +658,10 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// simple outer loops scenarios with uniform nested loops.
if (!isUniformLoopNest(TheLoop /*loop nest*/,
TheLoop /*context outer loop*/)) {
- reportVectorizationFailure("Outer loop contains divergent loops",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
+ reportVectorizationFailure(
+ "Outer loop contains divergent loops",
+ "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
+ ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1623,9 +1627,10 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!Lp->getLoopPreheader()) {
- reportVectorizationFailure("Loop doesn't have a legal pre-header",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
+ reportVectorizationFailure(
+ "Loop doesn't have a legal pre-header",
+ "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
+ ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1634,9 +1639,10 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a single backedge.
if (Lp->getNumBackEdges() != 1) {
- reportVectorizationFailure("The loop must have a single backedge",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
+ reportVectorizationFailure(
+ "The loop must have a single backedge",
+ "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
+ ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -2049,7 +2055,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
"due to SCEVThreshold");
- reportVectorizationFailure("Too many SCEV checks needed",
+ reportVectorizationFailure(
+ "Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
"TooManySCEVRunTimeChecks", ORE, TheLoop);
if (DoExtraAnalysis)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c04b5cb10eac2..ba5ac8465da0c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -53,7 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
@@ -144,6 +143,7 @@
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -197,36 +197,35 @@ static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
"vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
cl::desc("The maximum allowed number of runtime memory checks"));
-// Option prefer-predicate-over-epilogue indicates that an epilogue is undesired,
-// that predication is preferred, and this lists all options. I.e., the
-// vectorizer will try to fold the tail-loop (epilogue) into the vector body
+// Option prefer-predicate-over-epilogue indicates that an epilogue is
+// undesired, that predication is preferred, and this lists all options. I.e.,
+// the vectorizer will try to fold the tail-loop (epilogue) into the vector body
// and predicate the instructions accordingly. If tail-folding fails, there are
// different fallback strategies depending on these values:
namespace PreferPredicateTy {
- enum Option {
- ScalarEpilogue = 0,
- PredicateElseScalarEpilogue,
- PredicateOrDontVectorize
- };
+enum Option {
+ ScalarEpilogue = 0,
+ PredicateElseScalarEpilogue,
+ PredicateOrDontVectorize
+};
} // namespace PreferPredicateTy
static cl::opt<PreferPredicateTy::Option> PreferPredicateOverEpilogue(
"prefer-predicate-over-epilogue",
- cl::init(PreferPredicateTy::ScalarEpilogue),
- cl::Hidden,
+ cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden,
cl::desc("Tail-folding and predication preferences over creating a scalar "
"epilogue loop."),
- cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue,
- "scalar-epilogue",
- "Don't tail-predicate loops, create scalar epilogue"),
- clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
- "predicate-else-scalar-epilogue",
- "prefer tail-folding, create scalar epilogue if tail "
- "folding fails."),
- clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
- "predicate-dont-vectorize",
- "prefers tail-folding, don't attempt vectorization if "
- "tail-folding fails.")));
+ cl::values(
+ clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue",
+ "Don't tail-predicate loops, create scalar epilogue"),
+ clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
+ "predicate-else-scalar-epilogue",
+ "prefer tail-folding, create scalar epilogue if tail "
+ "folding fails."),
+ clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
+ "predicate-dont-vectorize",
+ "prefers tail-folding, don't attempt vectorization if "
+ "tail-folding fails.")));
static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
"force-tail-folding-style", cl::desc("Force the tail folding style"),
@@ -262,7 +261,8 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
/// predication, or in order to mask away gaps.
static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
- cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
+ cl::desc("Enable vectorization on masked interleaved memory accesses in a "
+ "loop"));
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
@@ -1254,8 +1254,7 @@ class LoopVectorizationCostModel {
/// First result is for scalarization (will be invalid for scalable
/// vectors); second is for the safe-divisor strategy.
std::pair<InstructionCost, InstructionCost>
- getDivRemSpeculationCost(Instruction *I,
- ElementCount VF) const;
+ getDivRemSpeculationCost(Instruction *I, ElementCount VF) const;
/// Returns true if \p I is a memory instruction with consecutive memory
/// access that can be widened.
@@ -1528,10 +1527,9 @@ class LoopVectorizationCostModel {
/// memory access.
InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF);
- /// The cost calculation for Load/Store instruction \p I with uniform pointer -
- /// Load: scalar load + broadcast.
- /// Store: scalar store + (loop invariant value stored? 0 : extract of last
- /// element)
+ /// The cost calculation for Load/Store instruction \p I with uniform pointer
+ /// - Load: scalar load + broadcast. Store: scalar store + (loop invariant
+ /// value stored? 0 : extract of last element)
InstructionCost getUniformMemOpCost(Instruction *I, ElementCount VF);
/// Estimate the overhead of scalarizing an instruction. This is a
@@ -2208,12 +2206,13 @@ static std::optional<unsigned> getMaxVScale(const Function &F,
}
/// For the given VF and UF and maximum trip count computed for the loop, return
-/// whether the induction variable might overflow in the vectorized loop. If not,
-/// then we know a runtime overflow check always evaluates to false and can be
-/// removed.
-static bool isIndvarOverflowCheckKnownFalse(
- const LoopVectorizationCostModel *Cost,
- ElementCount VF, std::optional<unsigned> UF = std::nullopt) {
+/// whether the induction variable might overflow in the vectorized loop. If
+/// not, then we know a runtime overflow check always evaluates to false and can
+/// be removed.
+static bool
+isIndvarOverflowCheckKnownFalse(const LoopVectorizationCostModel *Cost,
+ ElementCount VF,
+ std::optional<unsigned> UF = std::nullopt) {
// Always be conservative if we don't know the exact unroll factor.
unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF);
@@ -2456,7 +2455,7 @@ struct CSEDenseMapInfo {
} // end anonymous namespace
-///Perform cse of induction variable instructions.
+/// Perform cse of induction variable instructions.
static void cse(BasicBlock *BB) {
// Perform simple cse.
SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
@@ -2685,7 +2684,8 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
auto ForcedScalar = ForcedScalars.find(VF);
if (ForcedScalar != ForcedScalars.end())
for (auto *I : ForcedScalar->second) {
- LLVM_DEBUG(dbgs() << "LV: Found (forced) scalar instruction: " << *I << "\n");
+ LLVM_DEBUG(dbgs() << "LV: Found (forced) scalar instruction: " << *I
+ << "\n");
Worklist.insert(I);
}
@@ -2776,7 +2776,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
// Do we have a non-scalar lowering for this predicated
// instruction? No - it is scalar with predication.
- switch(I->getOpcode()) {
+ switch (I->getOpcode()) {
default:
return true;
case Instruction::Call:
@@ -2833,7 +2833,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
- switch(I->getOpcode()) {
+ switch (I->getOpcode()) {
default:
llvm_unreachable(
"instruction should have been considered by earlier checks");
@@ -2865,7 +2865,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
std::pair<InstructionCost, InstructionCost>
LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
- ElementCount VF) const {
+ ElementCount VF) const {
assert(I->getOpcode() == Instruction::UDiv ||
I->getOpcode() == Instruction::SDiv ||
I->getOpcode() == Instruction::SRem ||
@@ -3060,8 +3060,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// where only a single instance out of VF should be formed.
auto AddToWorklistIfAllowed = [&](Instruction *I) -> void {
if (IsOutOfScope(I)) {
- LLVM_DEBUG(dbgs() << "LV: Found not uniform due to scope: "
- << *I << "\n");
+ LLVM_DEBUG(dbgs() << "LV: Found not uniform due to scope: " << *I
+ << "\n");
return;
}
if (isPredicatedInst(I)) {
@@ -3266,7 +3266,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
if (Legal->getRuntimePointerChecking()->Need) {
- reportVectorizationFailure("Runtime ptr check is required with -Os/-Oz",
+ reportVectorizationFailure(
+ "Runtime ptr check is required with -Os/-Oz",
"runtime pointer checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
"compiling with -Os/-Oz",
@@ -3275,7 +3276,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
}
if (!PSE.getPredicate().isAlwaysTrue()) {
- reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz",
+ reportVectorizationFailure(
+ "Runtime SCEV check is required with -Os/-Oz",
"runtime SCEV checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
"compiling with -Os/-Oz",
@@ -3285,7 +3287,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
// FIXME: Avoid specializing for stride==1 instead of bailing out.
if (!Legal->getLAI()->getSymbolicStrides().empty()) {
- reportVectorizationFailure("Runtime stride check for small trip count",
+ reportVectorizationFailure(
+ "Runtime stride check for small trip count",
"runtime stride == 1 checks needed. Enable vectorization of "
"this loop without such check by compiling with -Os/-Oz",
"CantVersionLoopWithOptForSize", ORE, TheLoop);
@@ -3506,7 +3509,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
if (TC != ElementCount::getFixed(MaxTC))
LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n');
if (TC.isScalar()) {
- reportVectorizationFailure("Single iteration (non) loop",
+ reportVectorizationFailure(
+ "Single iteration (non) loop",
"loop trip count is one, irrelevant for vectorization",
"SingleIterationLoop", ORE, TheLoop);
return FixedScalableVFPair::getNone();
@@ -4356,7 +4360,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
if (EpilogueVectorizationForceVF > 1) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n");
- ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
+ ElementCount ForcedEC =
+ ElementCount::getFixed(EpilogueVectorizationForceVF);
if (hasPlanWithVF(ForcedEC))
return {ForcedEC, 0, 0};
@@ -4565,7 +4570,8 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
LoopCost = CM.expectedCost(VF);
else
LoopCost = cost(Plan, VF);
- assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
+ assert(LoopCost.isValid() &&
+ "Expected to have chosen a VF with valid cost");
// Loop body is free and there is no need for interleaving.
if (LoopCost == 0)
@@ -4858,11 +4864,9 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert((isPredicatedInst(I)) &&
- "Expecting a scalar emulated instruction");
+ assert((isPredicatedInst(I)) && "Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
- (isa<StoreInst>(I) &&
- NumPredStores > NumberOfStoresToPredicate);
+ (isa<StoreInst>(I) && NumPredStores > NumberOfStoresToPredicate);
}
void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
@@ -5097,11 +5101,10 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
///
/// This SCEV can be sent to the Target in order to estimate the address
/// calculation cost.
-static const SCEV *getAddressAccessSCEV(
- Value *Ptr,
- LoopVectorizationLegality *Legal,
- PredicatedScalarEvolution &PSE,
- const Loop *TheLoop) {
+static const SCEV *getAddressAccessSCEV(Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ PredicatedScalarEvolution &PSE,
+ const Loop *TheLoop) {
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
@@ -5557,7 +5560,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
for (BasicBlock *BB : TheLoop->blocks()) {
// For each instruction in the...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
7559a35
to
802301e
Compare
802301e
to
90c8229
Compare
Our coding guidelines prefer not to reformat entire files, because it pollutes the |
Got it, thanks for the tip. Closing this and rebasing #158513 to get rid of the formatting changes, |
Some loop vectorizer files appear not to conform to the LLVM style. I intend to work on these files, and formatting my changes with clang-format has resulted in diffs polluted with formatting changes to code I have not touched.
This PR contains only formatting changes.