-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SCEV] Infer loop max trip count from memory accesses #70361
Conversation
@llvm/pr-subscribers-llvm-analysis Author: Shilei Tian (shiltian) Changes: Data references in a loop are assumed not to access elements beyond the statically allocated size. We can therefore infer a loop max trip count from this undefined behavior. This patch is refined from the original one (https://reviews.llvm.org/D155049) authored by @Peakulorain. Patch is 20.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70361.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 2765f1286d8bce5..6fa504d80c72afc 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -854,6 +854,12 @@ class ScalarEvolution {
unsigned getSmallConstantTripMultiple(const Loop *L,
const BasicBlock *ExitingBlock);
+ /// Return the upper bound of the loop trip count inferred from memory access.
+ /// A memory access can not touch bytes starting outside the statically
+ /// allocated size without being immediate UB. Returns SCEVCouldNotCompute if
+ /// the trip count could not be inferred.
+ const SCEV *getConstantMaxTripCountFromMemAccess(const Loop *L);
+
/// The terms "backedge taken count" and "exit count" are used
/// interchangeably to refer to the number of times the backedge of a loop
/// has executed before the loop is exited.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 2368003177e741c..10b33304b4244a0 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -249,6 +249,10 @@ static cl::opt<bool> UseContextForNoWrapFlagInference(
cl::desc("Infer nuw/nsw flags using context where suitable"),
cl::init(true));
+static cl::opt<bool> UseMemoryAccessUBForBEInference(
+ "scalar-evolution-infer-max-trip-count-from-memory-access", cl::Hidden,
+ cl::desc("Infer loop max trip count from memory access"), cl::init(false));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -8136,7 +8140,16 @@ ScalarEvolution::getSmallConstantTripCount(const Loop *L,
unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
- return getConstantTripCount(MaxExitCount);
+ unsigned MaxExitCountN = getConstantTripCount(MaxExitCount);
+ if (UseMemoryAccessUBForBEInference) {
+ auto *MaxInferCount = getConstantMaxTripCountFromMemAccess(L);
+ if (auto *InferCount = dyn_cast<SCEVConstant>(MaxInferCount)) {
+ unsigned InferValue = InferCount->getValue()->getZExtValue();
+ MaxExitCountN =
+ MaxExitCountN == 0 ? InferValue : std::min(MaxExitCountN, InferValue);
+ }
+ }
+ return MaxExitCountN;
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
@@ -8191,6 +8204,153 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
return getSmallConstantTripMultiple(L, ExitCount);
}
+/// Collect all load/store instructions that must be executed in every iteration
+/// of loop \p L .
+static void
+collectExecLoadStoreInsideLoop(const Loop *L, DominatorTree &DT,
+ SmallVector<Instruction *, 4> &MemInsts) {
+ // It is difficult to tell if the load/store instruction is executed on every
+ // iteration inside an irregular loop.
+ if (!L->isLoopSimplifyForm() || !L->isInnermost())
+ return;
+
+ // FIXME: To make the case more typical, we only analyze loops that have one
+ // exiting block and the block must be the latch. It is easier to capture
+ // loops with memory access that will be executed in every iteration.
+ const BasicBlock *LoopLatch = L->getLoopLatch();
+ assert(LoopLatch && "normal form loop doesn't have a latch");
+ if (L->getExitingBlock() != LoopLatch)
+ return;
+
+ const Function *F = LoopLatch->getParent();
+ if (F->hasFnAttribute(Attribute::SanitizeAddress) ||
+ F->hasFnAttribute(Attribute::SanitizeThread) ||
+ F->hasFnAttribute(Attribute::SanitizeMemory) ||
+ F->hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F->hasFnAttribute(Attribute::SanitizeMemTag))
+ return;
+
+ for (auto *BB : L->getBlocks()) {
+ // We need to make sure that max execution time of MemAccessBB in loop
+ // represents latch max execution time. The BB below should be skipped:
+ // Entry
+ // │
+ // ┌─────▼─────┐
+ // │Loop Header◄─────┐
+ // └──┬──────┬─┘ │
+ // │ │ │
+ // ┌────────▼──┐ ┌─▼─────┐ │
+ // │MemAccessBB│ │OtherBB│ │
+ // └────────┬──┘ └─┬─────┘ │
+ // │ │ │
+ // ┌─▼──────▼─┐ │
+ // │Loop Latch├─────┘
+ // └────┬─────┘
+ // ▼
+ // Exit
+ if (!DT.dominates(BB, LoopLatch))
+ continue;
+
+ for (Instruction &I : *BB) {
+ if (isa<LoadInst>(&I) || isa<StoreInst>(&I))
+ MemInsts.push_back(&I);
+ }
+ }
+}
+
+/// Return a SCEV representing the memory size of pointer \p V .
+/// TODO: Memory size of more types can be identified here.
+static const SCEV *getCertainSizeOfMem(const SCEV *V, Type *RTy,
+ const DataLayout &DL,
+ const TargetLibraryInfo &TLI,
+ ScalarEvolution *SE) {
+ const SCEVUnknown *PtrBase = dyn_cast<SCEVUnknown>(V);
+ if (!PtrBase)
+ return nullptr;
+ Value *Ptr = PtrBase->getValue();
+ uint64_t Size = 0;
+ if (!llvm::getObjectSize(Ptr, Size, DL, &TLI))
+ return nullptr;
+ return SE->getConstant(RTy, Size);
+}
+
+static const SCEV *countItersSelfWrap(const SCEV *V, ScalarEvolution *SE) {
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ const SCEV *Upper = SE->getConstant(SE->getUnsignedRangeMax(V));
+ const SCEV *Lower = SE->getConstant(SE->getUnsignedRangeMin(V));
+ const SCEV *Range = SE->getMinusSCEV(Upper, Lower);
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ return SE->getUDivCeilSCEV(Range, Step);
+ }
+ return SE->getCouldNotCompute();
+}
+
+/// Return the smaller one of the wraps that will occur in the indexes.
+static const SCEV *getSmallCountOfIdxSelfWrap(Value *Ptr, ScalarEvolution *SE) {
+ auto *PtrGEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!PtrGEP)
+ return SE->getCouldNotCompute();
+
+ SmallVector<const SCEV *> CountColl;
+ for (Value *Index : PtrGEP->indices()) {
+ Value *V = Index;
+ if (isa<ZExtInst>(V) || isa<SExtInst>(V))
+ V = cast<Instruction>(Index)->getOperand(0);
+ const SCEV *Count = countItersSelfWrap(SE->getSCEV(V), SE);
+ if (!isa<SCEVCouldNotCompute>(Count))
+ CountColl.push_back(Count);
+ }
+
+ if (CountColl.empty())
+ return SE->getCouldNotCompute();
+
+ return SE->getUMinFromMismatchedTypes(CountColl);
+}
+
+const SCEV *
+ScalarEvolution::getConstantMaxTripCountFromMemAccess(const Loop *L) {
+ SmallVector<Instruction *, 4> MemInsts;
+ collectExecLoadStoreInsideLoop(L, DT, MemInsts);
+
+ // Collect AddRecExpr that meets the requirements and can be analyzed.
+ SmallVector<const SCEV *> InferCountColl;
+ const DataLayout &DL = getDataLayout();
+
+ for (Instruction *I : MemInsts) {
+ Value *Ptr = getLoadStorePointerOperand(I);
+ assert(Ptr && "empty pointer operand");
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(Ptr));
+ if (!AddRec || !AddRec->isAffine())
+ continue;
+ const SCEV *PtrBase = getPointerBase(AddRec);
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEV *MemSize =
+ getCertainSizeOfMem(PtrBase, Step->getType(), DL, TLI, this);
+ if (!MemSize)
+ continue;
+ // Now we can infer a max execution time by MemLength/StepLength.
+ auto *MaxExecCount = dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step));
+ if (!MaxExecCount || MaxExecCount->getAPInt().getActiveBits() > 32)
+ continue;
+ auto *IdxWrap =
+ dyn_cast<SCEVConstant>(getSmallCountOfIdxSelfWrap(Ptr, this));
+ if (!IdxWrap)
+ continue;
+ ConstantInt *WrapVC = IdxWrap->getValue();
+ auto *InferCount = dyn_cast<SCEVConstant>(
+ getAddExpr(MaxExecCount, getOne(MaxExecCount->getType())));
+ ConstantInt *InferVC = InferCount->getValue();
+ if (InferVC->getValue().getZExtValue() > WrapVC->getValue().getZExtValue())
+ continue;
+ InferCountColl.push_back(InferCount);
+ }
+
+ if (InferCountColl.empty())
+ return getCouldNotCompute();
+
+ return getUMinFromMismatchedTypes(InferCountColl);
+}
+
const SCEV *ScalarEvolution::getExitCount(const Loop *L,
const BasicBlock *ExitingBlock,
ExitCountKind Kind) {
@@ -13439,6 +13599,17 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
OS << ": ";
OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
}
+
+ if (UseMemoryAccessUBForBEInference) {
+ unsigned SmallMaxTrip = SE->getSmallConstantMaxTripCount(L);
+ OS << "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+ if (SmallMaxTrip)
+ OS << "Small constant max trip is " << SmallMaxTrip << "\n";
+ else
+ OS << "Small constant max trip couldn't be computed.\n";
+ }
}
namespace llvm {
diff --git a/llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll b/llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
new file mode 100644
index 000000000000000..b2524dbb02eec2f
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -scalar-evolution-infer-max-trip-count-from-memory-access 2>&1 | FileCheck %s
+
+define void @ComputeMaxTripCountFromArrayIdxWrap(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Small constant max trip is 2147483647
+;
+entry:
+ %a = alloca [256 x i32], align 4
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext i8 %iv to i64
+ %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nuw nsw i8 %iv, 1
+ %inc_zext = zext i8 %inc to i32
+ %cmp = icmp slt i32 %inc_zext, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromArrayIdxWrap2(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap2'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap2
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Small constant max trip is 2147483647
+;
+entry:
+ %a = alloca [127 x i32], align 4
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext i8 %iv to i64
+ %arrayidx = getelementptr inbounds [127 x i32], [127 x i32]* %a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nuw nsw i8 %iv, 1
+ %inc_zext = zext i8 %inc to i32
+ %cmp = icmp slt i32 %inc_zext, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromArrayIdxWrap3(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap3'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap3
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Small constant max trip is 21
+;
+entry:
+ %a = alloca [20 x i32], align 4
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext i8 %iv to i64
+ %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nuw nsw i8 %iv, 1
+ %inc_zext = zext i8 %inc to i32
+ %cmp = icmp slt i32 %inc_zext, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll
new file mode 100644
index 000000000000000..7fb4e503651d998
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -scalar-evolution-infer-max-trip-count-from-memory-access 2>&1 | FileCheck %s
+
+define void @ComputeMaxTripCountFromArrayNormal(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayNormal'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromArrayNormal
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Small constant max trip is 8
+;
+entry:
+ %a = alloca [7 x i32], align 4
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext i32 %iv to i64
+ %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* %a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp = icmp slt i32 %inc, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+define void @ComputeMaxTripCountFromZeroArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromZeroArray'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromZeroArray
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Small constant max trip is 1
+;
+entry:
+ %a = alloca [0 x i32], align 4
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext i32 %iv to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* %a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp = icmp slt i32 %inc, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromExtremArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromExtremArray'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromExtremArray
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Small constant max trip is 2147483647
+;
+entry:
+ %a = alloca [4294967295 x i1], align 4
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext i32 %iv to i64
+ %arrayidx = getelementptr inbounds [4294967295 x i1], [4294967295 x i1]* %a, i64 0, i64 %idxprom
+ store i1 0, i1* %arrayidx, align 4
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp = icmp slt i32 %inc, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+define void @ComputeMaxTripCountFromArrayInBranch(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayInBranch'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromArrayInBranch
+; CHECK-NEXT: Loop %for.cond: backedge-taken count is (0 smax %len)
+; CHECK-NEXT: Loop %for.cond: constant max backedge-taken count is 2147483647
+; CHECK-NEXT: Loop %for.cond: symbolic max backedge-taken count is (0 smax %len)
+; CHECK-NEXT: Loop %for.cond: Predicated backedge-taken count is (0 smax %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.cond: Trip multiple is 1
+; CHECK-NEXT: Loop %for.cond: Small constant max trip is 2147483648
+;
+entry:
+ %a = alloca [8 x i32], align 4
+ br label %for.cond
+
+for.cond:
+ %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+ %cmp = icmp slt i32 %iv, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ br label %for.end
+
+for.body:
+ %cmp1 = icmp slt i32 %iv, 8
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+ %idxprom = sext i32 %iv to i64
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ br label %if.end
+
+if.end:
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %iv, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+define void @ComputeMaxTripCountFromMultiDemArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromMultiDemArray'
+; CHECK-NEXT: Determining loop execution counts for: @ComputeMaxTripCountFromMultiDemArray
+; CHECK-NEXT: Loop %for.cond: backedge-taken count is (0 smax %len)
+; CHECK-NEXT: Loop %for.cond: constant max backedge-taken count is 2147483647
+; CHECK-NEXT: Loop %for.cond: symbolic max backedge-taken count is (0 smax %len)
+; CHECK-NEXT: Loop %for.cond: Predicated backedge-taken count is (0 smax %len)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %for.cond: Trip multiple is 1
+; CHECK-NEXT: Loop %for.cond: Small constant max trip is 2147483648
+;
+entry:
+ %a = alloca [3 x [5 x i32]], align 4
+ br label %...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@@ -249,6 +249,10 @@ static cl::opt<bool> UseContextForNoWrapFlagInference( | |||
cl::desc("Infer nuw/nsw flags using context where suitable"), | |||
cl::init(true)); | |||
|
|||
static cl::opt<bool> UseMemoryAccessUBForBEInference( | |||
"scalar-evolution-infer-max-trip-count-from-memory-access", cl::Hidden, | |||
cl::desc("Infer loop max trip count from memory access"), cl::init(false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you check the compile time impact (via @nikic tracker) when you enable this?
llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This still has the weird "self wrap" code instead of checking addrec nowrap flags.
c311990
to
d350f16
Compare
Gentle ping |
} | ||
|
||
/// Get the underlying SCEVAddExpr from a cast expression if possible. | ||
const SCEV *peelCastExpr(const SCEVCastExpr *S, ScalarEvolution *SE) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have an existing utility that can already do this?
return SE->getCouldNotCompute(); | ||
} | ||
|
||
static Value *peelExt(Value *V) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have an existing utility that can already do this?
return V; | ||
} | ||
|
||
static bool isIndexInductionVariable(PHINode *InductionVar, Value *Index) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have an existing utility that can already do this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be too conservative. It can't handle uses such as array[i * 2]. What is the best way to do that?
@@ -0,0 +1,191 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tests need to be improved by adding:
- Negative step
- Multi-dim array with nested loop with both forwarded and reversed indices
This version can pass llvm test suite w/ the option |
Data references in a loop are assumed not to access elements beyond the statically allocated size. We can therefore infer a loop max trip count from this undefined behavior. This patch is refined from the original one (https://reviews.llvm.org/D155049) authored by @Peakulorain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM from sanitizer point of view
This PR doesn’t work as expected for a real-world use case. |
Data references in a loop are assumed not to access elements beyond the statically
allocated size. We can therefore infer a loop max trip count from this undefined
behavior.
This patch is refined from the original one (https://reviews.llvm.org/D155049)
authored by @Peakulorain.