[SCEV] Compute exit counts for unsigned IVs using mustprogress semantics

The motivation here is simple loops with unsigned induction variables and non-one steps. A toy example would be:
for (unsigned i = 0; i < N; i += 2) { body; }

Under C/C++ semantics, we do not get the nuw flag on the induction variable, and without that flag we currently can't compute a bound for this loop. We can do better in many cases, depending on the contents of "body".
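
To see why the unsigned arithmetic alone gives no bound, note that when N is UINT_MAX the even IV can wrap without ever reaching a value >= N, so the exit is simply never taken. A minimal editorial C++ sketch (not part of this patch) showing the same effect at 8 bits:

#include <cstdint>
#include <cstdio>

// 8-bit analogue of the loop above with the bound 255 (the 8-bit UINT_MAX).
// The even IV wraps (254 -> 0) without ever reaching a value >= 255, so the
// exit condition never fires; the iteration cap below only ends the demo.
int main() {
  uint8_t N = 255;
  unsigned iterations = 0;
  for (uint8_t i = 0; i < N; i += 2) {
    if (++iterations > 512) { // two full wraps of the 8-bit value space
      printf("still looping after %u iterations\n", iterations);
      return 0;
    }
  }
  printf("exited after %u iterations\n", iterations);
  return 0;
}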

The basic intuition behind this patch is as follows:
* A step which evenly divides the iteration space must wrap through the same numbers repeatedly. Thus, we can ignore potential corner cases where we exit after the n-th wrap through uint32_max.
* Per C++ rules, infinite loops without side effects are UB. We already have code in SCEV which relies on this. In LLVM, this is tied to the mustprogress attribute.

Together, these let us conclude that the trip count of this loop must come before unsigned overflow unless the body would form a well-defined infinite loop.
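
As an editorial sanity check of the divisibility point (not part of the patch), the sketch below runs an 8-bit IV for two full wraps: a step of 2 divides 2^8 and so revisits exactly the values it already produced, while a step of 3 keeps reaching values it has never compared against the bound before.

#include <cstdint>
#include <cstdio>
#include <set>

// Count the distinct values an 8-bit IV takes over the given number of steps.
// A step that evenly divides 256 revisits the same residues after wrapping;
// a step that does not keeps producing values it has not seen before.
static unsigned distinctValues(uint8_t step, unsigned steps) {
  std::set<uint8_t> seen;
  uint8_t iv = 0;
  for (unsigned i = 0; i < steps; ++i) {
    seen.insert(iv);
    iv += step; // wraps modulo 256
  }
  return static_cast<unsigned>(seen.size());
}

int main() {
  // 512 steps is two full trips through the 8-bit value space.
  printf("step 2: %u distinct values\n", distinctValues(2, 512)); // prints 128
  printf("step 3: %u distinct values\n", distinctValues(3, 512)); // prints 256
  return 0;
}

This is why the even-step loops in the test file below get a computed count while @test_uneven_divide (step of 3) stays unpredictable: once a dividing step has wrapped, no value it produces can newly take the exit.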

A couple notes for those reading along:
* I reused the loop properties code, which is overly conservative for this case. I may follow up in another patch to generalize it for the actual UB rules.
* We could cache the n(s/u)w facts. I left that out because doing a pre-patch which cached existing inference showed a lot of diffs I had trouble fully explaining. I plan to get back to this, but I don't want it on the critical path.

Differential Revision: https://reviews.llvm.org/D103118
preames committed Jun 7, 2021
1 parent 00b6463 commit 38540d7
Showing 3 changed files with 237 additions and 1 deletion.
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1542,6 +1542,10 @@ class ScalarEvolution {
return getLoopProperties(L).HasNoAbnormalExits;
}

/// Return true if this loop is finite by assumption. That is,
/// to be infinite, it must also be undefined.
bool loopIsFiniteByAssumption(const Loop *L);

/// Compute a LoopDisposition value.
LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);

47 changes: 46 additions & 1 deletion llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6573,6 +6573,17 @@ ScalarEvolution::getLoopProperties(const Loop *L) {
return Itr->second;
}

bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// TODO: Use the loop metadata form of mustprogress as well.
if (!L->getHeader()->getParent()->mustProgress())
return false;

// A loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
return loopHasNoSideEffects(L);
}

const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
@@ -11462,11 +11473,45 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
!loopHasNoSideEffects(L))
return getCouldNotCompute();
} else if (!Stride->isOne() && !NoWrap) {
auto isUBOnWrap = [&]() {
// Can we prove this loop *must* be UB if overflow of IV occurs?
// Reasoning goes as follows:
// * Suppose the IV did self wrap.
// * If Stride evenly divides the iteration space, then once wrap
// occurs, the loop must revisit the same values.
// * We know that RHS is invariant, and that none of those values
// caused this exit to be taken previously. Thus, this exit is
// dynamically dead.
// * If this is the sole exit, then a dead exit implies the loop
// must be infinite if there are no abnormal exits.
// * If the loop were infinite, then it must either not be mustprogress
// or have side effects. Otherwise, it must be UB.
// * It can't (by assumption) be UB, so we have contradicted our
// premise and can conclude the IV did not in fact self-wrap.
// From no-self-wrap, we then need to prove no-(un)signed-wrap. This
// follows trivially from the fact that every (un)signed-wrapped, but
// not self-wrapped, value must be less than the last value before
// (un)signed wrap; since we know that last value didn't exit, neither
// will any smaller one.

if (!isLoopInvariant(RHS, L))
return false;

auto *StrideC = dyn_cast<SCEVConstant>(Stride);
if (!StrideC || !StrideC->getAPInt().isPowerOf2())
return false;

if (!ControlsExit || !loopHasNoAbnormalExits(L))
return false;

return loopIsFiniteByAssumption(L);
};

// Avoid proven overflow cases: this will ensure that the backedge taken
// count will not generate any unsigned overflow. Relaxed no-overflow
// conditions exploit NoWrapFlags, allowing to optimize in presence of
// undefined behaviors like the case of C language.
if (canIVOverflowOnLT(RHS, Stride, IsSigned))
if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap())
return getCouldNotCompute();
}

187 changes: 187 additions & 0 deletions llvm/test/Analysis/ScalarEvolution/lt-overflow.ll
@@ -0,0 +1,187 @@

; RUN: opt %s -analyze -scalar-evolution -enable-new-pm=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; A collection of tests focused on exercising logic to prove no-unsigned wrap
; from mustprogress semantics of loops.

; CHECK: Determining loop execution counts for: @test
; CHECK: Loop %for.body: backedge-taken count is ((-1 + (2 umax %N)) /u 2)
; CHECK: Determining loop execution counts for: @test_preinc
; CHECK: Loop %for.body: backedge-taken count is ((1 + %N) /u 2)
; CHECK: Determining loop execution counts for: @test_well_defined_infinite_st
; CHECK: Loop %for.body: Unpredictable backedge-taken count.
; CHECK: Determining loop execution counts for: @test_well_defined_infinite_ld
; CHECK: Loop %for.body: Unpredictable backedge-taken count.
; CHECK: Determining loop execution counts for: @test_no_mustprogress
; CHECK: Loop %for.body: Unpredictable backedge-taken count.
; CHECK: Determining loop execution counts for: @test_1024
; CHECK: Loop %for.body: backedge-taken count is ((-1 + (1024 umax %N)) /u 1024)
; CHECK: Determining loop execution counts for: @test_uneven_divide
; CHECK: Loop %for.body: Unpredictable backedge-taken count.
; CHECK: Determining loop execution counts for: @test_non_invariant_rhs
; CHECK: Loop %for.body: Unpredictable backedge-taken count.
; CHECK: Determining loop execution counts for: @test_abnormal_exit
; CHECK: Loop %for.body: Unpredictable backedge-taken count.
; CHECK: Determining loop execution counts for: @test_other_exit
; CHECK: Loop %for.body: <multiple exits> Unpredictable backedge-taken count.
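
; Editorial sanity check of the @test expression above (not tool output): for
; %N = 7, (-1 + umax(2, 7)) /u 2 = 6 /u 2 = 3, matching the three backedges
; taken at %iv.next = 2, 4 and 6 before the exit at %iv.next = 8.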

define void @test(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

define void @test_preinc(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
%cmp = icmp ult i32 %iv, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void

}

@G = external global i32

define void @test_well_defined_infinite_st(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
store volatile i32 0, i32* @G
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

define void @test_well_defined_infinite_ld(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
%val = load volatile i32, i32* @G
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

define void @test_no_mustprogress(i32 %N) {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void

}


define void @test_1024(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 1024
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

define void @test_uneven_divide(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 3
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

define void @test_non_invariant_rhs() mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
%N = load i32, i32* @G
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

declare void @mayexit()

define void @test_abnormal_exit(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
call void @mayexit()
%cmp = icmp ult i32 %iv.next, %N
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}


define void @test_other_exit(i32 %N) mustprogress {
entry:
br label %for.body

for.body:
%iv = phi i32 [ %iv.next, %for.latch ], [ 0, %entry ]
%iv.next = add i32 %iv, 2
%cmp1 = icmp ult i32 %iv.next, 20
br i1 %cmp1, label %for.latch, label %for.cond.cleanup

for.latch:
%cmp2 = icmp ult i32 %iv.next, %N
br i1 %cmp2, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
ret void
}

