[SCEV] Factor out part of wrap flag detection logic [NFC-ish]
In an effort to make the code around flag determination more readable, and (possibly) to prepare for a follow-up change, factor out some of the flag detection logic.  In the process, reduce the number of locations where we mutate wrap flags by a couple.

Note that this isn't NFC.  The old code tried for NSW xor (NUW || NW).  That is, two different paths computed different sets of wrap flags.  The new code tries for all three.  The result is that some expressions end up with a few extra flags set.
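To make that difference concrete, below is a minimal standalone C++ sketch. The enum values and the Provable* stand-ins are invented for illustration and are not LLVM's real SCEV API; only the control-flow shape mirrors the patch: the old code picked one of two disjoint proof paths, while the new helper ORs in every flag it can prove.

    #include <cstdio>

    enum NoWrapFlags { FlagAnyWrap = 0, FlagNW = 1, FlagNUW = 2, FlagNSW = 4 };

    // Stand-ins for "we managed to prove this property for the addrec".
    static bool ProvableNSW = true, ProvableNUW = true, ProvableNW = true;

    // Old shape: the zext path tried NUW/NW and the sext path tried NSW,
    // so a single expression never accumulated both sides.
    static NoWrapFlags oldZExtPath(NoWrapFlags F) {
      if (ProvableNUW) return NoWrapFlags(F | FlagNUW);
      if (ProvableNW)  return NoWrapFlags(F | FlagNW);
      return F;
    }

    // New shape: one helper tries NSW, NUW, and NW and accumulates
    // everything it can prove, so callers may see a few extra flags set.
    static NoWrapFlags newHelperSketch(NoWrapFlags F) {
      if (ProvableNSW) F = NoWrapFlags(F | FlagNSW);
      if (ProvableNUW) F = NoWrapFlags(F | FlagNUW);
      if (ProvableNW)  F = NoWrapFlags(F | FlagNW);
      return F;
    }

    int main() {
      std::printf("old zext path: %d\n", oldZExtPath(FlagAnyWrap));     // 2 (NUW only)
      std::printf("new helper:    %d\n", newHelperSketch(FlagAnyWrap)); // 7 (NSW|NUW|NW)
    }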
preames committed Nov 15, 2020
1 parent dd0b8b9 commit 1ec6e1e
Showing 5 changed files with 109 additions and 64 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1905,6 +1905,10 @@ class ScalarEvolution {
/// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation.
SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);

/// Try to prove NSW or NUW on \p AR by proving facts about conditions known
/// on entry and backedge.
SCEV::NoWrapFlags proveNoWrapViaInduction(const SCEVAddRecExpr *AR);

Optional<MonotonicPredicateType> getMonotonicPredicateTypeImpl(
const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
Optional<const SCEV *> NumIter, const Instruction *Context);
151 changes: 96 additions & 55 deletions llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1588,13 +1588,18 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}

if (!AR->hasNoUnsignedWrap()) {
auto NewFlags = proveNoWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}

// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());

// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
@@ -1673,35 +1678,14 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// doing extra work that may not pay off.
if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
!AC.assumptions().empty()) {
// If the backedge is guarded by a comparison with the pre-inc
// value the addrec is safe. Also, if the entry is guarded by
// a comparison with the start value and the backedge is
// guarded by a comparison with the post-inc value, the addrec
// is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
// Cache knowledge of AR NUW, which is propagated to this
// AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
// For a negative step, we can extend the operands iff doing so only
// traverses values in the range zext([0,UINT_MAX]).
if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
// Cache knowledge of AR NW, which is propagated to this
// AddRec. Negative step causes unsigned wrap, but it
// still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Note: We've proven NW here, but that's already done above too.
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1932,6 +1916,11 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}

if (!AR->hasNoSignedWrap()) {
auto NewFlags = proveNoWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}

// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
@@ -2015,35 +2004,6 @@ }
}
}

// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.

if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
!AC.assumptions().empty()) {
// If the backedge is guarded by a comparison with the pre-inc
// value the addrec is safe. Also, if the entry is guarded by
// a comparison with the start value and the backedge is
// guarded by a comparison with the post-inc value, the addrec
// is safe.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit =
getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}

// sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
// if D + (C - D + Step * n) could be proven to not signed wrap
// where D maximizes the number of trailing zeros of (C - D + Step * n)
@@ -4436,6 +4396,87 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
return Result;
}

SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaInduction(const SCEVAddRecExpr *AR) {
SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
if (!AR->isAffine())
return Result;

const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();

// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the later case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);

// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.

if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
AC.assumptions().empty())
return Result;

if (!AR->hasNoSignedWrap()) {
// If the backedge is guarded by a comparison with the pre-inc
// value the addrec is safe. Also, if the entry is guarded by
// a comparison with the start value and the backedge is
// guarded by a comparison with the post-inc value, the addrec
// is safe.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit =
getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
Result = setFlags(Result, SCEV::FlagNSW);
}
}

if (!AR->hasNoUnsignedWrap()) {
// If the backedge is guarded by a comparison with the pre-inc
// value the addrec is safe. Also, if the entry is guarded by
// a comparison with the start value and the backedge is
// guarded by a comparison with the post-inc value, the addrec
// is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
Result = setFlags(Result, SCEV::FlagNUW);
}
}
}

if (!AR->hasNoSelfWrap()) {
if (isKnownNegative(Step)) {
// TODO: We can generalize this condition by proving (ugt AR, AR.start)
// for the two clauses below.
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
// Negative step causes unsigned wrap, but it still can't self-wrap.
Result = setFlags(Result, SCEV::FlagNW);
}
}
}

return Result;
}

namespace {

/// Represents an abstract binary operation. This may exist as a
2 changes: 1 addition & 1 deletion llvm/test/Analysis/ScalarEvolution/pr22641.ll
@@ -12,7 +12,7 @@ body:
%conv2 = zext i16 %dec2 to i32
%conv = zext i16 %dec to i32
; CHECK: %conv = zext i16 %dec to i32
; CHECK-NEXT: --> {(zext i16 (-1 + %a) to i32),+,65535}<nuw><%body>
; CHECK-NEXT: --> {(zext i16 (-1 + %a) to i32),+,65535}<nuw><nsw><%body>
; CHECK-NOT: --> {(65535 + (zext i16 %a to i32)),+,65535}<nuw><%body>

br label %cond
4 changes: 2 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
@@ -2,9 +2,9 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s

; CHECK: %tmp3 = sext i8 %tmp2 to i32
; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: -1
; CHECK: --> (sext i8 {0,+,1}<nuw><%bb1> to i32){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: -1
; CHECK: %tmp4 = mul i32 %tmp3, %i.02
; CHECK: --> ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<%bb>){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: {0,+,-1}<%bb>
; CHECK: --> ((sext i8 {0,+,1}<nuw><%bb1> to i32) * {0,+,1}<%bb>){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: {0,+,-1}<%bb>

; These sexts are not foldable.

@@ -193,7 +193,7 @@ for.end: ; preds = %if.end, %entry
define void @test7(i64 %start, i64* %inc_ptr) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0:!range !.*]]
; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0
; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: loop.preheader:
@@ -317,7 +317,7 @@ define void @test3_neg(i64 %start) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP1]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]]
; CHECK: for.end:
@@ -345,7 +345,7 @@ define void @test4_neg(i64 %start) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25
; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]]
; CHECK: backedge:
@@ -405,7 +405,7 @@ define void @test8(i64 %start, i64* %inc_ptr) {
define void @test8(i64 %start, i64* %inc_ptr) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !1
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG1:!range !.*]]
; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0
; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: loop.preheader:
@@ -525,7 +525,7 @@ exit:
define void @test11(i64* %inc_ptr) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]]
; CHECK-NEXT: [[NE_COND:%.*]] = icmp ne i64 [[INC]], 0
; CHECK-NEXT: br i1 [[NE_COND]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.preheader:
@@ -576,7 +576,7 @@ exit:
define void @test12(i64* %inc_ptr) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
