[SCEVAA] Avoid forming malformed pointer diff expressions
This solves the same crash as in D104503, but with a different approach.

The test case test_no_dom demonstrates a case where scev-aa crashes today (if exercised by either -aa-eval or -licm). The basic problem is that SCEV-AA expects to be able to compute a pointer difference between the SCEVs for any pair of pointers we do an alias query on. For (valid, but out of scope) reasons, we can end up asking whether expressions in different sub-loops can alias each other. This results in a subtraction expression being formed where neither operand dominates the other.

The approach this patch takes is to leverage the "defining scope" notion we introduced for flag semantics to detect and disallow the formation of the problematic SCEV. This ends up being relatively straightforward on that new infrastructure. This change does hint that we should probably be verifying a similar property for all SCEVs somewhere, but I'll leave that to a follow-on change.
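
In essence, the fix below amounts to one extra guard before SCEV-AA forms the pointer difference. Here is a minimal sketch of that guard, not the literal patch; the actual change factors it into the canComputePointerDiff helper shown in the ScalarEvolutionAliasAnalysis.cpp hunk below, and AS/BS, LocA/LocB and AAQI are the values already in scope in SCEVAAResult::alias:

  // Sketch only: refuse to form a difference unless some single program point
  // could use both pointer expressions as operands.
  if (!SE.instructionCouldExistWitthOperands(AS, BS))
    return AAResultBase::alias(LocA, LocB, AAQI); // fall back conservatively
  const SCEV *Diff = SE.getMinusSCEV(BS, AS);     // now well-formed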

Differential Revision: D114112
preames committed Nov 17, 2021
1 parent b861c36 commit ad69402
Showing 4 changed files with 186 additions and 7 deletions.
24 changes: 23 additions & 1 deletion llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -519,6 +519,22 @@ class ScalarEvolution {
// Returns a wider type among {Ty1, Ty2}.
Type *getWiderType(Type *Ty1, Type *Ty2) const;

/// Return true if there exists a point in the program at which both
/// A and B could be operands to the same instruction.
/// SCEV expressions are generally assumed to correspond to instructions
/// which could exist in IR. In general, this requires that there exists
/// a use point in the program where all operands dominate the use.
///
/// Example:
/// loop {
///   if
///     loop { v1 = load @global1; }
///   else
///     loop { v2 = load @global2; }
/// }
/// No SCEV with both v1 and v2 as operands can exist in this program.
bool instructionCouldExistWitthOperands(const SCEV *A, const SCEV *B);

/// Return true if the SCEV is a scAddRecExpr or it contains
/// scAddRecExpr. The result will be cached in HasRecMap.
bool containsAddRecurrence(const SCEV *S);
@@ -1947,7 +1963,13 @@ class ScalarEvolution {
const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S);

/// Return a scope which provides an upper bound on the defining scope for
/// a SCEV with the operands in Ops.
/// a SCEV with the operands in Ops. The outparam Precise is set if the
/// bound found is a precise bound (i.e., it must be the defining scope).
const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
                                         bool &Precise);

/// Wrapper around the above for cases which don't care if the bound
/// is precise.
const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops);

/// Given two instructions in the same function, return true if we can
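Stepping outside the diff for a moment: the commit message leaves verifying this property for all SCEVs to follow-on work. As a purely hypothetical sketch of what such a check could look like (not part of this patch; the placement and loop below are assumptions), the new query could be applied pairwise to the operands of a freshly built expression:

  // Hypothetical verifier sketch, not in this commit. Note the query also
  // returns false when the defining-scope bound is imprecise, so a real
  // verifier would have to tolerate that case instead of asserting on it.
  for (const SCEV *Op1 : Ops)
    for (const SCEV *Op2 : Ops)
      assert(SE.instructionCouldExistWitthOperands(Op1, Op2) &&
             "operand pair with no possible common use point");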
29 changes: 27 additions & 2 deletions llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3997,6 +3997,21 @@ Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
  return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}

bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A,
                                                         const SCEV *B) {
  /// For a valid use point to exist, the defining scope of one operand
  /// must dominate the other.
  bool PreciseA, PreciseB;
  auto *ScopeA = getDefiningScopeBound({A}, PreciseA);
  auto *ScopeB = getDefiningScopeBound({B}, PreciseB);
  if (!PreciseA || !PreciseB)
    // Can't tell.
    return false;
  return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) ||
         DT.dominates(ScopeB, ScopeA);
}


const SCEV *ScalarEvolution::getCouldNotCompute() {
  return CouldNotCompute.get();
}
@@ -6612,16 +6627,20 @@ static void collectUniqueOps(const SCEV *S,
}

const Instruction *
ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
                                       bool &Precise) {
  Precise = true;
  // Do a bounded search of the def relation of the requested SCEVs.
  SmallSet<const SCEV *, 16> Visited;
  SmallVector<const SCEV *> Worklist;
  auto pushOp = [&](const SCEV *S) {
    if (!Visited.insert(S).second)
      return;
    // Threshold of 30 here is arbitrary.
    if (Visited.size() > 30)
    if (Visited.size() > 30) {
      Precise = false;
      return;
    }
    Worklist.push_back(S);
  };

@@ -6644,6 +6663,12 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
  return Bound ? Bound : &*F.getEntryBlock().begin();
}

const Instruction *
ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
  bool Discard;
  return getDefiningScopeBound(Ops, Discard);
}

bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
                                                        const Instruction *B) {
  if (A->getParent() == B->getParent() &&
12 changes: 10 additions & 2 deletions llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -23,6 +23,15 @@
#include "llvm/InitializePasses.h"
using namespace llvm;

static bool canComputePointerDiff(ScalarEvolution &SE,
                                  const SCEV *A, const SCEV *B) {
  if (SE.getEffectiveSCEVType(A->getType()) !=
      SE.getEffectiveSCEVType(B->getType()))
    return false;

  return SE.instructionCouldExistWitthOperands(A, B);
}

AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
                                const MemoryLocation &LocB, AAQueryInfo &AAQI) {
  // If either of the memory references is empty, it doesn't matter what the
@@ -41,8 +50,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,

  // If something is known about the difference between the two addresses,
  // see if it's enough to prove a NoAlias.
  if (SE.getEffectiveSCEVType(AS->getType()) ==
      SE.getEffectiveSCEVType(BS->getType())) {
  if (canComputePointerDiff(SE, AS, BS)) {
    unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
    APInt ASizeInt(BitWidth, LocA.Size.hasValue()
                                 ? LocA.Size.getValue()
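For context only (this is not part of the diff; it paraphrases the elided remainder of SCEVAAResult::alias from memory, so treat the exact expressions as assumptions): once canComputePointerDiff succeeds, the difference is used roughly as follows to prove NoAlias, by showing each access ends before the other begins:

  // Hedged sketch of the logic below the hunk: if B - A is at least the size
  // of the access at A, and A - B is at least the size of the access at B,
  // the two locations cannot overlap.
  const SCEV *BA = SE.getMinusSCEV(BS, AS);
  if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) &&
      (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))
    return AliasResult::NoAlias;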
128 changes: 126 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -212,6 +212,130 @@ for.end: ; preds = %for.body, %entry
ret void
}

; CHECK: 14 no alias responses
; CHECK: 26 may alias responses
; CHECK: Function: test_no_dom: 3 pointers, 0 call sites
; CHECK: MayAlias: double* %addr1, double* %data
; CHECK: NoAlias: double* %addr2, double* %data
; CHECK: MayAlias: double* %addr1, double* %addr2

; In this case, checking %addr1 and %addr2 involves two addrecs in two
; different loops where neither dominates the other. This used to crash
; because we expected the arguments to an AddExpr to have a strict
; dominance order.
define void @test_no_dom(double* %data) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br i1 undef, label %subloop1, label %subloop2

subloop1:
%iv1 = phi i32 [0, %for.body], [%iv1.next, %subloop1]
%iv1.next = add i32 %iv1, 1
%addr1 = getelementptr double, double* %data, i32 %iv1
store double 0.0, double* %addr1
%cmp1 = icmp slt i32 %iv1, 200
br i1 %cmp1, label %subloop1, label %for.latch

subloop2:
%iv2 = phi i32 [400, %for.body], [%iv2.next, %subloop2]
%iv2.next = add i32 %iv2, 1
%addr2 = getelementptr double, double* %data, i32 %iv2
store double 0.0, double* %addr2
%cmp2 = icmp slt i32 %iv2, 600
br i1 %cmp2, label %subloop2, label %for.latch

for.latch:
br label %for.body

for.end:
ret void
}

declare double* @get_addr(i32 %i)

; CHECK: Function: test_no_dom2: 3 pointers, 2 call sites
; CHECK: MayAlias: double* %addr1, double* %data
; CHECK: MayAlias: double* %addr2, double* %data
; CHECK: MayAlias: double* %addr1, double* %addr2

; In this case, checking %addr1 and %addr2 involves two addrecs in two
; different loops where neither dominates the other. This is analogous
; to test_no_dom, but involves SCEVUnknown as opposed to SCEVAddRecExpr.
define void @test_no_dom2(double* %data) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br i1 undef, label %subloop1, label %subloop2

subloop1:
%iv1 = phi i32 [0, %for.body], [%iv1.next, %subloop1]
%iv1.next = add i32 %iv1, 1
%addr1 = call double* @get_addr(i32 %iv1)
store double 0.0, double* %addr1
%cmp1 = icmp slt i32 %iv1, 200
br i1 %cmp1, label %subloop1, label %for.latch

subloop2:
%iv2 = phi i32 [400, %for.body], [%iv2.next, %subloop2]
%iv2.next = add i32 %iv2, 1
%addr2 = call double* @get_addr(i32 %iv2)
store double 0.0, double* %addr2
%cmp2 = icmp slt i32 %iv2, 600
br i1 %cmp2, label %subloop2, label %for.latch

for.latch:
br label %for.body

for.end:
ret void
}


; CHECK: Function: test_dom: 3 pointers, 0 call sites
; CHECK: MayAlias: double* %addr1, double* %data
; CHECK: NoAlias: double* %addr2, double* %data
; CHECK: NoAlias: double* %addr1, double* %addr2

; This is a variant of test_no_dom where the second subloop is
; dominated by the first. As a result of that, we can nest the
; addrecs and cancel out the %data base pointer.
define void @test_dom(double* %data) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %subloop1

subloop1:
%iv1 = phi i32 [0, %for.body], [%iv1.next, %subloop1]
%iv1.next = add i32 %iv1, 1
%addr1 = getelementptr double, double* %data, i32 %iv1
store double 0.0, double* %addr1
%cmp1 = icmp slt i32 %iv1, 200
br i1 %cmp1, label %subloop1, label %subloop2

subloop2:
%iv2 = phi i32 [400, %subloop1], [%iv2.next, %subloop2]
%iv2.next = add i32 %iv2, 1
%addr2 = getelementptr double, double* %data, i32 %iv2
store double 0.0, double* %addr2
%cmp2 = icmp slt i32 %iv2, 600
br i1 %cmp2, label %subloop2, label %for.latch

for.latch:
br label %for.body

for.end:
ret void
}

; CHECK: 17 no alias responses
; CHECK: 32 may alias responses
; CHECK: 18 must alias responses
