[BasicAA] Generalize base offset modulus handling

The GEP aliasing implementation currently has two pieces of code that solve two different subsets of the same basic problem: If you have GEPs with offsets 4*x + 0 and 4*y + 1 (assuming access size 1), then they do not alias regardless of whether x and y are the same. One implementation is in aliasSameBasePointerGEPs(), which looks at this in a limited structural way. It requires both GEP base pointers to be exactly the same, then (optionally) a number of equal indexes, then an unknown index, then a non-equal index into a struct. This set of limitations works, but it's overly restrictive and hides the core property we're trying to exploit. The second implementation is part of aliasGEP() itself and tries to find a common modulus in the scales, so it can then check that the constant offset doesn't overlap under modular arithmetic. The second implementation has the right idea of what the general problem is, but effectively only considers power of two factors in the scales (while aliasSameBasePointerGEPs also works with non-pow2 struct sizes.) What this patch does is to adjust the aliasGEP() implementation to instead find the largest common factor in all the scales (i.e. the GCD) and use that as the modulus. Differential Revision: https://reviews.llvm.org/D91027
llvm · Nov 18, 2020 · cd3c22c · cd3c22c
1 parent ca76e9f
commit cd3c22c
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 66 deletions.
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1088,8 +1088,6 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
 
   auto *Ty = GetElementPtrInst::getIndexedType(
     GEP1->getSourceElementType(), IntermediateIndices);
-  StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
-
   if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
     // We know that:
     // - both GEPs begin indexing from the exact same pointer;
@@ -1143,44 +1141,7 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
       } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
         return NoAlias;
     }
-    return MayAlias;
-  } else if (!LastIndexedStruct || !C1 || !C2) {
-    return MayAlias;
   }
-
-  if (C1->getValue().getActiveBits() > 64 ||
-      C2->getValue().getActiveBits() > 64)
-    return MayAlias;
-
-  // We know that:
-  // - both GEPs begin indexing from the exact same pointer;
-  // - the last indices in both GEPs are constants, indexing into a struct;
-  // - said indices are different, hence, the pointed-to fields are different;
-  // - both GEPs only index through arrays prior to that.
-  //
-  // This lets us determine that the struct that GEP1 indexes into and the
-  // struct that GEP2 indexes into must either precisely overlap or be
-  // completely disjoint.  Because they cannot partially overlap, indexing into
-  // different non-overlapping fields of the struct will never alias.
-
-  // Therefore, the only remaining thing needed to show that both GEPs can't
-  // alias is that the fields are not overlapping.
-  const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
-  const uint64_t StructSize = SL->getSizeInBytes();
-  const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
-  const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
-
-  auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
-                                      uint64_t V2Off, uint64_t V2Size) {
-    return V1Off < V2Off && V1Off + V1Size <= V2Off &&
-           ((V2Off + V2Size <= StructSize) ||
-            (V2Off + V2Size - StructSize <= V1Off));
-  };
-
-  if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
-      EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
-    return NoAlias;
-
   return MayAlias;
 }
 
@@ -1392,16 +1353,15 @@ AliasResult BasicAAResult::aliasGEP(
   }
 
   if (!DecompGEP1.VarIndices.empty()) {
-    APInt Modulo(MaxPointerSize, 0);
+    APInt GCD;
     bool AllNonNegative = GEP1BaseOffset.isNonNegative();
     bool AllNonPositive = GEP1BaseOffset.isNonPositive();
     for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
-
-      // Try to distinguish something like &A[i][1] against &A[42][0].
-      // Grab the least significant bit set in any of the scales. We
-      // don't need std::abs here (even if the scale's negative) as we'll
-      // be ^'ing Modulo with itself later.
-      Modulo |= DecompGEP1.VarIndices[i].Scale;
+      const APInt &Scale = DecompGEP1.VarIndices[i].Scale;
+      if (i == 0)
+        GCD = Scale.abs();
+      else
+        GCD = APIntOps::GreatestCommonDivisor(GCD, Scale.abs());
 
       if (AllNonNegative || AllNonPositive) {
         // If the Value could change between cycles, then any reasoning about
@@ -1420,23 +1380,25 @@ AliasResult BasicAAResult::aliasGEP(
         SignKnownZero |= IsZExt;
         SignKnownOne &= !IsZExt;
 
-        APInt Scale = DecompGEP1.VarIndices[i].Scale;
         AllNonNegative &= (SignKnownZero && Scale.isNonNegative()) ||
                           (SignKnownOne && Scale.isNonPositive());
         AllNonPositive &= (SignKnownZero && Scale.isNonPositive()) ||
                           (SignKnownOne && Scale.isNonNegative());
       }
     }
 
-    Modulo = Modulo ^ (Modulo & (Modulo - 1));
-
-    // We can compute the difference between the two addresses
-    // mod Modulo. Check whether that difference guarantees that the
-    // two locations do not alias.
-    APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
+    // We now have accesses at two offsets from the same base:
+    //  1. (...)*GCD + GEP1BaseOffset with size V1Size
+    //  2. 0 with size V2Size
+    // Using arithmetic modulo GCD, the accesses are at
+    // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
+    // into the range [V2Size..GCD), then we know they cannot overlap.
+    APInt ModOffset = GEP1BaseOffset.srem(GCD);
+    if (ModOffset.isNegative())
+      ModOffset += GCD; // We want mod, not rem.
     if (V1Size != LocationSize::unknown() &&
         V2Size != LocationSize::unknown() && ModOffset.uge(V2Size.getValue()) &&
-        (Modulo - ModOffset).uge(V1Size.getValue()))
+        (GCD - ModOffset).uge(V1Size.getValue()))
       return NoAlias;
 
     // If we know all the variables are non-negative, then the total offset is

diff --git a/llvm/test/Analysis/BasicAA/struct-geps.ll b/llvm/test/Analysis/BasicAA/struct-geps.ll
@@ -106,14 +106,14 @@ define void @test_in_3d_array([1 x [1 x [1 x %struct]]]* %st, i64 %i, i64 %j, i6
 ; CHECK-DAG: NoAlias: i32* %y, i32* %y2
 ; CHECK-DAG: NoAlias: i32* %z, i32* %z2
 
-; CHECK-DAG: MayAlias: i32* %x, i32* %y2
-; CHECK-DAG: MayAlias: i32* %x, i32* %z2
+; CHECK-DAG: NoAlias: i32* %x, i32* %y2
+; CHECK-DAG: NoAlias: i32* %x, i32* %z2
 
-; CHECK-DAG: MayAlias: i32* %x2, i32* %y
-; CHECK-DAG: MayAlias: i32* %y, i32* %z2
+; CHECK-DAG: NoAlias: i32* %x2, i32* %y
+; CHECK-DAG: NoAlias: i32* %y, i32* %z2
 
-; CHECK-DAG: MayAlias: i32* %x2, i32* %z
-; CHECK-DAG: MayAlias: i32* %y2, i32* %z
+; CHECK-DAG: NoAlias: i32* %x2, i32* %z
+; CHECK-DAG: NoAlias: i32* %y2, i32* %z
 
 define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64 %j, i64 %k) {
   %st2 = getelementptr %struct, %struct* %st, i32 10
@@ -132,14 +132,14 @@ define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64
 ; CHECK-DAG: MayAlias: i32* %y, i32* %y2
 ; CHECK-DAG: MayAlias: i32* %z, i32* %z2
 
-; CHECK-DAG: MayAlias: i32* %x, i32* %y2
-; CHECK-DAG: MayAlias: i32* %x, i32* %z2
+; CHECK-DAG: NoAlias: i32* %x, i32* %y2
+; CHECK-DAG: NoAlias: i32* %x, i32* %z2
 
-; CHECK-DAG: MayAlias: i32* %x2, i32* %y
-; CHECK-DAG: MayAlias: i32* %y, i32* %z2
+; CHECK-DAG: NoAlias: i32* %x2, i32* %y
+; CHECK-DAG: NoAlias: i32* %y, i32* %z2
 
-; CHECK-DAG: MayAlias: i32* %x2, i32* %z
-; CHECK-DAG: MayAlias: i32* %y2, i32* %z
+; CHECK-DAG: NoAlias: i32* %x2, i32* %z
+; CHECK-DAG: NoAlias: i32* %y2, i32* %z
 
 define void @test_same_underlying_object_different_indices(%struct* %st, i64 %i1, i64 %j1, i64 %k1, i64 %i2, i64 %k2, i64 %j2) {
   %st2 = getelementptr %struct, %struct* %st, i32 10