From cae643d596918e5a7cb00287c572099fb04715bd Mon Sep 17 00:00:00 2001
From: Evgeniy Brevnov
Date: Fri, 14 Feb 2020 22:55:13 +0700
Subject: [PATCH] Reverting D73027 [DependenceAnalysis] Dependencies for loads
 marked with "invariant.load" should not be shared with general accesses
 (PR42151).

---
 .../lib/Analysis/MemoryDependenceAnalysis.cpp |  80 +++-----
 .../MemoryDependenceAnalysis/InvariantLoad.ll | 173 ------------------
 2 files changed, 24 insertions(+), 229 deletions(-)
 delete mode 100644 llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll

diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index ce9944a5ce4bee..450595cac57b45 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -902,11 +902,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
     Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
     BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
 
-  bool isInvariantLoad = false;
-
-  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
-    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
-
   // Do a binary search to see if we already have an entry for this block in
   // the cache set.  If so, find it.
   NonLocalDepInfo::iterator Entry = std::upper_bound(
@@ -918,13 +913,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
   if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
     ExistingResult = &*Entry;
 
-  // Use cached result for invariant load only if there is no dependency for non
-  // invariant load. In this case invariant load can not have any dependency as
-  // well.
-  if (ExistingResult && isInvariantLoad &&
-      !ExistingResult->getResult().isNonFuncLocal())
-    ExistingResult = nullptr;
-
   // If we have a cached entry, and it is non-dirty, use it as the value for
   // this dependency.
   if (ExistingResult && !ExistingResult->getResult().isDirty()) {
@@ -953,10 +941,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
   MemDepResult Dep =
       getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
 
-  // Don't cache results for invariant load.
-  if (isInvariantLoad)
-    return Dep;
-
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
   if (ExistingResult)
@@ -1045,10 +1029,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   InitialNLPI.Size = Loc.Size;
   InitialNLPI.AATags = Loc.AATags;
 
-  bool isInvariantLoad = false;
-  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
-    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
-
   // Get the NLPI for CacheKey, inserting one into the map if it doesn't
   // already have one.
   std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
@@ -1057,8 +1037,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
 
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
-  // Invariant loads don't participate in caching. Thus no need to reconcile.
-  if (!isInvariantLoad && !Pair.second) {
+  if (!Pair.second) {
     if (CacheInfo->Size != Loc.Size) {
       bool ThrowOutEverything;
       if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
@@ -1114,10 +1093,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
 
   // If we have valid cached information for exactly the block we are
   // investigating, just return it with no recomputation.
-  // Don't use cached information for invariant loads since it is valid for
-  // non-invariant loads only.
-  if (!isInvariantLoad &&
-      CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
     // We have a fully cached result for this query then we can just return the
     // cached results and populate the visited set.  However, we have to verify
     // that we don't already have conflicting results for these blocks.  Check
@@ -1153,18 +1129,14 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
       return true;
     }
 
-    // Invariant loads don't affect cache in any way thus no need to update
-    // CacheInfo as well.
-    if (!isInvariantLoad) {
-      // Otherwise, either this is a new block, a block with an invalid cache
-      // pointer or one that we're about to invalidate by putting more info into
-      // it than its valid cache info. If empty, the result will be valid cache
-      // info, otherwise it isn't.
-      if (Cache->empty())
-        CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
-      else
-        CacheInfo->Pair = BBSkipFirstBlockPair();
-    }
+  // Otherwise, either this is a new block, a block with an invalid cache
+  // pointer or one that we're about to invalidate by putting more info into it
+  // than its valid cache info.  If empty, the result will be valid cache info,
+  // otherwise it isn't.
+  if (Cache->empty())
+    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else
+    CacheInfo->Pair = BBSkipFirstBlockPair();
 
   SmallVector<BasicBlock *, 32> Worklist;
   Worklist.push_back(StartBB);
@@ -1405,26 +1377,22 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
       if (SkipFirstBlock)
         return false;
 
-      // Results of invariant loads are not cached thus no need to update cached
-      // information.
-      if (!isInvariantLoad) {
-        bool foundBlock = false;
-        for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
-          if (I.getBB() != BB)
-            continue;
+      bool foundBlock = false;
+      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
+        if (I.getBB() != BB)
+          continue;
 
-          assert((GotWorklistLimit || I.getResult().isNonLocal() ||
-                  !DT.isReachableFromEntry(BB)) &&
-                 "Should only be here with transparent block");
-          foundBlock = true;
-          I.setResult(MemDepResult::getUnknown());
-          Result.push_back(
-              NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
-          break;
-        }
-        (void)foundBlock; (void)GotWorklistLimit;
-        assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
-      }
+        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
+                !DT.isReachableFromEntry(BB)) &&
+               "Should only be here with transparent block");
+        foundBlock = true;
+        I.setResult(MemDepResult::getUnknown());
+        Result.push_back(
+            NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
+        break;
+      }
+      (void)foundBlock; (void)GotWorklistLimit;
+      assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
    }

    // Okay, we're done now.  If we added new values to the cache, re-sort it.
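For context on what the revert restores: a load tagged !invariant.load may report "no dependency" across a call that would clobber the same address for an ordinary load, so letting both kinds of queries share one cached dependency entry can hand the wrong answer to one of them. Below is a minimal standalone sketch of that pattern; it is not part of the patch, the function name @sketch is illustrative, and it mirrors @test2 in the test file deleted below.

; A minimal sketch (assumed standalone module, typed pointers as in the
; deleted test). %v may fold to %res1 because of !invariant.load, but
; %res2 must still treat the call to @foo as a potential clobber, so the
; two load queries need different dependency answers for the same pointer.
declare void @foo(i8*)

define i8 @sketch(i8* %p) {
entry:
  %res1 = load i8, i8* %p
  call void @foo(i8* %p)                     ; may write through %p
  %res2 = load i8, i8* %p                    ; depends on the call above
  %v = load i8, i8* %p, !invariant.load !0   ; no dependency: location never changes
  %sum = add i8 %res2, %v
  ret i8 %sum
}

!0 = !{}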
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
deleted file mode 100644
index 152cb175ef6082..00000000000000
--- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
+++ /dev/null
@@ -1,173 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -gvn -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
-declare void @foo(i8*)
-
-define i8 @test(i1 %cmp) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P:%.*]] = alloca i8
-; CHECK-NEXT:    store i8 5, i8* [[P]]
-; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[I_INC:%.*]], [[ALIVE]] ]
-; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[ALIVE]], label [[DEAD:%.*]]
-; CHECK:       dead:
-; CHECK-NEXT:    call void @foo(i8* [[P]])
-; CHECK-NEXT:    [[I_1:%.*]] = add i8 [[I]], [[V]]
-; CHECK-NEXT:    br label [[ALIVE]]
-; CHECK:       alive:
-; CHECK-NEXT:    [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ]
-; CHECK-NEXT:    store i8 -5, i8* [[P]]
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* align 1 [[P]], i8 0, i32 1, i1 false)
-; CHECK-NEXT:    [[I_INC]] = add i8 [[I_2]], 1
-; CHECK-NEXT:    [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100
-; CHECK-NEXT:    br i1 [[CMP_LOOP]], label [[EXIT:%.*]], label [[HEADER]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret i8 0
-;
-
-entry:
-  %p = alloca i8
-  %addr = getelementptr inbounds i8, i8* %p, i64 0
-  store i8 5, i8* %addr
-  br label %header
-header:
-  %i = phi i8 [0, %entry], [%i.inc, %backedge]
-  br i1 %cmp, label %alive, label %dead
-dead:
-  call void @foo(i8* %p)
-  %v = load i8, i8* %addr, !invariant.load !1
-  %i.1 = add i8 %i, %v
-  br label %alive
-alive:
-  %i.2 = phi i8 [%i, %header], [%i.1, %dead]
-  store i8 -5, i8* %addr
-  br label %backedge
-backedge:
-  call void @llvm.memset.p0i8.i32(i8 * align 1 %p, i8 0, i32 1, i1 false)
-  %i.inc = add i8 %i.2, 1
-  %cmp.loop = icmp ugt i8 %i.inc, 100
-  br i1 %cmp.loop, label %exit, label %header
-exit:
-  %res = load i8, i8* %addr
-  ret i8 %res
-}
-
-; Check that the first two loads are not optimized out, while the load marked
-; with !invariant.load reuses %res1.
-define i8 @test2(i1 %cmp, i8 *%p) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES1:%.*]] = load i8, i8* [[P:%.*]]
-; CHECK-NEXT:    call void @foo(i8* [[P]])
-; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[B2:%.*]], label [[B1:%.*]]
-; CHECK:       b1:
-; CHECK-NEXT:    [[RES2:%.*]] = load i8, i8* [[P]]
-; CHECK-NEXT:    [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
-; CHECK-NEXT:    br label [[ALIVE:%.*]]
-; CHECK:       b2:
-; CHECK-NEXT:    [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
-; CHECK-NEXT:    br label [[ALIVE]]
-; CHECK:       alive:
-; CHECK-NEXT:    [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
-; CHECK-NEXT:    ret i8 [[RES_PHI]]
-;
-
-entry:
-  %res1 = load i8, i8* %p
-  call void @foo(i8 *%p)
-  br i1 %cmp, label %b2, label %b1
-b1:
-  %res2 = load i8, i8* %p
-  %res3 = add i8 %res1, %res2
-  br label %alive
-b2:
-  %v = load i8, i8* %p, !invariant.load !1
-  %res.dead = add i8 %v, %res1
-  br label %alive
-alive:
-  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
-  ret i8 %res.phi
-}
-
-; This is essentially the same test case as the above one, but with %b1 and %b2
-; swapped in the "br i1 %cmp, label %b1, label %b2" instruction. That helps us
-; ensure that the result does not depend on the visiting order.
-define i8 @test3(i1 %cmp, i8 *%p) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES1:%.*]] = load i8, i8* [[P:%.*]]
-; CHECK-NEXT:    call void @foo(i8* [[P]])
-; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]]
-; CHECK:       b1:
-; CHECK-NEXT:    [[RES2:%.*]] = load i8, i8* [[P]]
-; CHECK-NEXT:    [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
-; CHECK-NEXT:    br label [[ALIVE:%.*]]
-; CHECK:       b2:
-; CHECK-NEXT:    [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
-; CHECK-NEXT:    br label [[ALIVE]]
-; CHECK:       alive:
-; CHECK-NEXT:    [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
-; CHECK-NEXT:    ret i8 [[RES_PHI]]
-;
-entry:
-  %res1 = load i8, i8* %p
-  call void @foo(i8 *%p)
-  br i1 %cmp, label %b1, label %b2
-b1:
-  %res2 = load i8, i8* %p
-  %res3 = add i8 %res1, %res2
-  br label %alive
-b2:
-  %v = load i8, i8* %p, !invariant.load !1
-  %res.dead = add i8 %v, %res1
-  br label %alive
-alive:
-  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
-  ret i8 %res.phi
-}
-
-
-; This is a reduced test case catching a regression in the first version of
-; the fix for invariant loads (https://reviews.llvm.org/D64405).
-define void @test4() {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* inttoptr (i64 8 to float*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[TMP0]], [[TMP0]]
-; CHECK-NEXT:    br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER:%.*]]
-; CHECK:       fusion.loop_header.dim.1.preheader:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
-; CHECK-NEXT:    [[FUSION_INVAR_ADDRESS_DIM_0_03:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INVAR_INC3:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], i64 0, i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul float [[TMP2]], [[TMP2]]
-; CHECK-NEXT:    [[INVAR_INC3]] = add nuw nsw i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], 1
-; CHECK-NEXT:    [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[INVAR_INC3]], i64 0, i64 2
-; CHECK-NEXT:    [[DOTPRE]] = load float, float* [[DOTPHI_TRANS_INSERT]], align 4, !invariant.load !0
-; CHECK-NEXT:    br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]]
-;
-entry:
-  %0 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 0, i64 0, i64 2
-  %1 = load float, float* %0, align 4
-  %2 = fmul float %1, %1
-  br label %fusion.loop_header.dim.1.preheader
-
-fusion.loop_header.dim.1.preheader:               ; preds = %fusion.loop_header.dim.1.preheader, %entry
-  %fusion.invar_address.dim.0.03 = phi i64 [ 0, %entry ], [ %invar.inc3, %fusion.loop_header.dim.1.preheader ]
-  %3 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
-  %4 = load float, float* %3, align 4, !invariant.load !1
-  %5 = fmul float %4, %4
-  %6 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
-  %7 = load float, float* %6, align 4, !invariant.load !1
-  %8 = fmul float %7, %7
-  %invar.inc3 = add nuw nsw i64 %fusion.invar_address.dim.0.03, 1
-  br label %fusion.loop_header.dim.1.preheader
-}
-
-!1 = !{}
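If the deleted test is ever re-landed, its CHECK lines are meant to be regenerated rather than hand-written (see the NOTE line at the top of the file). A sketch of the regeneration command, assuming an LLVM source checkout with opt built in build/bin (both paths are illustrative):

# Regenerate the autogenerated CHECK lines for the test (paths assumed).
python llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
    llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll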