From cae643d596918e5a7cb00287c572099fb04715bd Mon Sep 17 00:00:00 2001
From: Evgeniy Brevnov
Date: Fri, 14 Feb 2020 22:55:13 +0700
Subject: [PATCH] Reverting D73027 [DependenceAnalysis] Dependencies for loads
 marked with "invariant.load" should not be shared with general accesses
 (PR42151).

---
 .../lib/Analysis/MemoryDependenceAnalysis.cpp |  80 +++-----
 .../MemoryDependenceAnalysis/InvariantLoad.ll | 173 ------------------
 2 files changed, 24 insertions(+), 229 deletions(-)
 delete mode 100644 llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll

diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index ce9944a5ce4bee..450595cac57b45 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -902,11 +902,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
     Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
     BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
 
-  bool isInvariantLoad = false;
-
-  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
-    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
-
   // Do a binary search to see if we already have an entry for this block in
   // the cache set.  If so, find it.
   NonLocalDepInfo::iterator Entry = std::upper_bound(
@@ -918,13 +913,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
   if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
     ExistingResult = &*Entry;
 
-  // Use cached result for invariant load only if there is no dependency for non
-  // invariant load. In this case invariant load can not have any dependency as
-  // well.
-  if (ExistingResult && isInvariantLoad &&
-      !ExistingResult->getResult().isNonFuncLocal())
-    ExistingResult = nullptr;
-
   // If we have a cached entry, and it is non-dirty, use it as the value for
   // this dependency.
   if (ExistingResult && !ExistingResult->getResult().isDirty()) {
@@ -953,10 +941,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
   MemDepResult Dep =
       getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
 
-  // Don't cache results for invariant load.
-  if (isInvariantLoad)
-    return Dep;
-
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
   if (ExistingResult)
@@ -1045,10 +1029,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   InitialNLPI.Size = Loc.Size;
   InitialNLPI.AATags = Loc.AATags;
 
-  bool isInvariantLoad = false;
-  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
-    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
-
   // Get the NLPI for CacheKey, inserting one into the map if it doesn't
   // already have one.
   std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
@@ -1057,8 +1037,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
 
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
-  // Invariant loads don't participate in caching. Thus no need to reconcile.
-  if (!isInvariantLoad && !Pair.second) {
+  if (!Pair.second) {
     if (CacheInfo->Size != Loc.Size) {
       bool ThrowOutEverything;
       if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
@@ -1114,10 +1093,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
 
   // If we have valid cached information for exactly the block we are
   // investigating, just return it with no recomputation.
-  // Don't use cached information for invariant loads since it is valid for
-  // non-invariant loads only.
-  if (!isInvariantLoad &&
-      CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
     // We have a fully cached result for this query then we can just return the
     // cached results and populate the visited set.  However, we have to verify
     // that we don't already have conflicting results for these blocks.  Check
@@ -1153,18 +1129,14 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
       return true;
     }
 
-    // Invariant loads don't affect cache in any way thus no need to update
-    // CacheInfo as well.
-    if (!isInvariantLoad) {
-      // Otherwise, either this is a new block, a block with an invalid cache
-      // pointer or one that we're about to invalidate by putting more info into
-      // it than its valid cache info. If empty, the result will be valid cache
-      // info, otherwise it isn't.
-      if (Cache->empty())
-        CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
-      else
-        CacheInfo->Pair = BBSkipFirstBlockPair();
-    }
+  // Otherwise, either this is a new block, a block with an invalid cache
+  // pointer or one that we're about to invalidate by putting more info into it
+  // than its valid cache info.  If empty, the result will be valid cache info,
+  // otherwise it isn't.
+  if (Cache->empty())
+    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else
+    CacheInfo->Pair = BBSkipFirstBlockPair();
 
   SmallVector<BasicBlock *, 32> Worklist;
   Worklist.push_back(StartBB);
@@ -1405,26 +1377,22 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
       if (SkipFirstBlock)
         return false;
 
-      // Results of invariant loads are not cached thus no need to update cached
-      // information.
-      if (!isInvariantLoad) {
-        bool foundBlock = false;
-        for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
-          if (I.getBB() != BB)
-            continue;
+      bool foundBlock = false;
+      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
+        if (I.getBB() != BB)
+          continue;
 
-          assert((GotWorklistLimit || I.getResult().isNonLocal() ||
-                  !DT.isReachableFromEntry(BB)) &&
-                 "Should only be here with transparent block");
-          foundBlock = true;
-          I.setResult(MemDepResult::getUnknown());
-          Result.push_back(
-              NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
-          break;
-        }
-        (void)foundBlock; (void)GotWorklistLimit;
-        assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
-      }
+        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
+                !DT.isReachableFromEntry(BB)) &&
+               "Should only be here with transparent block");
+        foundBlock = true;
+        I.setResult(MemDepResult::getUnknown());
+        Result.push_back(
+            NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
+        break;
+      }
+      (void)foundBlock; (void)GotWorklistLimit;
+      assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
    }

    // Okay, we're done now.  If we added new values to the cache, re-sort it.
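For context on what the revert restores: a load tagged !invariant.load may report "no dependency" across a call that would clobber the same address for an ordinary load, so letting both kinds of queries share one cached dependency entry can hand the wrong answer to one of them. Below is a minimal standalone sketch of that pattern; it is not part of the patch, the function name @sketch is illustrative, and it mirrors @test2 in the test file deleted below.

; A minimal sketch (assumed standalone module, typed pointers as in the
; deleted test). %v may fold to %res1 because of !invariant.load, but
; %res2 must still treat the call to @foo as a potential clobber, so the
; two load queries need different dependency answers for the same pointer.
declare void @foo(i8*)

define i8 @sketch(i8* %p) {
entry:
  %res1 = load i8, i8* %p
  call void @foo(i8* %p)                     ; may write through %p
  %res2 = load i8, i8* %p                    ; depends on the call above
  %v = load i8, i8* %p, !invariant.load !0   ; no dependency: location never changes
  %sum = add i8 %res2, %v
  ret i8 %sum
}

!0 = !{}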
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
deleted file mode 100644
index 152cb175ef6082..00000000000000
--- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
+++ /dev/null
@@ -1,173 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -gvn -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
-declare void @foo(i8*)
-
-define i8 @test(i1 %cmp) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P:%.*]] = alloca i8
-; CHECK-NEXT:    store i8 5, i8* [[P]]
-; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[I_INC:%.*]], [[ALIVE]] ]
-; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[ALIVE]], label [[DEAD:%.*]]
-; CHECK:       dead:
-; CHECK-NEXT:    call void @foo(i8* [[P]])
-; CHECK-NEXT:    [[I_1:%.*]] = add i8 [[I]], [[V]]
-; CHECK-NEXT:    br label [[ALIVE]]
-; CHECK:       alive:
-; CHECK-NEXT:    [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ]
-; CHECK-NEXT:    store i8 -5, i8* [[P]]
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* align 1 [[P]], i8 0, i32 1, i1 false)
-; CHECK-NEXT:    [[I_INC]] = add i8 [[I_2]], 1
-; CHECK-NEXT:    [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100
-; CHECK-NEXT:    br i1 [[CMP_LOOP]], label [[EXIT:%.*]], label [[HEADER]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret i8 0
-;
-
-entry:
-  %p = alloca i8
-  %addr = getelementptr inbounds i8, i8* %p, i64 0
-  store i8 5, i8* %addr
-  br label %header
-header:
-  %i = phi i8 [0, %entry], [%i.inc, %backedge]
-  br i1 %cmp, label %alive, label %dead
-dead:
-  call void @foo(i8* %p)
-  %v = load i8, i8* %addr, !invariant.load !1
-  %i.1 = add i8 %i, %v
-  br label %alive
-alive:
-  %i.2 = phi i8 [%i, %header], [%i.1, %dead]
-  store i8 -5, i8* %addr
-  br label %backedge
-backedge:
-  call void @llvm.memset.p0i8.i32(i8 * align 1 %p, i8 0, i32 1, i1 false)
-  %i.inc = add i8 %i.2, 1
-  %cmp.loop = icmp ugt i8 %i.inc, 100
-  br i1 %cmp.loop, label %exit, label %header
-exit:
-  %res = load i8, i8* %addr
-  ret i8 %res
-}
-
-; Check that the first two loads are not optimized out, while the load marked
-; with !invariant.load reuses %res1.
-define i8 @test2(i1 %cmp, i8 *%p) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES1:%.*]] = load i8, i8* [[P:%.*]]
-; CHECK-NEXT:    call void @foo(i8* [[P]])
-; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[B2:%.*]], label [[B1:%.*]]
-; CHECK:       b1:
-; CHECK-NEXT:    [[RES2:%.*]] = load i8, i8* [[P]]
-; CHECK-NEXT:    [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
-; CHECK-NEXT:    br label [[ALIVE:%.*]]
-; CHECK:       b2:
-; CHECK-NEXT:    [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
-; CHECK-NEXT:    br label [[ALIVE]]
-; CHECK:       alive:
-; CHECK-NEXT:    [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
-; CHECK-NEXT:    ret i8 [[RES_PHI]]
-;
-
-entry:
-  %res1 = load i8, i8* %p
-  call void @foo(i8 *%p)
-  br i1 %cmp, label %b2, label %b1
-b1:
-  %res2 = load i8, i8* %p
-  %res3 = add i8 %res1, %res2
-  br label %alive
-b2:
-  %v = load i8, i8* %p, !invariant.load !1
-  %res.dead = add i8 %v, %res1
-  br label %alive
-alive:
-  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
-  ret i8 %res.phi
-}
-
-; This is essentially the same test case as the above one, but with %b1 and %b2
-; swapped in the "br i1 %cmp, label %b1, label %b2" instruction. That helps us
-; ensure that the result does not depend on the visiting order.
-define i8 @test3(i1 %cmp, i8 *%p) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES1:%.*]] = load i8, i8* [[P:%.*]]
-; CHECK-NEXT:    call void @foo(i8* [[P]])
-; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]]
-; CHECK:       b1:
-; CHECK-NEXT:    [[RES2:%.*]] = load i8, i8* [[P]]
-; CHECK-NEXT:    [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
-; CHECK-NEXT:    br label [[ALIVE:%.*]]
-; CHECK:       b2:
-; CHECK-NEXT:    [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
-; CHECK-NEXT:    br label [[ALIVE]]
-; CHECK:       alive:
-; CHECK-NEXT:    [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
-; CHECK-NEXT:    ret i8 [[RES_PHI]]
-;
-entry:
-  %res1 = load i8, i8* %p
-  call void @foo(i8 *%p)
-  br i1 %cmp, label %b1, label %b2
-b1:
-  %res2 = load i8, i8* %p
-  %res3 = add i8 %res1, %res2
-  br label %alive
-b2:
-  %v = load i8, i8* %p, !invariant.load !1
-  %res.dead = add i8 %v, %res1
-  br label %alive
-alive:
-  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
-  ret i8 %res.phi
-}
-
-
-; This is a reduced test case catching a regression in the first version of
-; the fix for invariant loads (https://reviews.llvm.org/D64405).
-define void @test4() {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* inttoptr (i64 8 to float*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[TMP0]], [[TMP0]]
-; CHECK-NEXT:    br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER:%.*]]
-; CHECK:       fusion.loop_header.dim.1.preheader:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
-; CHECK-NEXT:    [[FUSION_INVAR_ADDRESS_DIM_0_03:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INVAR_INC3:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], i64 0, i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul float [[TMP2]], [[TMP2]]
-; CHECK-NEXT:    [[INVAR_INC3]] = add nuw nsw i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], 1
-; CHECK-NEXT:    [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[INVAR_INC3]], i64 0, i64 2
-; CHECK-NEXT:    [[DOTPRE]] = load float, float* [[DOTPHI_TRANS_INSERT]], align 4, !invariant.load !0
-; CHECK-NEXT:    br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]]
-;
-entry:
-  %0 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 0, i64 0, i64 2
-  %1 = load float, float* %0, align 4
-  %2 = fmul float %1, %1
-  br label %fusion.loop_header.dim.1.preheader
-
-fusion.loop_header.dim.1.preheader:               ; preds = %fusion.loop_header.dim.1.preheader, %entry
-  %fusion.invar_address.dim.0.03 = phi i64 [ 0, %entry ], [ %invar.inc3, %fusion.loop_header.dim.1.preheader ]
-  %3 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
-  %4 = load float, float* %3, align 4, !invariant.load !1
-  %5 = fmul float %4, %4
-  %6 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
-  %7 = load float, float* %6, align 4, !invariant.load !1
-  %8 = fmul float %7, %7
-  %invar.inc3 = add nuw nsw i64 %fusion.invar_address.dim.0.03, 1
-  br label %fusion.loop_header.dim.1.preheader
-}
-
-!1 = !{}
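If the deleted test is ever re-landed, its CHECK lines are meant to be regenerated rather than hand-written (see the NOTE line at the top of the file). A sketch of the regeneration command, assuming an LLVM source checkout with opt built in build/bin (both paths are illustrative):

# Regenerate the autogenerated CHECK lines for the test (paths assumed).
python llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
    llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll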