diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index d6f732d35fd4c..e8e4f491be5a3 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -287,6 +287,10 @@ class AAQueryInfo {
   ///     store %l, ...
   bool MayBeCrossIteration = false;
 
+  /// Whether alias analysis is allowed to use the dominator tree, for use by
+  /// passes that lazily update the DT while performing AA queries.
+  bool UseDominatorTree = true;
+
   AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
 };
 
@@ -668,6 +672,9 @@ class BatchAAResults {
   void enableCrossIterationMode() {
     AAQI.MayBeCrossIteration = true;
   }
+
+  /// Disable the use of the dominator tree during alias analysis queries.
+  void disableDominatorTree() { AAQI.UseDominatorTree = false; }
 };
 
 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index afc1811239f28..7eca82729430d 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase {
   const Function &F;
   const TargetLibraryInfo &TLI;
   AssumptionCache &AC;
-  DominatorTree *DT;
+  /// Use getDT() instead of accessing this member directly, in order to
+  /// respect the AAQI.UseDominatorTree option.
+  DominatorTree *DT_;
+
+  DominatorTree *getDT(const AAQueryInfo &AAQI) const {
+    return AAQI.UseDominatorTree ? DT_ : nullptr;
+  }
 
 public:
   BasicAAResult(const DataLayout &DL, const Function &F,
                 const TargetLibraryInfo &TLI, AssumptionCache &AC,
                 DominatorTree *DT = nullptr)
-      : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {}
+      : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {}
 
   BasicAAResult(const BasicAAResult &Arg)
       : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
-        DT(Arg.DT) {}
+        DT_(Arg.DT_) {}
   BasicAAResult(BasicAAResult &&Arg)
       : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
-        AC(Arg.AC), DT(Arg.DT) {}
+        AC(Arg.AC), DT_(Arg.DT_) {}
 
   /// Handle invalidation events in the new pass manager.
   bool invalidate(Function &Fn, const PreservedAnalyses &PA,
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 2880ed33a34cb..0926093bba99d 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -18,7 +18,7 @@
 
 namespace llvm {
 
-class AAResults;
+class BatchAAResults;
 class AssumptionCache;
 class DataLayout;
 class DominatorTree;
@@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
 /// location in memory, as opposed to the value operand of a store.
 ///
 /// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(LoadInst *Load,
-                                BasicBlock *ScanBB,
+Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                 BasicBlock::iterator &ScanFrom,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan,
-                                AAResults *AA = nullptr,
+                                BatchAAResults *AA = nullptr,
                                 bool *IsLoadCSE = nullptr,
                                 unsigned *NumScanedInst = nullptr);
 
@@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
 /// FindAvailableLoadedValue() for the case where we are not interested in
 /// finding the closest clobbering instruction if no available load is found.
 /// This overload cannot be used to scan across multiple blocks.
-Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
+Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
+                                bool *IsLoadCSE,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan);
 
 /// Scan backwards to see if we have the value of the given pointer available
@@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
 Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
                                  bool AtLeastAtomic, BasicBlock *ScanBB,
                                  BasicBlock::iterator &ScanFrom,
-                                 unsigned MaxInstsToScan, AAResults *AA,
+                                 unsigned MaxInstsToScan, BatchAAResults *AA,
                                  bool *IsLoadCSE, unsigned *NumScanedInst);
 
 /// Returns true if a pointer value \p A can be replace with another pointer
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3178e2d278167..1028b52a79123 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
   // may be created without handles to some analyses and in that case don't
   // depend on them.
   if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
-      (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
+      (DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
     return true;
 
   // Otherwise this analysis result remains valid.
@@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP(
                : AliasResult::MayAlias;
   }
 
+  DominatorTree *DT = getDT(AAQI);
   DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
   DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
 
@@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
         const Value *HintO1 = getUnderlyingObject(Hint1);
         const Value *HintO2 = getUnderlyingObject(Hint2);
 
+        DominatorTree *DT = getDT(AAQI);
         auto ValidAssumeForPtrContext = [&](const Value *Ptr) {
           if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) {
             return isValidAssumeForContext(Assume, PtrI, DT,
@@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
   if (!Inst || Inst->getParent()->isEntryBlock())
     return true;
 
-  return isNotInCycle(Inst, DT, /*LI*/ nullptr);
+  return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr);
 }
 
 /// Computes the symbolic difference between two de-composed GEPs.
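Taken together, the AliasAnalysis.h and BasicAliasAnalysis changes let a caller opt out of dominator-tree-based reasoning per query batch. Below is a minimal sketch of the intended usage, assuming an existing `AAResults &AA` and two `MemoryLocation`s; the helper function itself is hypothetical, not part of this patch:

```cpp
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

// Hypothetical helper: query aliasing without consulting the dominator tree.
// BatchAAResults caches results across queries, so an instance must only be
// used while the IR is not being modified.
static bool provablyNoAlias(AAResults &AA, const MemoryLocation &LocA,
                            const MemoryLocation &LocB) {
  BatchAAResults BatchAA(AA);
  // New in this patch: keep BasicAA from reasoning with a dominator tree
  // that the calling pass has not finished updating.
  BatchAA.disableDominatorTree();
  return BatchAA.alias(LocA, LocB) == AliasResult::NoAlias;
}
```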
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 1ebc593016bc0..16635097d20af 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
     BasicBlock::iterator BBI = L->getIterator();
     BasicBlock *BB = L->getParent();
     SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+    BatchAAResults BatchAA(*AA);
     for (;;) {
       if (!VisitedBlocks.insert(BB).second)
         break;
       if (Value *U =
-              FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
+              FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
         return findValueImpl(U, OffsetOk, Visited);
       if (BBI != BB->begin())
         break;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 97d21db86abf2..6bf0d2f56eb4e 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
              "to scan backward from a given instruction, when searching for "
              "available loaded value"));
 
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
-                                      BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
-                                      AAResults *AA, bool *IsLoad,
+                                      BatchAAResults *AA, bool *IsLoad,
                                       unsigned *NumScanedInst) {
   // Don't CSE load that is volatile or anything stronger than unordered.
   if (!Load->isUnordered())
@@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
 Value *llvm::findAvailablePtrLoadStore(
     const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
     BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
-    AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
+    BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
@@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore(
   return nullptr;
 }
 
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
                                       bool *IsLoadCSE,
                                       unsigned MaxInstsToScan) {
   const DataLayout &DL = Load->getModule()->getDataLayout();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bb2a77daa60a7..1254a050027a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
   bool IsLoadCSE = false;
-  if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
+  BatchAAResults BatchAA(*AA);
+  if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
     if (IsLoadCSE)
       combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
 
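The `FindAvailableLoadedValue()` signature change is mechanical for callers: anyone holding a plain `AAResults` wraps it in a short-lived `BatchAAResults` first, exactly as Lint and InstCombine do above. A sketch of the migration for a hypothetical out-of-tree caller (the wrapper function is made up):

```cpp
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"

using namespace llvm;

// Before this patch: FindAvailableLoadedValue(LI, *AA, &IsLoadCSE).
// After: wrap the AAResults in a BatchAAResults for the duration of the scan.
static Value *findAvailableValueFor(LoadInst *LI, AAResults &AA) {
  BatchAAResults BatchAA(AA); // valid as long as no IR changes occur
  bool IsLoadCSE = false;
  return FindAvailableLoadedValue(LI, BatchAA, &IsLoadCSE);
}
```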
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 8603c5cf9c022..87c01ead634ff 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1260,8 +1260,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
   // the entry to its block.
   BasicBlock::iterator BBIt(LoadI);
   bool IsLoadCSE;
+  BatchAAResults BatchAA(*AA);
+  // The dominator tree is updated lazily and may not be valid at this point.
+  BatchAA.disableDominatorTree();
   if (Value *AvailableVal = FindAvailableLoadedValue(
-          LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
+          LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
     // If the value of the load is locally available within the block, just use
     // it. This frequently occurs for reg2mem'd allocas.
 
@@ -1322,9 +1325,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
     MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
                        LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
                        AATags);
-    PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
-                                              PredBB, BBIt, DefMaxInstsToScan,
-                                              AA, &IsLoadCSE, &NumScanedInst);
+    PredAvailable = findAvailablePtrLoadStore(
+        Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+        &BatchAA, &IsLoadCSE, &NumScanedInst);
 
     // If PredBB has a single predecessor, continue scanning through the
     // single predecessor.
@@ -1336,7 +1339,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
         BBIt = SinglePredBB->end();
         PredAvailable = findAvailablePtrLoadStore(
             Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
-            (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+            (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
             &NumScanedInst);
       }
     }
diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll
new file mode 100644
index 0000000000000..2c7ee0770cdc7
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=jump-threading < %s | FileCheck %s
+
+@f = external global i32
+
+; Make sure the value of @f is reloaded prior to the final comparison.
+define i32 @test(i64 %idx, i32 %val) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IDX]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[RETURN:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[F:%.*]] = load i32, ptr @f, align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[F]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[COND_END_THREAD:%.*]], label [[COND_END:%.*]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0
+; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
+; CHECK:       cond.end.thread:
+; CHECK-NEXT:    br label [[TMP0]]
+; CHECK:       0:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
+; CHECK-NEXT:    [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[F_IDX]], align 4
+; CHECK-NEXT:    [[F_RELOAD:%.*]] = load i32, ptr @f, align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1
+; CHECK-NEXT:    br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       return2:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  %cmp = icmp slt i64 %idx, 1
+  br i1 %cmp, label %for.body, label %return
+
+for.body:
+  %f = load i32, ptr @f, align 4
+  %cmp1 = icmp eq i32 %f, 0
+  br i1 %cmp1, label %cond.end, label %cond.false
+
+cond.false:
+  br label %cond.end
+
+cond.end:
+  %phi = phi i32 [ %val, %cond.false ], [ 1, %for.body ]
+  %cmp.i = icmp sgt i32 %phi, 0
+  %sel = select i1 %cmp.i, i32 0, i32 %phi
+  %f.idx = getelementptr inbounds i32, ptr @f, i64 %idx
+  store i32 %sel, ptr %f.idx, align 4
+  %f.reload = load i32, ptr @f, align 4
+  %cmp3 = icmp slt i32 %f.reload, 1
+  br i1 %cmp3, label %return2, label %return
+
+return:
+  ret i32 0
+
+return2:
+  ret i32 1
+}
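Why JumpThreading in particular needs `disableDominatorTree()`: the pass routes its CFG changes through a lazily-flushed `DomTreeUpdater`, so at the point where the load-availability scan runs, the `DominatorTree` may still describe the pre-threading CFG. The pr79175.ll test above guards against the resulting miscompile: with a stale tree, AA could wrongly conclude that the store to `%f.idx` does not clobber `@f`, letting the reload be folded away. The sketch below shows the lazy-update pattern in simplified, illustrative form; it is not the pass's actual code, and the function and block names are placeholders:

```cpp
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Illustrative only: with UpdateStrategy::Lazy, applyUpdates() queues the
// edge changes instead of applying them, so until DTU.flush() runs, the
// DominatorTree still reflects the old CFG.
static void redirectEdge(DomTreeUpdater &DTU, BasicBlock *BB,
                         BasicBlock *OldSucc, BasicBlock *NewSucc) {
  DTU.applyUpdates({{DominatorTree::Delete, BB, OldSucc},
                    {DominatorTree::Insert, BB, NewSucc}});
  // Any BasicAA query issued here would reason from stale dominance
  // information unless the query path ignores the DT, hence the
  // BatchAA.disableDominatorTree() call in simplifyPartiallyRedundantLoad().
}
```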