diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 15ff129deda13..af218ba564081 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -613,6 +613,12 @@ LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const DominatorTree *DT = nullptr, bool AllowEphemerals = false); +/// Returns true, if no instruction between \p Assume and \p CtxI may free +/// memory and the function is marked as NoSync. The latter ensures the current +/// function cannot arrange for another thread to free on its behalf. +LLVM_ABI bool willNotFreeBetween(const Instruction *Assume, + const Instruction *CtxI); + enum class OverflowResult { /// Always overflows in the direction of signed/unsigned min value. AlwaysOverflowsLow, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 47dccde45337b..7adb25d0e5e0d 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -233,19 +233,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap( const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes); // Check if we have a suitable dereferencable assumption we can use. - if (!StartPtrV->canBeFreed()) { - Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); - if (BasicBlock *LoopPred = L->getLoopPredecessor()) { - if (isa(LoopPred->getTerminator())) - CtxI = LoopPred->getTerminator(); - } - - RetainedKnowledge DerefRK = getKnowledgeValidInContext( - StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT); - if (DerefRK) { - DerefBytesSCEV = - SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue)); - } + Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); + if (BasicBlock *LoopPred = L->getLoopPredecessor()) { + if (isa(LoopPred->getTerminator())) + CtxI = LoopPred->getTerminator(); + } + RetainedKnowledge DerefRK; + getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC, + [&](RetainedKnowledge RK, Instruction *Assume, auto) { + if (!isValidAssumeForContext(Assume, CtxI, DT)) + return false; + if (StartPtrV->canBeFreed() && + !willNotFreeBetween(Assume, CtxI)) + return false; + DerefRK = std::max(DerefRK, RK); + return true; + }); + if (DerefRK) { + DerefBytesSCEV = + SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue)); } if (DerefBytesSCEV->isZero()) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 09a8fbea065ac..1eda7a7c90b08 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -89,6 +89,9 @@ using namespace llvm::PatternMatch; static cl::opt DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); +/// Maximum number of instructions to check between assume and context +/// instruction. +static constexpr unsigned MaxInstrsToCheckForFree = 16; /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. @@ -561,6 +564,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return false; } +bool llvm::willNotFreeBetween(const Instruction *Assume, + const Instruction *CtxI) { + if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI)) + return false; + // Make sure the current function cannot arrange for another thread to free on + // its behalf. + if (!CtxI->getFunction()->hasNoSync()) + return false; + + // Check if there are any calls between the assume and CtxI that may + // free memory. + for (const auto &[Idx, I] : + enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) { + // Limit number of instructions to walk. + if (Idx > MaxInstrsToCheckForFree) + return false; + if (const auto *CB = dyn_cast(&I)) + if (!CB->hasFnAttr(Attribute::NoFree)) + return false; + } + return true; +} + // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but // we still have enough information about `RHS` to conclude non-zero. For // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll index 6d9aa8d1ea32b..05aad8a6118d2 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll @@ -770,10 +770,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: (Low: %B High: (2000 + %B)) ; CHECK-NEXT: Member: {%B,+,4}<%loop.header> ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: (Low: %A High: (2000 + %A)) ; CHECK-NEXT: Member: {%A,+,4}<%loop.header> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index 96206977864e2..f7946206931c2 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -512,8 +512,8 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] ; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] ; CHECK: [[LOOP_INC]]: