diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 654a5f10cea96..0c83814881161 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -28,6 +28,7 @@ class Loop;
 class PredicatedScalarEvolution;
 class ScalarEvolution;
 class SCEV;
+class SCEVAddRecExpr;
 class StoreInst;
 
 /// These are the kinds of recurrences that we support.
@@ -477,6 +478,44 @@ class InductionDescriptor {
   SmallVector<Instruction *, 2> RedundantCasts;
 };
 
+/// A class for saving information about monotonic variables.
+/// A monotonic variable can be viewed as a "conditional" induction variable:
+/// it is updated only on loop iterations for which a certain predicate is
+/// satisfied. In this implementation the predicate is represented as an edge
+/// in the loop CFG: the variable is updated if this edge is taken on the
+/// current loop iteration.
+class MonotonicDescriptor {
+public:
+  using Edge = std::pair<BasicBlock *, BasicBlock *>;
+
+  MonotonicDescriptor() = default;
+
+  const SmallPtrSetImpl<PHINode *> &getChain() const { return Chain; }
+  Instruction *getStepInst() const { return StepInst; }
+  Edge getPredicateEdge() const { return PredEdge; }
+  const SCEVAddRecExpr *getExpr() const { return Expr; }
+
+  /// Returns true if \p PN is a monotonic variable in the loop \p L. If \p PN
+  /// is monotonic, the monotonic descriptor \p Desc will contain the data
+  /// describing this variable.
+  static bool isMonotonicPHI(PHINode *PN, const Loop *L,
+                             MonotonicDescriptor &Desc, ScalarEvolution &SE);
+
+  /// Returns true if \p Val is a monotonic variable in the loop \p L (in this
+  /// case, the value must transitively contain a monotonic phi as part of its
+  /// computation).
+  static bool isMonotonicVal(Value *Val, const Loop *L,
+                             MonotonicDescriptor &Desc, ScalarEvolution &SE);
+
+private:
+  SmallPtrSet<PHINode *, 4> Chain;
+  Instruction *StepInst = nullptr;
+  Edge PredEdge;
+  const SCEVAddRecExpr *Expr = nullptr;
+
+  bool setSCEV(const SCEV *NewExpr);
+};
+
 } // end namespace llvm
 
 #endif // LLVM_ANALYSIS_IVDESCRIPTORS_H
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 9f8ac6e8e2e0b..ee9a30fc33eb5 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1661,3 +1661,122 @@ bool InductionDescriptor::isInductionPHI(
   D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
   return true;
 }
+
+bool MonotonicDescriptor::setSCEV(const SCEV *NewExpr) {
+  auto *AddRec = dyn_cast<SCEVAddRecExpr>(NewExpr);
+  if (!AddRec || !AddRec->isAffine())
+    return false;
+  Expr = AddRec;
+  return true;
+}
+
+// Recognize a monotonic phi variable by matching the following pattern:
+// loop_header:
+//   %monotonic_phi = [%start, %preheader], [%chain_phi0, %latch]
+//
+// step_bb:
+//   %step = add/gep %monotonic_phi, %step_val
+//
+// bbN:
+//   %chain_phiN = [%monotonic_phi, %pred], [%step, %step_bb]
+//
+// ...
+//
+// bb1:
+//   %chain_phi1 = [%monotonic_phi, %pred], [%chain_phi2, %pred]
+//
+// latch:
+//   %chain_phi0 = [%monotonic_phi, %pred], [%chain_phi1, %pred]
+//
+// For this pattern, the monotonic phi is described by the {%start,+,%step_val}
+// recurrence, and the predicate is the CFG edge %step_bb -> %bbN.
+bool MonotonicDescriptor::isMonotonicPHI(PHINode *PN, const Loop *L,
+                                         MonotonicDescriptor &Desc,
+                                         ScalarEvolution &SE) {
+  if (!PN->getType()->isIntOrPtrTy() || PN->getParent() != L->getHeader())
+    return false;
+  auto *BackEdgeInst =
+      dyn_cast<PHINode>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+  if (!BackEdgeInst)
+    return false;
+  SmallVector<PHINode *> Worklist{BackEdgeInst};
+  std::optional<std::pair<Edge, Value *>> Inc;
+  while (!Worklist.empty()) {
+    auto *Phi = Worklist.pop_back_val();
+    Desc.Chain.insert(Phi);
+    for (unsigned I = 0, E = Phi->getNumOperands(); I != E; ++I) {
+      auto *IncomingVal = Phi->getIncomingValue(I);
+      if (IncomingVal == PN)
+        continue;
+      if (!IncomingVal->hasOneUse())
+        return false;
+      if (auto *IncomingPhi = dyn_cast<PHINode>(IncomingVal)) {
+        Worklist.push_back(IncomingPhi);
+        continue;
+      }
+      if (Inc)
+        return false;
+      Inc = std::make_pair(Edge{Phi->getIncomingBlock(I), Phi->getParent()},
+                           IncomingVal);
+    }
+  }
+  if (!Inc)
+    return false;
+  auto [PredEdge, StepOp] = *Inc;
+  auto *StepInst = dyn_cast<Instruction>(StepOp);
+  if (!StepInst)
+    return false;
+  Desc.StepInst = StepInst;
+  Desc.PredEdge = PredEdge;
+
+  // Construct SCEVAddRec for this value.
+  Value *Start = PN->getIncomingValueForBlock(L->getLoopPreheader());
+
+  Value *Step = nullptr;
+  bool StepMatch =
+      PN->getType()->isPointerTy()
+          ? match(StepInst, m_PtrAdd(m_Specific(PN), m_Value(Step)))
+          : match(StepInst, m_Add(m_Specific(PN), m_Value(Step)));
+  if (!StepMatch || !L->isLoopInvariant(Step))
+    return false;
+
+  SCEV::NoWrapFlags WrapFlags = SCEV::FlagAnyWrap;
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(StepInst)) {
+    if (GEP->hasNoUnsignedWrap())
+      WrapFlags = ScalarEvolution::setFlags(WrapFlags, SCEV::FlagNUW);
+    if (GEP->hasNoUnsignedSignedWrap())
+      WrapFlags = ScalarEvolution::setFlags(WrapFlags, SCEV::FlagNSW);
+  } else if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(StepInst)) {
+    if (OBO->hasNoUnsignedWrap())
+      WrapFlags = ScalarEvolution::setFlags(WrapFlags, SCEV::FlagNUW);
+    if (OBO->hasNoSignedWrap())
+      WrapFlags = ScalarEvolution::setFlags(WrapFlags, SCEV::FlagNSW);
+  }
+
+  return Desc.setSCEV(
+      SE.getAddRecExpr(SE.getSCEV(Start), SE.getSCEV(Step), L, WrapFlags));
+}
+
+bool MonotonicDescriptor::isMonotonicVal(Value *Val, const Loop *L,
+                                         MonotonicDescriptor &Desc,
+                                         ScalarEvolution &SE) {
+  if (!Val->getType()->isIntOrPtrTy() || L->isLoopInvariant(Val))
+    return false;
+  auto *CurInst = cast<Instruction>(Val);
+
+  auto NonInvariantVal = [&](Value *V, bool AllowRepeats) {
+    return L->isLoopInvariant(V) ? nullptr : cast<Instruction>(V);
+  };
+
+  while (!isa<PHINode>(CurInst)) {
+    CurInst = find_singleton<Instruction>(CurInst->operands(), NonInvariantVal);
+    if (!CurInst)
+      return false;
+  }
+
+  if (!isMonotonicPHI(cast<PHINode>(CurInst), L, Desc, SE))
+    return false;
+
+  ValueToSCEVMapTy Map{{CurInst, Desc.getExpr()}};
+  return Desc.setSCEV(SCEVParameterRewriter::rewrite(SE.getSCEV(Val), SE, Map));
+}
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 5d88e5f54e3d6..4484c03f3b7c7 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -1024,7 +1025,8 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
   if (AR->getNoWrapFlags(SCEV::NoWrapMask))
     return true;
 
-  if (Ptr && PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
+  if (Ptr && isa<SCEVAddRecExpr>(PSE.getSCEV(Ptr)) &&
+      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
     return true;
 
   // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
@@ -1256,6 +1258,20 @@ static void findForkedSCEVs(
   }
 }
 
+// Conservatively replace the SCEV of a pointer value if it cannot be computed
+// directly, e.g. for monotonic values (they can be treated as affine AddRecs
+// that are updated only under some predicate).
+static const SCEV *
+replacePtrSCEV(PredicatedScalarEvolution &PSE, Value *Ptr,
+               const DenseMap<Value *, const SCEV *> &StridesMap,
+               const Loop *L) {
+  ScalarEvolution *SE = PSE.getSE();
+  if (MonotonicDescriptor MD;
+      MonotonicDescriptor::isMonotonicVal(Ptr, L, MD, *SE))
+    return MD.getExpr();
+  return replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+}
+
 bool AccessAnalysis::createCheckForAccess(
     RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
     const DenseMap<Value *, const SCEV *> &StridesMap,
@@ -1282,7 +1298,7 @@ bool AccessAnalysis::createCheckForAccess(
     for (const auto &[Idx, Q] : enumerate(RTCheckPtrs))
       dbgs() << "\t(" << Idx << ") " << *Q.getPointer() << "\n");
   } else {
-    RTCheckPtrs = {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
+    RTCheckPtrs = {{replacePtrSCEV(PSE, Ptr, StridesMap, TheLoop), false}};
   }
 
   /// Check whether all pointers can participate in a runtime bounds check. They
@@ -1301,8 +1317,7 @@ bool AccessAnalysis::createCheckForAccess(
   // If there's only one option for Ptr, look it up after bounds and wrap
   // checking, because assumptions might have been added to PSE.
   if (RTCheckPtrs.size() == 1) {
-    AR =
-        cast<SCEVAddRecExpr>(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr));
+    AR = cast<SCEVAddRecExpr>(replacePtrSCEV(PSE, Ptr, StridesMap, TheLoop));
     P.setPointer(AR);
   }
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/monotonic-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/monotonic-pointers.ll
new file mode 100644
index 0000000000000..6c91a4e332573
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/monotonic-pointers.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -disable-output -passes='print<access-info>' %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @monotonic_ptr_simple(ptr writeonly %dst, ptr readonly %src, i32 %c, i32 %n) {
+; CHECK-LABEL: 'monotonic_ptr_simple'
+; CHECK-NEXT:    for.body:
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %dst.addr.09 = phi ptr [ %dst, %entry ], [ %dst.addr.1, %for.inc ]
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: %dst High: ((4 * (zext i32 %n to i64)) + %dst))
+; CHECK-NEXT:            Member: {%dst,+,4}<%for.body>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: %src High: ((4 * (zext i32 %n to i64)) + %src))
+; CHECK-NEXT:            Member: {%src,+,4}<%for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %dst.addr.09 = phi ptr [ %dst, %entry ], [ %dst.addr.1, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %0, %c
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  %incdec.ptr = getelementptr inbounds i8, ptr %dst.addr.09, i64 4
+  store i32 %0, ptr %dst.addr.09, align 4
+  br label %for.inc
+
+for.inc:
+  %dst.addr.1 = phi ptr [ %incdec.ptr, %if.then ], [ %dst.addr.09, %for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @monotonic_ptr_indexed(ptr writeonly %dst, ptr readonly %src, i32 %c, i32 %n) {
+; CHECK-LABEL: 'monotonic_ptr_indexed'
+; CHECK-NEXT:    for.body:
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %arrayidx5 = getelementptr inbounds i32, ptr %dst, i64 %idxprom4
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: %dst High: ((4 * (zext i32 %n to i64)) + %dst))
+; CHECK-NEXT:            Member: {%dst,+,4}<%for.body>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: %src High: ((4 * (zext i32 %n to i64)) + %src))
+; CHECK-NEXT:            Member: {%src,+,4}<%for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %idx.012 = phi i32 [ 0, %entry ], [ %idx.1, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %0, %c
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  %inc = add nsw i32 %idx.012, 1
+  %idxprom4 = sext i32 %idx.012 to i64
+  %arrayidx5 = getelementptr inbounds i32, ptr %dst, i64 %idxprom4
+  store i32 %0, ptr %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:
+  %idx.1 = phi i32 [ %inc, %if.then ], [ %idx.012, %for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret void
+}
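
Note: both new tests encode the common conditional-compaction ("compress-store") loop, where the destination pointer or index advances only on iterations that satisfy the predicate; that conditionally advancing value is exactly what MonotonicDescriptor recognizes. A minimal C sketch of the source pattern behind @monotonic_ptr_simple (the function name is illustrative only and not part of the patch):

  void compress_if_less(int *dst, const int *src, int c, int n) {
    for (int i = 0; i < n; ++i) {
      int v = src[i];
      if (v < c)      /* predicate: the update below happens only on this path */
        *dst++ = v;   /* dst is the monotonic variable: {%dst,+,4}, stepped only when v < c */
    }
  }

In IR of the shape shown in @monotonic_ptr_simple, %dst.addr.09 is the monotonic phi, %incdec.ptr is its step instruction, and the predicate edge is %if.then -> %for.inc.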