diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e5d496737d462..98034cfcd8413 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -389,6 +389,8 @@ Non-comprehensive list of changes in this release this build without optimizations (i.e. use `-O0` or use the `optnone` function attribute) or use the `fno-sanitize-merge=` flag in optimized builds. +- ``__builtin_assume_dereferenceable`` now accepts non-constant size operands. + New Compiler Flags ------------------ - New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``). diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 0b3ba89dfceb3..f63fc0f43ea7e 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -854,7 +854,7 @@ def BuiltinAssumeAligned : Builtin { def BuiltinAssumeDereferenceable : Builtin { let Spellings = ["__builtin_assume_dereferenceable"]; let Attributes = [NoThrow, Const]; - let Prototype = "void(void const*, _Constant size_t)"; + let Prototype = "void(void const*, size_t)"; } def BuiltinFree : Builtin { diff --git a/clang/test/CodeGen/builtin-assume-dereferenceable.c b/clang/test/CodeGen/builtin-assume-dereferenceable.c index cadffd4a84c26..0dc4ba089ee3a 100644 --- a/clang/test/CodeGen/builtin-assume-dereferenceable.c +++ b/clang/test/CodeGen/builtin-assume-dereferenceable.c @@ -32,3 +32,62 @@ int test2(int *a) { __builtin_assume_dereferenceable(a, 32ull); return a[0]; } + +// CHECK-LABEL: @test3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP0]], i64 [[CONV]]) ] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int test3(int *a, int n) { + __builtin_assume_dereferenceable(a, n); + return a[0]; +} + +// CHECK-LABEL: @test4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store i64 [[N:%.*]], ptr [[N_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[N_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP0]], i64 [[TMP1]]) ] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int test4(int *a, unsigned long long n) { + __builtin_assume_dereferenceable(a, n); + return a[0]; +} + +// CHECK-LABEL: @test5( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca float, align 4 +// 
CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store float [[N:%.*]], ptr [[N_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[N_ADDR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fptoui float [[TMP1]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP0]], i64 [[CONV]]) ] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int test5(int *a, float n) { + __builtin_assume_dereferenceable(a, n); + return a[0]; +} diff --git a/clang/test/SemaCXX/builtin-assume-dereferenceable.cpp b/clang/test/SemaCXX/builtin-assume-dereferenceable.cpp index b79b7c059567e..6380a9a7debf7 100644 --- a/clang/test/SemaCXX/builtin-assume-dereferenceable.cpp +++ b/clang/test/SemaCXX/builtin-assume-dereferenceable.cpp @@ -18,12 +18,12 @@ int test3(int *a) { } int test4(int *a, unsigned size) { - a = __builtin_assume_dereferenceable(a, size); // expected-error {{argument to '__builtin_assume_dereferenceable' must be a constant integer}} + __builtin_assume_dereferenceable(a, size); return a[0]; } int test5(int *a, unsigned long long size) { - a = __builtin_assume_dereferenceable(a, size); // expected-error {{argument to '__builtin_assume_dereferenceable' must be a constant integer}} + __builtin_assume_dereferenceable(a, size); return a[0]; } @@ -53,3 +53,8 @@ constexpr void *l = __builtin_assume_dereferenceable(p, 4); // expected-error {{ void *foo() { return l; } + +int test10(int *a) { + __builtin_assume_dereferenceable(a, a); // expected-error {{cannot initialize a parameter of type 'unsigned long' with an lvalue of type 'int *'}} + return a[0]; +} diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 73bfe1aabb4e0..52ab38583d5de 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -180,11 +180,15 @@ class MemoryDepChecker { const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, + MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, + DominatorTree *DT, const Loop *L, const DenseMap &SymbolicStrides, - unsigned MaxTargetVectorWidthInBits) - : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), - MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} + unsigned MaxTargetVectorWidthInBits, + std::optional &LoopGuards) + : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), + SymbolicStrides(SymbolicStrides), + MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits), + LoopGuards(LoopGuards) {} /// Register the location (instructions are given increasing numbers) /// of a write access. @@ -236,8 +240,8 @@ class MemoryDepChecker { /// In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. 
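 /// For example (illustrative): accesses A[i] and A[i + N], where N is not a
 /// compile-time constant, have a non-constant dependence distance, but a
 /// runtime check that the two address ranges do not overlap can still allow
 /// vectorization.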
- bool shouldRetryWithRuntimeCheck() const { - return FoundNonConstantDistanceDependence && + bool shouldRetryWithRuntimeChecks() const { + return ShouldRetryWithRuntimeChecks && Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks; } @@ -288,6 +292,15 @@ class MemoryDepChecker { return PointerBounds; } + DominatorTree *getDT() const { + assert(DT && "requested DT, but it is not available"); + return DT; + } + AssumptionCache *getAC() const { + assert(AC && "requested AC, but it is not available"); + return AC; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -296,6 +309,10 @@ class MemoryDepChecker { /// example we might assume a unit stride for a pointer in order to prove /// that a memory access is strided and doesn't wrap. PredicatedScalarEvolution &PSE; + + AssumptionCache *AC; + DominatorTree *DT; + const Loop *InnermostLoop; /// Reference to map of pointer values to @@ -327,9 +344,9 @@ class MemoryDepChecker { uint64_t MaxStoreLoadForwardSafeDistanceInBits = std::numeric_limits::max(); - /// If we see a non-constant dependence distance we can still try to - /// vectorize this loop with runtime checks. - bool FoundNonConstantDistanceDependence = false; + /// Whether we should try to vectorize the loop with runtime checks, if the + /// dependencies are not safe. + bool ShouldRetryWithRuntimeChecks = false; /// Result of the dependence checks, indicating whether the checked /// dependences are safe for vectorization, require RT checks or are known to @@ -358,7 +375,7 @@ class MemoryDepChecker { PointerBounds; /// Cache for the loop guards of InnermostLoop. - std::optional LoopGuards; + std::optional &LoopGuards; /// Check whether there is a plausible dependence between the two /// accesses. @@ -516,8 +533,9 @@ class RuntimePointerChecking { AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {} }; - RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE) - : DC(DC), SE(SE) {} + RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE, + std::optional &LoopGuards) + : DC(DC), SE(SE), LoopGuards(LoopGuards) {} /// Reset the state of the pointer runtime information. void reset() { @@ -631,6 +649,9 @@ class RuntimePointerChecking { /// Holds a pointer to the ScalarEvolution analysis. ScalarEvolution *SE; + /// Cache for the loop guards of the loop. + std::optional &LoopGuards; + /// Set of run-time checks required to establish independence of /// otherwise may-aliasing pointers in the loop. SmallVector Checks; @@ -670,7 +691,7 @@ class LoopAccessInfo { LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI, + DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial = false); /// Return true we can analyze the memory accesses in the loop and there are @@ -806,6 +827,9 @@ class LoopAccessInfo { Loop *TheLoop; + /// Cache for the loop guards of TheLoop. + std::optional LoopGuards; + /// Determines whether we should generate partial runtime checks when not all /// memory accesses could be analyzed. 
bool AllowPartial; @@ -922,7 +946,9 @@ LLVM_ABI std::pair getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap, - std::pair> *PointerBounds); + std::pair> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC, + std::optional &LoopGuards); class LoopAccessInfoManager { /// The cache. @@ -935,12 +961,13 @@ class LoopAccessInfoManager { LoopInfo &LI; TargetTransformInfo *TTI; const TargetLibraryInfo *TLI = nullptr; + AssumptionCache *AC; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, - const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} + const TargetLibraryInfo *TLI, AssumptionCache *AC) + : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {} LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index b83fc3b72abf3..ca0b7823558ff 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1000,10 +1000,14 @@ class ScalarEvolution { /// (at every loop iteration). It is, at the same time, the minimum number /// of times S is divisible by 2. For example, given {4,+,8} it returns 2. /// If S is guaranteed to be 0, it returns the bitwidth of S. - LLVM_ABI uint32_t getMinTrailingZeros(const SCEV *S); + /// If \p CtxI is not nullptr, return a constant multiple valid at \p CtxI. + LLVM_ABI uint32_t getMinTrailingZeros(const SCEV *S, + const Instruction *CtxI = nullptr); - /// Returns the max constant multiple of S. - LLVM_ABI APInt getConstantMultiple(const SCEV *S); + /// Returns the max constant multiple of S. If \p CtxI is not nullptr, return + /// a constant multiple valid at \p CtxI. + LLVM_ABI APInt getConstantMultiple(const SCEV *S, + const Instruction *CtxI = nullptr); // Returns the max constant multiple of S. If S is exactly 0, return 1. LLVM_ABI APInt getNonZeroConstantMultiple(const SCEV *S); @@ -1339,6 +1343,7 @@ class ScalarEvolution { class LoopGuards { DenseMap RewriteMap; + SmallDenseSet> NotEqual; bool PreserveNUW = false; bool PreserveNSW = false; ScalarEvolution &SE; @@ -1525,8 +1530,10 @@ class ScalarEvolution { /// Return the Value set from which the SCEV expr is generated. ArrayRef getSCEVValues(const SCEV *S); - /// Private helper method for the getConstantMultiple method. - APInt getConstantMultipleImpl(const SCEV *S); + /// Private helper method for the getConstantMultiple method. If \p CtxI is + /// not nullptr, return a constant multiple valid at \p CtxI. + APInt getConstantMultipleImpl(const SCEV *S, + const Instruction *Ctx = nullptr); /// Information about the number of times a particular loop exit may be /// reached before exiting the loop. @@ -2310,10 +2317,6 @@ class ScalarEvolution { /// an add rec on said loop. void getUsedLoops(const SCEV *S, SmallPtrSetImpl &LoopsUsed); - /// Try to match the pattern generated by getURemExpr(A, B). If successful, - /// Assign A and B to LHS and RHS, respectively. - LLVM_ABI bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); - /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in /// `UniqueSCEVs`. Return if found, else nullptr. 
 SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
index 09e3945f5a8ff..24a86c59d1cdb 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
@@ -91,6 +91,14 @@ inline bind_ty<const SCEVUnknown> m_SCEVUnknown(const SCEVUnknown *&V) {
   return V;
 }
 
+inline bind_ty<const SCEVAddExpr> m_scev_Add(const SCEVAddExpr *&V) {
+  return V;
+}
+
+inline bind_ty<const SCEVMulExpr> m_scev_Mul(const SCEVMulExpr *&V) {
+  return V;
+}
+
 /// Match a specified const SCEV *.
 struct specificscev_ty {
   const SCEV *Expr;
@@ -160,6 +168,18 @@ m_scev_ZExt(const Op0_t &Op0) {
   return m_scev_Unary<SCEVZeroExtendExpr>(Op0);
 }
 
+template <typename Op0_t>
+inline SCEVUnaryExpr_match<SCEVPtrToIntExpr, Op0_t>
+m_scev_PtrToInt(const Op0_t &Op0) {
+  return SCEVUnaryExpr_match<SCEVPtrToIntExpr, Op0_t>(Op0);
+}
+
+template <typename Op0_t>
+inline SCEVUnaryExpr_match<SCEVTruncateExpr, Op0_t>
+m_scev_Trunc(const Op0_t &Op0) {
+  return m_scev_Unary<SCEVTruncateExpr>(Op0);
+}
+
 /// Match a binary SCEV.
 template <typename SCEVTy, typename Op0_t, typename Op1_t>
 struct SCEVBinaryExpr_match {
@@ -199,6 +219,76 @@ m_scev_UDiv(const Op0_t &Op0, const Op1_t &Op1) {
   return m_scev_Binary<SCEVUDivExpr>(Op0, Op1);
 }
 
+/// Match unsigned remainder pattern.
+/// Matches patterns generated by getURemExpr.
+template <typename Op0_t, typename Op1_t> struct SCEVURem_match {
+  Op0_t Op0;
+  Op1_t Op1;
+  ScalarEvolution &SE;
+
+  SCEVURem_match(Op0_t Op0, Op1_t Op1, ScalarEvolution &SE)
+      : Op0(Op0), Op1(Op1), SE(SE) {}
+
+  bool match(const SCEV *Expr) const {
+    if (Expr->getType()->isPointerTy())
+      return false;
+
+    // Try to match 'zext (trunc A to iB) to iY', which is used
+    // for URem with constant power-of-2 second operands. Make sure the size of
+    // the operand A matches the size of the whole expressions.
+    const SCEV *LHS;
+    if (SCEVPatternMatch::match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(LHS))))) {
+      Type *TruncTy = cast<SCEVZeroExtendExpr>(Expr)->getOperand()->getType();
+      // Bail out if the type of the LHS is larger than the type of the
+      // expression for now.
+      if (SE.getTypeSizeInBits(LHS->getType()) >
+          SE.getTypeSizeInBits(Expr->getType()))
+        return false;
+      if (LHS->getType() != Expr->getType())
+        LHS = SE.getZeroExtendExpr(LHS, Expr->getType());
+      const SCEV *RHS =
+          SE.getConstant(APInt(SE.getTypeSizeInBits(Expr->getType()), 1)
+                         << SE.getTypeSizeInBits(TruncTy));
+      return Op0.match(LHS) && Op1.match(RHS);
+    }
+
+    const SCEV *A;
+    const SCEVMulExpr *Mul;
+    if (!SCEVPatternMatch::match(Expr, m_scev_Add(m_scev_Mul(Mul), m_SCEV(A))))
+      return false;
+
+    const auto MatchURemWithDivisor = [&](const SCEV *B) {
+      // (SomeExpr + (-(SomeExpr / B) * B)).
+      if (Expr == SE.getURemExpr(A, B))
+        return Op0.match(A) && Op1.match(B);
+      return false;
+    };
+
+    // (SomeExpr + (-1 * (SomeExpr / B) * B)).
+    if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
+      return MatchURemWithDivisor(Mul->getOperand(1)) ||
+             MatchURemWithDivisor(Mul->getOperand(2));
+
+    // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
+    if (Mul->getNumOperands() == 2)
+      return MatchURemWithDivisor(Mul->getOperand(1)) ||
+             MatchURemWithDivisor(Mul->getOperand(0)) ||
+             MatchURemWithDivisor(SE.getNegativeSCEV(Mul->getOperand(1))) ||
+             MatchURemWithDivisor(SE.getNegativeSCEV(Mul->getOperand(0)));
+    return false;
+  }
+};
+
+/// Match the mathematical pattern A - (A / B) * B, where A and B can be
+/// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
+/// for URem with constant power-of-2 second operands. It's not always easy, as
+/// A and B can be folded (imagine A is X / 2, and B is 4, A / B becomes X / 8).
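+/// A hypothetical usage sketch (illustrative only; Expr and SE stand for a
+/// SCEV expression and a ScalarEvolution instance):
+/// \code
+///   const SCEV *A, *B;
+///   if (SCEVPatternMatch::match(Expr, m_scev_URem(m_SCEV(A), m_SCEV(B), SE)))
+///     ; // Expr is equivalent to A urem B, with both operands bound.
+/// \endcode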
+template <typename Op0_t, typename Op1_t>
+inline SCEVURem_match<Op0_t, Op1_t> m_scev_URem(Op0_t LHS, Op1_t RHS,
+                                                ScalarEvolution &SE) {
+  return SCEVURem_match<Op0_t, Op1_t>(LHS, RHS, SE);
+}
+
 inline class_match<const Loop> m_Loop() { return class_match<const Loop>(); }
 
 /// Match an affine SCEVAddRecExpr.
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 02990a3cb44f7..3e1e718114008 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -618,6 +618,12 @@ LLVM_ABI bool isValidAssumeForContext(const Instruction *I,
                                       const DominatorTree *DT = nullptr,
                                       bool AllowEphemerals = false);
 
+/// Returns true if no instruction between \p Assume and \p CtxI may free
+/// memory and the function is marked as NoSync. The latter ensures the current
+/// function cannot arrange for another thread to free on its behalf.
+LLVM_ABI bool willNotFreeBetween(const Instruction *Assume,
+                                 const Instruction *CtxI);
+
 enum class OverflowResult {
   /// Always overflows in the direction of signed/unsigned min value.
   AlwaysOverflowsLow,
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index a101151eed7cc..39fef921a9590 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -530,6 +530,7 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
   bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+  Value *tryToReuseLCSSAPhi(const SCEVAddRecExpr *S);
   Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
   PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                      const Loop *L, Type *&TruncTy,
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index fcc2cf2f7e8e7..ff0a9966551ce 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -36,17 +36,13 @@ static bool isDereferenceableAndAlignedPointerViaAssumption(
     function_ref<bool(const RetainedKnowledge &)> CheckSize,
     const DataLayout &DL, const Instruction *CtxI, AssumptionCache *AC,
     const DominatorTree *DT) {
-  // Dereferenceable information from assumptions is only valid if the value
-  // cannot be freed between the assumption and use. For now just use the
-  // information for values that cannot be freed in the function.
-  // TODO: More precisely check if the pointer can be freed between assumption
-  // and use.
-  if (!CtxI || Ptr->canBeFreed())
+  if (!CtxI)
     return false;
   /// Look through assumes to see if both dereferencability and alignment can
   /// be proven by an assume if needed.
   RetainedKnowledge AlignRK;
   RetainedKnowledge DerefRK;
+  bool PtrCanBeFreed = Ptr->canBeFreed();
   bool IsAligned = Ptr->getPointerAlignment(DL) >= Alignment;
   return getKnowledgeForValue(
       Ptr, {Attribute::Dereferenceable, Attribute::Alignment}, *AC,
@@ -55,7 +51,11 @@ static bool isDereferenceableAndAlignedPointerViaAssumption(
           return false;
         if (RK.AttrKind == Attribute::Alignment)
           AlignRK = std::max(AlignRK, RK);
-        if (RK.AttrKind == Attribute::Dereferenceable)
+
+        // Dereferenceable information from assumptions is only valid if the
+        // value cannot be freed between the assumption and use.
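+        // willNotFreeBetween (declared in ValueTracking.h above) requires
+        // that no instruction between the assume and CtxI may free memory
+        // and that the function is nosync.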
+ if ((!PtrCanBeFreed || willNotFreeBetween(Assume, CtxI)) && + RK.AttrKind == Attribute::Dereferenceable) DerefRK = std::max(DerefRK, RK); IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); if (IsAligned && DerefRK && CheckSize(DerefRK)) @@ -332,17 +332,10 @@ bool llvm::isDereferenceableAndAlignedInLoop( : SE.getBackedgeTakenCount(L); if (isa(MaxBECount)) return false; - - if (isa(BECount)) { - // TODO: Support symbolic max backedge taken counts for loops without - // computable backedge taken counts. - MaxBECount = - Predicates - ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates) - : SE.getConstantMaxBackedgeTakenCount(L); - } - const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess( - L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr); + std::optional LoopGuards; + const auto &[AccessStart, AccessEnd] = + getStartAndEndForAccess(L, PtrScev, LI->getType(), BECount, MaxBECount, + &SE, nullptr, &DT, AC, LoopGuards); if (isa(AccessStart) || isa(AccessEnd)) return false; @@ -351,7 +344,13 @@ bool llvm::isDereferenceableAndAlignedInLoop( const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart); if (isa(PtrDiff)) return false; - APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff); + + if (!LoopGuards) + LoopGuards.emplace( + ScalarEvolution::LoopGuards::collect(AddRec->getLoop(), SE)); + + APInt MaxPtrDiff = + SE.getUnsignedRangeMax(SE.applyLoopGuards(PtrDiff, *LoopGuards)); Value *Base = nullptr; APInt AccessSize; @@ -391,16 +390,22 @@ bool llvm::isDereferenceableAndAlignedInLoop( } else return false; - Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt(); + Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); + if (BasicBlock *LoopPred = L->getLoopPredecessor()) { + if (isa(LoopPred->getTerminator())) + CtxI = LoopPred->getTerminator(); + } return isDereferenceableAndAlignedPointerViaAssumption( Base, Alignment, - [&SE, AccessSizeSCEV](const RetainedKnowledge &RK) { - return SE.isKnownPredicate(CmpInst::ICMP_ULE, AccessSizeSCEV, - SE.getSCEV(RK.IRArgValue)); + [&SE, AccessSizeSCEV, &LoopGuards](const RetainedKnowledge &RK) { + return SE.isKnownPredicate( + CmpInst::ICMP_ULE, + SE.applyLoopGuards(AccessSizeSCEV, *LoopGuards), + SE.applyLoopGuards(SE.getSCEV(RK.IRArgValue), *LoopGuards)); }, - DL, HeaderFirstNonPHI, AC, &DT) || + DL, CtxI, AC, &DT) || isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, - HeaderFirstNonPHI, AC, &DT); + CtxI, AC, &DT); } static bool suppressSpeculativeLoadForSanitizers(const Instruction &CtxI) { diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index f8f741575f87a..02a6a66362493 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -23,6 +23,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -208,28 +210,57 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B, /// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at /// \p MaxBTC is guaranteed inbounds of the accessed object. 
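 /// Informally, for a non-negative step, the implementation below establishes
 /// (a sketch of the logic, not the exact code):
 ///   StartOffset + MaxBTC * |Step| + EltSize u<= max(DerefBytes, assumed bytes)
 /// with every addition and multiplication first proven not to overflow.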
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
-                                                 const SCEV *MaxBTC,
-                                                 const SCEV *EltSize,
-                                                 ScalarEvolution &SE,
-                                                 const DataLayout &DL) {
+static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
+    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
+    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
+    AssumptionCache *AC,
+    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
   auto *PointerBase = SE.getPointerBase(AR->getStart());
   auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
   if (!StartPtr)
     return false;
+  const Loop *L = AR->getLoop();
   bool CheckForNonNull, CheckForFreed;
-  uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+  Value *StartPtrV = StartPtr->getValue();
+  uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
       DL, CheckForNonNull, CheckForFreed);
-  if (CheckForNonNull || CheckForFreed)
+  if (DerefBytes && (CheckForNonNull || CheckForFreed))
     return false;
 
   const SCEV *Step = AR->getStepRecurrence(SE);
+  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+  const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
+
+  // Check if we have a suitable dereferenceable assumption we can use.
+  Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
+  if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
+    if (isa<BranchInst>(LoopPred->getTerminator()))
+      CtxI = LoopPred->getTerminator();
+  }
+  RetainedKnowledge DerefRK;
+  getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
+                       [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+                         if (!isValidAssumeForContext(Assume, CtxI, DT))
+                           return false;
+                         if (StartPtrV->canBeFreed() &&
+                             !willNotFreeBetween(Assume, CtxI))
+                           return false;
+                         DerefRK = std::max(DerefRK, RK);
+                         return true;
+                       });
+  if (DerefRK) {
+    DerefBytesSCEV =
+        SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
+  }
+
+  if (DerefBytesSCEV->isZero())
+    return false;
+
   bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
   if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
     return false;
 
-  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
   Step = SE.getNoopOrSignExtend(Step, WiderTy);
   MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
@@ -239,10 +270,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
   const SCEV *StartOffset = SE.getNoopOrZeroExtend(
       SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
 
+  if (!LoopGuards)
+    LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
+  MaxBTC = SE.applyLoopGuards(MaxBTC, *LoopGuards);
+
   const SCEV *OffsetAtLastIter =
       mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
-  if (!OffsetAtLastIter)
-    return false;
+  if (!OffsetAtLastIter) {
+    // Re-try with constant max backedge-taken count if using the symbolic one
+    // failed.
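+    // The constant max backedge-taken count may be a tighter bound than the
+    // symbolic maximum, so the overflow-checked multiply below can succeed
+    // where the symbolic attempt did not.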
+ MaxBTC = SE.getConstantMaxBackedgeTakenCount(AR->getLoop()); + if (isa(MaxBTC)) + return false; + MaxBTC = SE.getNoopOrZeroExtend( + MaxBTC, WiderTy); + OffsetAtLastIter = + mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE); + if (!OffsetAtLastIter) + return false; + } const SCEV *OffsetEndBytes = addSCEVNoOverflow( OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE); @@ -256,8 +302,9 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE); if (!EndBytes) return false; - return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, - SE.getConstant(WiderTy, DerefBytes)); + + DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards); + return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV); } // For negative steps check if @@ -265,15 +312,16 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, // * StartOffset <= DerefBytes. assert(SE.isKnownNegative(Step) && "must be known negative"); return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) && - SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, - SE.getConstant(WiderTy, DerefBytes)); + SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV); } std::pair llvm::getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap, - std::pair> *PointerBounds) { + std::pair> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC, + std::optional &LoopGuards) { std::pair *PtrBoundsPair; if (PointerBounds) { auto [Iter, Ins] = PointerBounds->insert( @@ -308,8 +356,8 @@ std::pair llvm::getStartAndEndForAccess( // sets ScEnd to the maximum unsigned value for the type. Note that LAA // separately checks that accesses cannot not wrap, so unsigned max // represents an upper bound. - if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, - DL)) { + if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL, + DT, AC, LoopGuards)) { ScEnd = AR->evaluateAtIteration(MaxBTC, *SE); } else { ScEnd = SE->getAddExpr( @@ -356,9 +404,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, bool NeedsFreeze) { const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); const SCEV *BTC = PSE.getBackedgeTakenCount(); - const auto &[ScStart, ScEnd] = - getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, - PSE.getSE(), &DC.getPointerBounds()); + const auto &[ScStart, ScEnd] = getStartAndEndForAccess( + Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(), + &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards); assert(!isa(ScStart) && !isa(ScEnd) && "must be able to compute both start and end expressions"); @@ -589,11 +637,11 @@ void RuntimePointerChecking::groupChecks( // dependence. Not grouping the checks for a[i] and a[i + 9000] allows // us to perform an accurate check in this case. // - // The above case requires that we have an UnknownDependence between - // accesses to the same underlying object. This cannot happen unless - // FoundNonConstantDistanceDependence is set, and therefore UseDependencies - // is also false. In this case we will use the fallback path and create - // separate checking groups for all pointers. + // In the above case, we have a non-constant distance and an Unknown + // dependence between accesses to the same underlying object, and could retry + // with runtime checks. 
Therefore UseDependencies is false. In this case we + // will use the fallback path and create separate checking groups for all + // pointers. // If we don't have the dependency partitions, construct a new // checking pointer group for each pointer. This is also required @@ -819,7 +867,7 @@ class AccessAnalysis { /// perform dependency checking. /// /// Note that this can later be cleared if we retry memcheck analysis without - /// dependency checking (i.e. FoundNonConstantDistanceDependence). + /// dependency checking (i.e. ShouldRetryWithRuntimeChecks). bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); } /// We decided that no dependence analysis would be used. Reset the state. @@ -896,7 +944,7 @@ class AccessAnalysis { /// /// Note that, this is different from isDependencyCheckNeeded. When we retry /// memcheck analysis without dependency checking - /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is + /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is /// cleared while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded = false; @@ -1961,13 +2009,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src, const SCEV *BTC = PSE.getBackedgeTakenCount(); const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); ScalarEvolution &SE = *PSE.getSE(); - const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess( - InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); + const auto &[SrcStart_, SrcEnd_] = + getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, + &SE, &PointerBounds, DT, AC, LoopGuards); if (isa(SrcStart_) || isa(SrcEnd_)) return false; - const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess( - InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); + const auto &[SinkStart_, SinkEnd_] = + getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, + &SE, &PointerBounds, DT, AC, LoopGuards); if (isa(SinkStart_) || isa(SinkEnd_)) return false; @@ -2079,11 +2129,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( if (StrideAScaled == StrideBScaled) CommonStride = StrideAScaled; - // TODO: FoundNonConstantDistanceDependence is used as a necessary condition - // to consider retrying with runtime checks. Historically, we did not set it - // when (unscaled) strides were different but there is no inherent reason to. + // TODO: Historically, we didn't retry with runtime checks when (unscaled) + // strides were different but there is no inherent reason to. if (!isa(Dist)) - FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt; + ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt; return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite); @@ -2713,7 +2762,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, DepsAreSafe = DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck()); - if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { + if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); // Clear the dependency checks. We assume they are not needed. 
@@ -3004,7 +3053,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, - bool AllowPartial) + AssumptionCache *AC, bool AllowPartial) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) { unsigned MaxTargetVectorWidthInBits = std::numeric_limits::max(); @@ -3014,9 +3063,10 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, MaxTargetVectorWidthInBits = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; - DepChecker = std::make_unique(*PSE, L, SymbolicStrides, - MaxTargetVectorWidthInBits); - PtrRtChecking = std::make_unique(*DepChecker, SE); + DepChecker = std::make_unique( + *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards); + PtrRtChecking = + std::make_unique(*DepChecker, SE, LoopGuards); if (canAnalyzeLoop()) CanVecMem = analyzeLoop(AA, LI, TLI, DT); } @@ -3084,7 +3134,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L, // or if it was created with a different value of AllowPartial. if (Inserted || It->second->hasAllowPartial() != AllowPartial) It->second = std::make_unique(&L, &SE, TTI, TLI, &AA, &DT, - &LI, AllowPartial); + &LI, AC, AllowPartial); return *It->second; } @@ -3127,7 +3177,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F, auto &LI = FAM.getResult(F); auto &TTI = FAM.getResult(F); auto &TLI = FAM.getResult(F); - return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI); + auto &AC = FAM.getResult(F); + return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC); } AnalysisKey LoopAccessAnalysis::Key; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 221468a2d1a84..239849e670350 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1774,7 +1774,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, { const SCEV *LHS; const SCEV *RHS; - if (matchURem(Op, LHS, RHS)) + if (match(Op, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), *this))) return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), getZeroExtendExpr(RHS, Ty, Depth + 1)); } @@ -2682,21 +2682,30 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, return getAddExpr(NewOps, PreservedFlags); } } - } - // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y) - if (Ops.size() == 2) { - const SCEVMulExpr *Mul = dyn_cast(Ops[0]); - if (Mul && Mul->getNumOperands() == 2 && - Mul->getOperand(0)->isAllOnesValue()) { - const SCEV *X; - const SCEV *Y; - if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) { - return getMulExpr(Y, getUDivExpr(X, Y)); + // Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext + // (B), if trunc (A) + -A + B does not unsigned-wrap. 
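+  // E.g. with an i8 inner add: 16 + (zext i8 (-16 + %n) to i64) becomes
+  // (zext i8 %n to i64), provided (-16 + %n) + 16 is known not to
+  // unsigned-wrap in i8 (illustrative values).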
+  const SCEVAddExpr *InnerAdd;
+  if (match(B, m_scev_ZExt(m_scev_Add(InnerAdd)))) {
+    const SCEV *NarrowA = getTruncateExpr(A, InnerAdd->getType());
+    if (NarrowA == getNegativeSCEV(InnerAdd->getOperand(0)) &&
+        getZeroExtendExpr(NarrowA, B->getType()) == A &&
+        hasFlags(StrengthenNoWrapFlags(this, scAddExpr, {NarrowA, InnerAdd},
+                                       SCEV::FlagAnyWrap),
+                 SCEV::FlagNUW)) {
+      return getZeroExtendExpr(getAddExpr(NarrowA, InnerAdd), B->getType());
+    }
+  }
   }
 
+  // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
+  const SCEV *Y;
+  if (Ops.size() == 2 &&
+      match(Ops[0],
+            m_scev_Mul(m_scev_AllOnes(),
+                       m_scev_URem(m_scev_Specific(Ops[1]), m_SCEV(Y), *this))))
+    return getMulExpr(Y, getUDivExpr(Ops[1], Y));
+
   // Skip past any other cast SCEVs.
   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
     ++Idx;
@@ -3186,6 +3195,46 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                           AddRec->getNoWrapFlags(FlagsMask));
     }
   }
+
+  // Try to push the constant operand into a ZExt: C * zext (A + B) ->
+  // zext (C*A + C*B) if trunc (C) * (A + B) does not unsigned-wrap.
+  const SCEVAddExpr *InnerAdd;
+  if (match(Ops[1], m_scev_ZExt(m_scev_Add(InnerAdd)))) {
+    const SCEV *NarrowC = getTruncateExpr(LHSC, InnerAdd->getType());
+    if (isa<SCEVConstant>(InnerAdd->getOperand(0)) &&
+        getZeroExtendExpr(NarrowC, Ops[1]->getType()) == LHSC &&
+        hasFlags(StrengthenNoWrapFlags(this, scMulExpr, {NarrowC, InnerAdd},
+                                       SCEV::FlagAnyWrap),
+                 SCEV::FlagNUW)) {
+      auto *Res = getMulExpr(NarrowC, InnerAdd, SCEV::FlagNUW, Depth + 1);
+      return getZeroExtendExpr(Res, Ops[1]->getType(), Depth + 1);
+    };
+  }
+
+  // Try to fold (C1 * D /u C2) -> C1/C2 * D, if C1 and C2 are powers-of-2,
+  // D is a multiple of C2, and C1 is a multiple of C2. If C2 is a multiple
+  // of C1, fold to (D /u (C2 /u C1)).
+  const SCEV *D;
+  APInt C1V = LHSC->getAPInt();
+  // (C1 * D /u C2) == -1 * -C1 * D /u C2 when C1 != INT_MIN. Don't treat -1
+  // as -1 * 1, as it won't enable additional folds.
+  if (C1V.isNegative() && !C1V.isMinSignedValue() && !C1V.isAllOnes())
+    C1V = C1V.abs();
+  const SCEVConstant *C2;
+  if (C1V.isPowerOf2() &&
+      match(Ops[1], m_scev_UDiv(m_SCEV(D), m_SCEVConstant(C2))) &&
+      C2->getAPInt().isPowerOf2() &&
+      C1V.logBase2() <= getMinTrailingZeros(D)) {
+    const SCEV *NewMul = nullptr;
+    if (C1V.uge(C2->getAPInt())) {
+      NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D);
+    } else if (C2->getAPInt().logBase2() <= getMinTrailingZeros(D)) {
+      assert(C1V.ugt(1) && "C1 <= 1 should have been folded earlier");
+      NewMul = getUDivExpr(D, getUDivExpr(C2, getConstant(C1V)));
+    }
+    if (NewMul)
+      return C1V == LHSC->getAPInt() ? NewMul : getNegativeSCEV(NewMul);
+  }
     }
   }
@@ -4567,17 +4616,11 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
 
 /// If Expr computes ~A, return A else return nullptr
 static const SCEV *MatchNotExpr(const SCEV *Expr) {
-  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
-  if (!Add || Add->getNumOperands() != 2 ||
-      !Add->getOperand(0)->isAllOnesValue())
-    return nullptr;
-
-  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
-  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
-      !AddRHS->getOperand(0)->isAllOnesValue())
-    return nullptr;
-
-  return AddRHS->getOperand(1);
+  const SCEV *MulOp;
+  if (match(Expr, m_scev_Add(m_scev_AllOnes(),
+                             m_scev_Mul(m_scev_AllOnes(), m_SCEV(MulOp)))))
+    return MulOp;
+  return nullptr;
 }
 
 /// Return a SCEV corresponding to ~V = -1-V
@@ -5358,20 +5401,15 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
   if (SourceBits != NewBits)
     return nullptr;
 
-  const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
-  const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
-  if (!SExt && !ZExt)
-    return nullptr;
-  const SCEVTruncateExpr *Trunc =
-      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
-           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
-  if (!Trunc)
-    return nullptr;
-  const SCEV *X = Trunc->getOperand();
-  if (X != SymbolicPHI)
-    return nullptr;
-  Signed = SExt != nullptr;
-  return Trunc->getType();
+  if (match(Op, m_scev_SExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) {
+    Signed = true;
+    return cast<SCEVSignExtendExpr>(Op)->getOperand()->getType();
+  }
+  if (match(Op, m_scev_ZExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) {
+    Signed = false;
+    return cast<SCEVZeroExtendExpr>(Op)->getOperand()->getType();
+  }
+  return nullptr;
 }
 
 static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
@@ -6290,19 +6328,20 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   return getGEPExpr(GEP, IndexExprs);
 }
 
-APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
+APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S,
+                                               const Instruction *CtxI) {
   uint64_t BitWidth = getTypeSizeInBits(S->getType());
   auto GetShiftedByZeros = [BitWidth](uint32_t TrailingZeros) {
     return TrailingZeros >= BitWidth
                ? APInt::getZero(BitWidth)
               : APInt::getOneBitSet(BitWidth, TrailingZeros);
  };
-  auto GetGCDMultiple = [this](const SCEVNAryExpr *N) {
+  auto GetGCDMultiple = [this, CtxI](const SCEVNAryExpr *N) {
     // The result is GCD of all operands results.
-    APInt Res = getConstantMultiple(N->getOperand(0));
+    APInt Res = getConstantMultiple(N->getOperand(0), CtxI);
     for (unsigned I = 1, E = N->getNumOperands(); I < E && Res != 1; ++I)
       Res = APIntOps::GreatestCommonDivisor(
-          Res, getConstantMultiple(N->getOperand(I)));
+          Res, getConstantMultiple(N->getOperand(I), CtxI));
     return Res;
   };
@@ -6310,33 +6349,33 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
   case scConstant:
     return cast<SCEVConstant>(S)->getAPInt();
   case scPtrToInt:
-    return getConstantMultiple(cast<SCEVPtrToIntExpr>(S)->getOperand());
+    return getConstantMultiple(cast<SCEVPtrToIntExpr>(S)->getOperand(), CtxI);
   case scUDivExpr:
   case scVScale:
     return APInt(BitWidth, 1);
   case scTruncate: {
     // Only multiples that are a power of 2 will hold after truncation.
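    // E.g. a value that is a multiple of 24 (= 8 * 3) is, after truncation,
    // only known to be a multiple of 8: in i16, 264 is a multiple of 24, but
    // truncated to i8 it becomes 8, which is not (illustrative).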
     const SCEVTruncateExpr *T = cast<SCEVTruncateExpr>(S);
-    uint32_t TZ = getMinTrailingZeros(T->getOperand());
+    uint32_t TZ = getMinTrailingZeros(T->getOperand(), CtxI);
     return GetShiftedByZeros(TZ);
   }
   case scZeroExtend: {
     const SCEVZeroExtendExpr *Z = cast<SCEVZeroExtendExpr>(S);
-    return getConstantMultiple(Z->getOperand()).zext(BitWidth);
+    return getConstantMultiple(Z->getOperand(), CtxI).zext(BitWidth);
   }
   case scSignExtend: {
     // Only multiples that are a power of 2 will hold after sext.
     const SCEVSignExtendExpr *E = cast<SCEVSignExtendExpr>(S);
-    uint32_t TZ = getMinTrailingZeros(E->getOperand());
+    uint32_t TZ = getMinTrailingZeros(E->getOperand(), CtxI);
     return GetShiftedByZeros(TZ);
   }
   case scMulExpr: {
     const SCEVMulExpr *M = cast<SCEVMulExpr>(S);
     if (M->hasNoUnsignedWrap()) {
       // The result is the product of all operand results.
-      APInt Res = getConstantMultiple(M->getOperand(0));
+      APInt Res = getConstantMultiple(M->getOperand(0), CtxI);
       for (const SCEV *Operand : M->operands().drop_front())
-        Res = Res * getConstantMultiple(Operand);
+        Res = Res * getConstantMultiple(Operand, CtxI);
       return Res;
     }
@@ -6344,7 +6383,7 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     // sum of trailing zeros for all its operands.
     uint32_t TZ = 0;
     for (const SCEV *Operand : M->operands())
-      TZ += getMinTrailingZeros(Operand);
+      TZ += getMinTrailingZeros(Operand, CtxI);
     return GetShiftedByZeros(TZ);
   }
   case scAddExpr:
@@ -6353,9 +6392,9 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     if (N->hasNoUnsignedWrap())
       return GetGCDMultiple(N);
     // Find the trailing bits, which is the minimum of its operands.
-    uint32_t TZ = getMinTrailingZeros(N->getOperand(0));
+    uint32_t TZ = getMinTrailingZeros(N->getOperand(0), CtxI);
     for (const SCEV *Operand : N->operands().drop_front())
-      TZ = std::min(TZ, getMinTrailingZeros(Operand));
+      TZ = std::min(TZ, getMinTrailingZeros(Operand, CtxI));
     return GetShiftedByZeros(TZ);
   }
   case scUMaxExpr:
@@ -6365,10 +6404,20 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
   case scSequentialUMinExpr:
     return GetGCDMultiple(cast<SCEVNAryExpr>(S));
   case scUnknown: {
-    // ask ValueTracking for known bits
+    // Ask ValueTracking for known bits. SCEVUnknowns only become available at
+    // the point where their underlying IR instructions have been defined. If
+    // CtxI was not provided, use:
+    // * the first instruction in the entry block if it is an argument
+    // * the instruction itself otherwise.
    const SCEVUnknown *U = cast<SCEVUnknown>(S);
+    if (!CtxI) {
+      if (isa<Argument>(U->getValue()))
+        CtxI = &*F.getEntryBlock().begin();
+      else if (auto *I = dyn_cast<Instruction>(U->getValue()))
+        CtxI = I;
+    }
    unsigned Known =
-        computeKnownBits(U->getValue(), getDataLayout(), &AC, nullptr, &DT)
+        computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT)
            .countMinTrailingZeros();
    return GetShiftedByZeros(Known);
  }
@@ -6378,12 +6427,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
   llvm_unreachable("Unknown SCEV kind!");
 }
 
-APInt ScalarEvolution::getConstantMultiple(const SCEV *S) {
+APInt ScalarEvolution::getConstantMultiple(const SCEV *S,
+                                           const Instruction *CtxI) {
+  // Skip looking up and updating the cache if there is a context instruction,
+  // as the result will only be valid in the specified context.
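+  // E.g. an llvm.assume dominating CtxI may make a value a known multiple of
+  // 8 at CtxI only; caching that result would be incorrect at other program
+  // points (illustrative).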
+  if (CtxI)
+    return getConstantMultipleImpl(S, CtxI);
+
   auto I = ConstantMultipleCache.find(S);
   if (I != ConstantMultipleCache.end())
     return I->second;
 
-  APInt Result = getConstantMultipleImpl(S);
+  APInt Result = getConstantMultipleImpl(S, CtxI);
   auto InsertPair = ConstantMultipleCache.insert({S, Result});
   assert(InsertPair.second && "Should insert a new key");
   return InsertPair.first->second;
 }
@@ -6394,8 +6449,9 @@ APInt ScalarEvolution::getNonZeroConstantMultiple(const SCEV *S) {
   return Multiple == 0 ? APInt(Multiple.getBitWidth(), 1) : Multiple;
 }
 
-uint32_t ScalarEvolution::getMinTrailingZeros(const SCEV *S) {
-  return std::min(getConstantMultiple(S).countTrailingZeros(),
+uint32_t ScalarEvolution::getMinTrailingZeros(const SCEV *S,
+                                              const Instruction *CtxI) {
+  return std::min(getConstantMultiple(S, CtxI).countTrailingZeros(),
                   (unsigned)getTypeSizeInBits(S->getType()));
 }
 
@@ -10174,8 +10230,7 @@ const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
 static const SCEV *
 SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
                              SmallVectorImpl<const SCEVPredicate *> *Predicates,
-
-                             ScalarEvolution &SE) {
+                             ScalarEvolution &SE, const Loop *L) {
   uint32_t BW = A.getBitWidth();
   assert(BW == SE.getTypeSizeInBits(B->getType()));
   assert(A != 0 && "A must be non-zero.");
@@ -10191,7 +10246,12 @@ SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
   //
   // B is divisible by D if and only if the multiplicity of prime factor 2 for B
   // is not less than multiplicity of this prime factor for D.
-  if (SE.getMinTrailingZeros(B) < Mult2) {
+  unsigned MinTZ = SE.getMinTrailingZeros(B);
+  // Try again with the terminator of the loop predecessor for a
+  // context-specific result, if MinTZ is too small.
+  if (MinTZ < Mult2 && L->getLoopPredecessor())
+    MinTZ = SE.getMinTrailingZeros(B, L->getLoopPredecessor()->getTerminator());
+  if (MinTZ < Mult2) {
     // Check if we can prove there's no remainder using URem.
     const SCEV *URem =
         SE.getURemExpr(B, SE.getConstant(APInt::getOneBitSet(BW, Mult2)));
@@ -10639,7 +10699,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V,
     return getCouldNotCompute();
   const SCEV *E = SolveLinEquationWithOverflow(
       StepC->getAPInt(), getNegativeSCEV(Start),
-      AllowPredicates ? &Predicates : nullptr, *this);
+      AllowPredicates ? &Predicates : nullptr, *this, L);
 
   const SCEV *M = E;
   if (E != getCouldNotCompute()) {
@@ -11418,8 +11478,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
     XNonConstOp = X;
     XFlagsPresent = ExpectedFlags;
   }
-  if (!isa<SCEVConstant>(XConstOp) ||
-      (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
+  if (!isa<SCEVConstant>(XConstOp))
     return false;
 
   if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
@@ -11428,13 +11487,21 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
     YFlagsPresent = ExpectedFlags;
   }
 
-  if (!isa<SCEVConstant>(YConstOp) ||
-      (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+  if (YNonConstOp != XNonConstOp)
     return false;
 
-  if (YNonConstOp != XNonConstOp)
+  if (!isa<SCEVConstant>(YConstOp))
     return false;
 
+  // When matching ADDs with NUW flags (and unsigned predicates), only the
+  // second ADD (with the larger constant) requires NUW.
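+  // E.g. (X + 1) u<= (X +nuw 3): because X +nuw 3 does not unsigned-wrap and
+  // 1 u<= 3, X + 1 cannot wrap either (illustrative).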
+ if ((YFlagsPresent & ExpectedFlags) != ExpectedFlags) + return false; + if (ExpectedFlags != SCEV::FlagNUW && + (XFlagsPresent & ExpectedFlags) != ExpectedFlags) { + return false; + } + OutC1 = cast(XConstOp)->getAPInt(); OutC2 = cast(YConstOp)->getAPInt(); @@ -11472,7 +11539,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred, std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_ULE: - // (X + C1) u<= (X + C2) for C1 u<= C2. + // (X + C1) u<= (X + C2) for C1 u<= C2. if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ule(C2)) return true; @@ -11482,7 +11549,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred, std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_ULT: - // (X + C1) u< (X + C2) if C1 u< C2. + // (X + C1) u< (X + C2) if C1 u< C2. if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ult(C2)) return true; break; @@ -12113,12 +12180,11 @@ ScalarEvolution::computeConstantDifference(const SCEV *More, const SCEV *Less) { // Try to match a common constant multiply. auto MatchConstMul = [](const SCEV *S) -> std::optional> { - auto *M = dyn_cast(S); - if (!M || M->getNumOperands() != 2 || - !isa(M->getOperand(0))) - return std::nullopt; - return { - {M->getOperand(1), cast(M->getOperand(0))->getAPInt()}}; + const APInt *C; + const SCEV *Op; + if (match(S, m_scev_Mul(m_scev_APInt(C), m_SCEV(Op)))) + return {{Op, *C}}; + return std::nullopt; }; if (auto MatchedMore = MatchConstMul(More)) { if (auto MatchedLess = MatchConstMul(Less)) { @@ -15285,67 +15351,6 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { } } -// Match the mathematical pattern A - (A / B) * B, where A and B can be -// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used -// for URem with constant power-of-2 second operands. -// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is -// 4, A / B becomes X / 8). -bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, - const SCEV *&RHS) { - if (Expr->getType()->isPointerTy()) - return false; - - // Try to match 'zext (trunc A to iB) to iY', which is used - // for URem with constant power-of-2 second operands. Make sure the size of - // the operand A matches the size of the whole expressions. - if (const auto *ZExt = dyn_cast(Expr)) - if (const auto *Trunc = dyn_cast(ZExt->getOperand(0))) { - LHS = Trunc->getOperand(); - // Bail out if the type of the LHS is larger than the type of the - // expression for now. - if (getTypeSizeInBits(LHS->getType()) > - getTypeSizeInBits(Expr->getType())) - return false; - if (LHS->getType() != Expr->getType()) - LHS = getZeroExtendExpr(LHS, Expr->getType()); - RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) - << getTypeSizeInBits(Trunc->getType())); - return true; - } - const auto *Add = dyn_cast(Expr); - if (Add == nullptr || Add->getNumOperands() != 2) - return false; - - const SCEV *A = Add->getOperand(1); - const auto *Mul = dyn_cast(Add->getOperand(0)); - - if (Mul == nullptr) - return false; - - const auto MatchURemWithDivisor = [&](const SCEV *B) { - // (SomeExpr + (-(SomeExpr / B) * B)). - if (Expr == getURemExpr(A, B)) { - LHS = A; - RHS = B; - return true; - } - return false; - }; - - // (SomeExpr + (-1 * (SomeExpr / B) * B)). 
-  if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
-    return MatchURemWithDivisor(Mul->getOperand(1)) ||
-           MatchURemWithDivisor(Mul->getOperand(2));
-
-  // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
-  if (Mul->getNumOperands() == 2)
-    return MatchURemWithDivisor(Mul->getOperand(1)) ||
-           MatchURemWithDivisor(Mul->getOperand(0)) ||
-           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
-           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
-  return false;
-}
-
 ScalarEvolution::LoopGuards
 ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
   BasicBlock *Header = L->getHeader();
@@ -15421,6 +15426,110 @@ void ScalarEvolution::LoopGuards::collectFromPHI(
   }
 }
 
+// Return a new SCEV that modifies \p Expr to the closest value that is
+// divisible by \p Divisor and less than or equal to \p Expr. For now, only
+// handle constant Expr.
+static const SCEV *getPreviousSCEVDivisibleByDivisor(const SCEV *Expr,
+                                                     const APInt &DivisorVal,
+                                                     ScalarEvolution &SE) {
+  const APInt *ExprVal;
+  if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
+      DivisorVal.isNonPositive())
+    return Expr;
+  APInt Rem = ExprVal->urem(DivisorVal);
+  // return the SCEV: Expr - Expr % Divisor
+  return SE.getConstant(*ExprVal - Rem);
+}
+
+// Return a new SCEV that modifies \p Expr to the closest value that is
+// divisible by \p Divisor and greater than or equal to \p Expr. For now, only
+// handle constant Expr.
+static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr,
+                                                 const APInt &DivisorVal,
+                                                 ScalarEvolution &SE) {
+  const APInt *ExprVal;
+  if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
+      DivisorVal.isNonPositive())
+    return Expr;
+  APInt Rem = ExprVal->urem(DivisorVal);
+  if (Rem.isZero())
+    return Expr;
+  // return the SCEV: Expr + Divisor - Expr % Divisor
+  return SE.getConstant(*ExprVal + DivisorVal - Rem);
+}
+
+static bool collectDivisibilityInformation(
+    ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS,
+    DenseMap<const SCEV *, const SCEV *> &DivInfo,
+    DenseMap<const SCEV *, APInt> &Multiples, ScalarEvolution &SE) {
+  // If we have LHS == 0, check if LHS is computing a property of some unknown
+  // SCEV %v which we can rewrite %v to express explicitly.
+  if (Predicate != CmpInst::ICMP_EQ || !match(RHS, m_scev_Zero()))
+    return false;
+  // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
+  // explicitly express that.
+  const SCEVUnknown *URemLHS = nullptr;
+  const SCEV *URemRHS = nullptr;
+  if (!match(LHS, m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE)))
+    return false;
+
+  const SCEV *Multiple =
+      SE.getMulExpr(SE.getUDivExpr(URemLHS, URemRHS), URemRHS);
+  DivInfo[URemLHS] = Multiple;
+  if (auto *C = dyn_cast<SCEVConstant>(URemRHS))
+    Multiples[URemLHS] = C->getAPInt();
+  return true;
+}
+
+// Check if the condition is a divisibility guard (A % B == 0).
+static bool isDivisibilityGuard(const SCEV *LHS, const SCEV *RHS,
+                                ScalarEvolution &SE) {
+  const SCEV *X, *Y;
+  return match(LHS, m_scev_URem(m_SCEV(X), m_SCEV(Y), SE)) && RHS->isZero();
+}
+
+// Apply divisibility by \p Divisor on MinMaxExpr with constant values,
+// recursively. This is done by aligning up/down the constant value to the
+// Divisor.
+static const SCEV *applyDivisibilityOnMinMaxExpr(const SCEV *MinMaxExpr,
+                                                 APInt Divisor,
+                                                 ScalarEvolution &SE) {
+  // Return true if \p Expr is a MinMax SCEV expression with a non-negative
+  // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
+  // the non-constant operand and in \p LHS the constant operand.
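+  // E.g. umin(16, %n) yields SCTy = scUMinExpr, LHS = 16 and RHS = %n
+  // (illustrative).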
+ auto IsMinMaxSCEVWithNonNegativeConstant = + [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS, + const SCEV *&RHS) { + if (auto *MinMax = dyn_cast(Expr)) { + if (MinMax->getNumOperands() != 2) + return false; + if (auto *C = dyn_cast(MinMax->getOperand(0))) { + if (C->getAPInt().isNegative()) + return false; + SCTy = MinMax->getSCEVType(); + LHS = MinMax->getOperand(0); + RHS = MinMax->getOperand(1); + return true; + } + } + return false; + }; + + const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr; + SCEVTypes SCTy; + if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS, + MinMaxRHS)) + return MinMaxExpr; + auto IsMin = isa(MinMaxExpr) || isa(MinMaxExpr); + assert(SE.isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!"); + auto *DivisibleExpr = + IsMin ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE) + : getNextSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE); + SmallVector Ops = { + applyDivisibilityOnMinMaxExpr(MinMaxRHS, Divisor, SE), DivisibleExpr}; + return SE.getMinMaxExpr(SCTy, Ops); +} + void ScalarEvolution::LoopGuards::collectFromBlock( ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, const BasicBlock *Block, const BasicBlock *Pred, @@ -15428,19 +15537,13 @@ void ScalarEvolution::LoopGuards::collectFromBlock( SmallVector ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, - DenseMap - &RewriteMap) { + DenseMap &RewriteMap, + const LoopGuards &DivGuards) { // WARNING: It is generally unsound to apply any wrap flags to the proposed // replacement SCEV which isn't directly implied by the structure of that // SCEV. In particular, using contextual facts to imply flags is *NOT* // legal. See the scoping rules for flags in the header to understand why. - // If LHS is a constant, apply information to the other expression. - if (isa(LHS)) { - std::swap(LHS, RHS); - Predicate = CmpInst::getSwappedPredicate(Predicate); - } - // Check for a condition of the form (-C1 + X < C2). InstCombine will // create this form when combining two checks of the form (X u< C2 + C1) and // (X >=u C1). @@ -15473,115 +15576,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock( if (MatchRangeCheckIdiom()) return; - // Return true if \p Expr is a MinMax SCEV expression with a non-negative - // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS - // the non-constant operand and in \p LHS the constant operand. - auto IsMinMaxSCEVWithNonNegativeConstant = - [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS, - const SCEV *&RHS) { - if (auto *MinMax = dyn_cast(Expr)) { - if (MinMax->getNumOperands() != 2) - return false; - if (auto *C = dyn_cast(MinMax->getOperand(0))) { - if (C->getAPInt().isNegative()) - return false; - SCTy = MinMax->getSCEVType(); - LHS = MinMax->getOperand(0); - RHS = MinMax->getOperand(1); - return true; - } - } - return false; - }; - - // Checks whether Expr is a non-negative constant, and Divisor is a positive - // constant, and returns their APInt in ExprVal and in DivisorVal. 
-    auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor,
-                                          APInt &ExprVal, APInt &DivisorVal) {
-      auto *ConstExpr = dyn_cast<SCEVConstant>(Expr);
-      auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
-      if (!ConstExpr || !ConstDivisor)
-        return false;
-      ExprVal = ConstExpr->getAPInt();
-      DivisorVal = ConstDivisor->getAPInt();
-      return ExprVal.isNonNegative() && !DivisorVal.isNonPositive();
-    };
-
-    // Return a new SCEV that modifies \p Expr to the closest number divides by
-    // \p Divisor and greater or equal than Expr.
-    // For now, only handle constant Expr and Divisor.
-    auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
-                                           const SCEV *Divisor) {
-      APInt ExprVal;
-      APInt DivisorVal;
-      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
-        return Expr;
-      APInt Rem = ExprVal.urem(DivisorVal);
-      if (!Rem.isZero())
-        // return the SCEV: Expr + Divisor - Expr % Divisor
-        return SE.getConstant(ExprVal + DivisorVal - Rem);
-      return Expr;
-    };
-
-    // Return a new SCEV that modifies \p Expr to the closest number divides by
-    // \p Divisor and less or equal than Expr.
-    // For now, only handle constant Expr and Divisor.
-    auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
-                                               const SCEV *Divisor) {
-      APInt ExprVal;
-      APInt DivisorVal;
-      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
-        return Expr;
-      APInt Rem = ExprVal.urem(DivisorVal);
-      // return the SCEV: Expr - Expr % Divisor
-      return SE.getConstant(ExprVal - Rem);
-    };
-
-    // Apply divisibilty by \p Divisor on MinMaxExpr with constant values,
-    // recursively. This is done by aligning up/down the constant value to the
-    // Divisor.
-    std::function<const SCEV *(const SCEV *, const SCEV *)>
-        ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr,
-                                           const SCEV *Divisor) {
-          const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
-          SCEVTypes SCTy;
-          if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
-                                                   MinMaxRHS))
-            return MinMaxExpr;
-          auto IsMin =
-              isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
-          assert(SE.isKnownNonNegative(MinMaxLHS) &&
-                 "Expected non-negative operand!");
-          auto *DivisibleExpr =
-              IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor)
                    : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor);
-          SmallVector<const SCEV *> Ops = {
-              ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
-          return SE.getMinMaxExpr(SCTy, Ops);
-        };
-
-    // If we have LHS == 0, check if LHS is computing a property of some unknown
-    // SCEV %v which we can rewrite %v to express explicitly.
-    if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) {
-      // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
-      // explicitly express that.
-      const SCEV *URemLHS = nullptr;
-      const SCEV *URemRHS = nullptr;
-      if (SE.matchURem(LHS, URemLHS, URemRHS)) {
-        if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
-          auto I = RewriteMap.find(LHSUnknown);
-          const SCEV *RewrittenLHS =
-              I != RewriteMap.end() ? I->second : LHSUnknown;
-          RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
-          const auto *Multiple =
-              SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
-          RewriteMap[LHSUnknown] = Multiple;
-          ExprsToRewrite.push_back(LHSUnknown);
-          return;
-        }
-      }
-    }
-
     // Do not apply information for constants or if RHS contains an AddRec.
     if (isa<SCEVConstant>(LHS) || SE.containsAddRecurrence(RHS))
       return;
@@ -15610,51 +15604,10 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
       return RewriteMap.lookup_or(S, S);
     };
 
-    // Check for the SCEV expression (A /u B) * B while B is a constant, inside
-    // \p Expr. The check is done recuresively on \p Expr, which is assumed to
-    // be a composition of Min/Max SCEVs. Return whether the SCEV expression (A
-    // /u B) * B was found, and return the divisor B in \p DividesBy. For
-    // example, if Expr = umin (umax ((A /u 8) * 8, 16), 64), return true since
-    // (A /u 8) * 8 matched the pattern, and return the constant SCEV 8 in \p
-    // DividesBy.
-    std::function<bool(const SCEV *, const SCEV *&)> HasDivisibiltyInfo =
-        [&](const SCEV *Expr, const SCEV *&DividesBy) {
-          if (auto *Mul = dyn_cast<SCEVMulExpr>(Expr)) {
-            if (Mul->getNumOperands() != 2)
-              return false;
-            auto *MulLHS = Mul->getOperand(0);
-            auto *MulRHS = Mul->getOperand(1);
-            if (isa<SCEVConstant>(MulLHS))
-              std::swap(MulLHS, MulRHS);
-            if (auto *Div = dyn_cast<SCEVUDivExpr>(MulLHS))
-              if (Div->getOperand(1) == MulRHS) {
-                DividesBy = MulRHS;
-                return true;
-              }
-          }
-          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
-            return HasDivisibiltyInfo(MinMax->getOperand(0), DividesBy) ||
-                   HasDivisibiltyInfo(MinMax->getOperand(1), DividesBy);
-          return false;
-        };
-
-    // Return true if Expr known to divide by \p DividesBy.
-    std::function<bool(const SCEV *, const SCEV *)> IsKnownToDivideBy =
-        [&](const SCEV *Expr, const SCEV *DividesBy) {
-          if (SE.getURemExpr(Expr, DividesBy)->isZero())
-            return true;
-          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
-            return IsKnownToDivideBy(MinMax->getOperand(0), DividesBy) &&
-                   IsKnownToDivideBy(MinMax->getOperand(1), DividesBy);
-          return false;
-        };
-
     const SCEV *RewrittenLHS = GetMaybeRewritten(LHS);
-    const SCEV *DividesBy = nullptr;
-    if (HasDivisibiltyInfo(RewrittenLHS, DividesBy))
-      // Check that the whole expression is divided by DividesBy
-      DividesBy =
-          IsKnownToDivideBy(RewrittenLHS, DividesBy) ? DividesBy : nullptr;
+
+    // Apply divisibility information when computing the constant multiple.
+    const APInt &DividesBy =
+        SE.getConstantMultiple(DivGuards.rewrite(RewrittenLHS));
 
     // Collect rewrites for LHS and its transitive operands based on the
     // condition.
@@ -15669,31 +15622,31 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
       // predicate.
       const SCEV *One = SE.getOne(RHS->getType());
       switch (Predicate) {
-      case CmpInst::ICMP_ULT:
-        if (RHS->getType()->isPointerTy())
-          return;
-        RHS = SE.getUMaxExpr(RHS, One);
-        [[fallthrough]];
-      case CmpInst::ICMP_SLT: {
-        RHS = SE.getMinusSCEV(RHS, One);
-        RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
-        break;
-      }
-      case CmpInst::ICMP_UGT:
-      case CmpInst::ICMP_SGT:
-        RHS = SE.getAddExpr(RHS, One);
-        RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
-        break;
-      case CmpInst::ICMP_ULE:
-      case CmpInst::ICMP_SLE:
-        RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
-        break;
-      case CmpInst::ICMP_UGE:
-      case CmpInst::ICMP_SGE:
-        RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
-        break;
-      default:
-        break;
+      case CmpInst::ICMP_ULT:
+        if (RHS->getType()->isPointerTy())
+          return;
+        RHS = SE.getUMaxExpr(RHS, One);
+        [[fallthrough]];
+      case CmpInst::ICMP_SLT: {
+        RHS = SE.getMinusSCEV(RHS, One);
+        RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+        break;
+      }
+      case CmpInst::ICMP_UGT:
+      case CmpInst::ICMP_SGT:
+        RHS = SE.getAddExpr(RHS, One);
+        RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+        break;
+      case CmpInst::ICMP_ULE:
+      case CmpInst::ICMP_SLE:
+        RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+        break;
+      case CmpInst::ICMP_UGE:
+      case CmpInst::ICMP_SGE:
+        RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+        break;
+      default:
+        break;
       }
 
       SmallVector<const SCEV *> Worklist(1, LHS);
@@ -15744,8 +15697,30 @@
       case CmpInst::ICMP_NE:
         if (match(RHS, m_scev_Zero())) {
           const SCEV *OneAlignedUp =
-              DividesBy ? GetNextSCEVDividesByDivisor(One, DividesBy) : One;
+              getNextSCEVDivisibleByDivisor(One, DividesBy, SE);
           To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
+        } else {
+          // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
+          // but creating the subtraction eagerly is expensive. Track the
+          // inequalities in a separate set, and materialize the rewrite
+          // lazily when encountering a suitable subtraction while rewriting.
+          if (LHS->getType()->isPointerTy()) {
+            LHS = SE.getLosslessPtrToIntExpr(LHS);
+            RHS = SE.getLosslessPtrToIntExpr(RHS);
+            if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
+              break;
+          }
+          const SCEVConstant *C;
+          const SCEV *A, *B;
+          if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
+              match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) {
+            RHS = A;
+            LHS = B;
+          }
+          if (LHS > RHS)
+            std::swap(LHS, RHS);
+          Guards.NotEqual.insert({LHS, RHS});
+          continue;
         }
         break;
       default:
@@ -15817,8 +15792,11 @@
 
   // Now apply the information from the collected conditions to
   // Guards.RewriteMap. Conditions are processed in reverse order, so the
-  // earliest conditions is processed first. This ensures the SCEVs with the
+  // earliest condition is processed first, except guards with divisibility
+  // information, which are moved to the back. This ensures the SCEVs with the
   // shortest dependency chains are constructed first.
+  SmallVector<std::tuple<ICmpInst::Predicate, const SCEV *, const SCEV *>>
+      GuardsToProcess;
   for (auto [Term, EnterIfTrue] : reverse(Terms)) {
     SmallVector<Value *, 8> Worklist;
     SmallPtrSet<Value *, 8> Visited;
@@ -15833,7 +15811,14 @@
           EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
       const auto *LHS = SE.getSCEV(Cmp->getOperand(0));
      const auto *RHS = SE.getSCEV(Cmp->getOperand(1));
-      CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap);
+      // If LHS is a constant, apply information to the other expression.
+      // TODO: If LHS is not a constant, check if using CompareSCEVComplexity
+      // can improve results.
+      if (isa<SCEVConstant>(LHS)) {
+        std::swap(LHS, RHS);
+        Predicate = CmpInst::getSwappedPredicate(Predicate);
+      }
+      GuardsToProcess.emplace_back(Predicate, LHS, RHS);
       continue;
     }
 
@@ -15846,6 +15831,31 @@
     }
   }
 
+  // Process divisibility guards in reverse order to populate DivGuards early.
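+  // For example (illustrative): a guard (%n urem 8) == 0 records the rewrite
+  // %n -> (%n /u 8) * 8 in DivGuards and the constant multiple 8 in
+  // Multiples, so range guards on %n can later be aligned to a multiple of 8.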
+  DenseMap<const SCEV *, APInt> Multiples;
+  LoopGuards DivGuards(SE);
+  for (const auto &[Predicate, LHS, RHS] : GuardsToProcess) {
+    if (!isDivisibilityGuard(LHS, RHS, SE))
+      continue;
+    collectDivisibilityInformation(Predicate, LHS, RHS, DivGuards.RewriteMap,
+                                   Multiples, SE);
+  }
+
+  for (const auto &[Predicate, LHS, RHS] : GuardsToProcess)
+    CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap, DivGuards);
+
+  // Apply divisibility information last. This ensures it is applied to the
+  // outermost expression after other rewrites for the given value.
+  for (const auto &[K, Divisor] : Multiples) {
+    const SCEV *DivisorSCEV = SE.getConstant(Divisor);
+    Guards.RewriteMap[K] =
+        SE.getMulExpr(SE.getUDivExpr(applyDivisibilityOnMinMaxExpr(
+                                         Guards.rewrite(K), Divisor, SE),
+                                     DivisorSCEV),
+                      DivisorSCEV);
+    ExprsToRewrite.push_back(K);
+  }
+
   // Let the rewriter preserve NUW/NSW flags if the unsigned/signed ranges of
   // the replacement expressions are contained in the ranges of the replaced
   // expressions.
@@ -15878,13 +15888,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
   class SCEVLoopGuardRewriter
      : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
     const DenseMap<const SCEV *, const SCEV *> &Map;
+    const SmallDenseSet<std::pair<const SCEV *, const SCEV *>> &NotEqual;
 
     SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap;
 
   public:
     SCEVLoopGuardRewriter(ScalarEvolution &SE,
                          const ScalarEvolution::LoopGuards &Guards)
-        : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) {
+        : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap),
+          NotEqual(Guards.NotEqual) {
       if (Guards.PreserveNUW)
         FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW);
       if (Guards.PreserveNSW)
@@ -15939,6 +15951,41 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
     }
 
     const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+      // Helper to check if S is a subtraction (A - B) where A != B, and if
+      // so, return UMax(S, 1).
+      auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * {
+        const SCEV *LHS, *RHS;
+        if (MatchBinarySub(S, LHS, RHS)) {
+          if (LHS > RHS)
+            std::swap(LHS, RHS);
+          if (NotEqual.contains({LHS, RHS})) {
+            const SCEV *OneAlignedUp = getNextSCEVDivisibleByDivisor(
+                SE.getOne(S->getType()), SE.getConstantMultiple(S), SE);
+            return SE.getUMaxExpr(OneAlignedUp, S);
+          }
+        }
+        return nullptr;
+      };
+
+      // Check if Expr itself is a subtraction pattern with guard info.
+      if (const SCEV *Rewritten = RewriteSubtraction(Expr))
+        return Rewritten;
+
+      // Trip count expressions sometimes consist of adding 3 operands, i.e.
+      // (Const + A + B). There may be guard info for A + B, and if so, apply
+      // it.
+      // TODO: Could more generally apply guards to Add sub-expressions.
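+      // For example (illustrative): with a guard %x != %y, the expression
+      // (1 + (-1 * %x) + %y) contains the sub-expression ((-1 * %x) + %y),
+      // i.e. %y - %x, which RewriteSubtraction turns into
+      // (1 umax ((-1 * %x) + %y)).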
+      if (isa<SCEVConstant>(Expr->getOperand(0)) &&
+          Expr->getNumOperands() == 3) {
+        const SCEV *Add =
+            SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2));
+        if (const SCEV *Rewritten = RewriteSubtraction(Add))
+          return SE.getAddExpr(
+              Expr->getOperand(0), Rewritten,
+              ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask));
+        if (const SCEV *S = Map.lookup(Add))
+          return SE.getAddExpr(Expr->getOperand(0), S);
+      }
       SmallVector<const SCEV *, 2> Operands;
       bool Changed = false;
       for (const auto *Op : Expr->operands()) {
@@ -15971,7 +16018,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
     }
   };
 
-  if (RewriteMap.empty())
+  if (RewriteMap.empty() && NotEqual.empty())
     return Expr;
 
   SCEVLoopGuardRewriter Rewriter(SE, *this);
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 21f844c4d2f45..161ad2adba85e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -90,6 +90,9 @@ using namespace llvm::PatternMatch;
 
 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                               cl::Hidden, cl::init(20));
+/// Maximum number of instructions to check between assume and context
+/// instruction.
+static constexpr unsigned MaxInstrsToCheckForFree = 16;
 
 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
 /// returns the element type's bitwidth.
@@ -552,6 +555,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
   return false;
 }
 
+bool llvm::willNotFreeBetween(const Instruction *Assume,
+                              const Instruction *CtxI) {
+  if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI))
+    return false;
+  // Make sure the current function cannot arrange for another thread to free on
+  // its behalf.
+  if (!CtxI->getFunction()->hasNoSync())
+    return false;
+
+  // Check if there are any calls between the assume and CtxI that may
+  // free memory.
+  for (const auto &[Idx, I] :
+       enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) {
+    // Limit number of instructions to walk.
+    if (Idx > MaxInstrsToCheckForFree)
+      return false;
+    if (const auto *CB = dyn_cast<CallBase>(&I))
+      if (!CB->hasFnAttr(Attribute::NoFree))
+        return false;
+  }
+  return true;
+}
+
 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
 // we still have enough information about `RHS` to conclude non-zero. For
 // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 033ef8be700eb..a98ce93017a7c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2069,6 +2069,27 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
       return new IntToPtrInst(P, CI.getType());
   }
 
+  // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
+  // (getelementptr i8, %Base, %Offset) if the pointer is only used as an
+  // integer value.
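+  // Illustrative IR, with hypothetical values %base and %off:
+  //   %i = ptrtoint ptr %base to i64
+  //   %a = add i64 %i, %off
+  //   %p = inttoptr i64 %a to ptr   ; all users of %p are ptrtoint
+  // becomes
+  //   %p = getelementptr i8, ptr %base, i64 %off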
+  Value *Base;
+  Value *Offset;
+  auto UsesPointerAsInt = [](User *U) {
+    if (isa<PtrToIntInst>(U))
+      return true;
+    if (auto *P = dyn_cast<PHINode>(U))
+      return P->hasOneUse() && isa<PtrToIntInst>(*P->user_begin());
+    return false;
+  };
+  if (match(CI.getOperand(0),
+            m_OneUse(m_c_Add(m_PtrToIntSameSize(DL, m_Value(Base)),
+                             m_Value(Offset)))) &&
+      CI.getType()->getPointerAddressSpace() ==
+          Base->getType()->getPointerAddressSpace() &&
+      all_of(CI.users(), UsesPointerAsInt)) {
+    return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
+  }
+
   if (Instruction *I = commonCastTransforms(CI))
     return I;
 
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index f3e992c039178..04039b885f3c5 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -1009,7 +1009,8 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
   // in simplified form, and also needs LCSSA. Running
   // this pass will simplify all loops that contain inner loops,
   // regardless of whether anything ends up being flattened.
-  LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
+  LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr,
+                             &AR.AC);
   for (Loop *InnerLoop : LN.getLoops()) {
     auto *OuterLoop = InnerLoop->getParentLoop();
     if (!OuterLoop)
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 4f2bfb073bafa..448dc2b8b52b0 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -551,7 +551,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
 
   const Function *F = L.getHeader()->getParent();
   OptimizationRemarkEmitter ORE(F);
-  LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
+  LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr, &LAR.AC);
   if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
     return PreservedAnalyses::all();
   return getLoopPassPreservedAnalyses();
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index bf457194bfd8e..aff0b901bd031 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
@@ -42,6 +43,7 @@ cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
                  "controls the budget that is considered cheap (default = 4)"));
 
 using namespace PatternMatch;
+using namespace SCEVPatternMatch;
 
 PoisonFlags::PoisonFlags(const Instruction *I) {
   NUW = false;
@@ -504,7 +506,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 
   // Recognize the canonical representation of an unsimplifed urem.
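+  // Illustrative note: per the matchURem patterns removed above, the
+  // canonical shape is an add such as (SomeExpr + ((-SomeExpr / B) * B)) or
+  // (SomeExpr + ((SomeExpr / B) * -B)), which m_scev_URem recognizes.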
   const SCEV *URemLHS = nullptr;
   const SCEV *URemRHS = nullptr;
-  if (SE.matchURem(S, URemLHS, URemRHS)) {
+  if (match(S, m_scev_URem(m_SCEV(URemLHS), m_SCEV(URemRHS), SE))) {
     Value *LHS = expand(URemLHS);
     Value *RHS = expand(URemRHS);
     return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
@@ -1223,6 +1225,54 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   return Result;
 }
 
+Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
+  Type *STy = S->getType();
+  const Loop *L = S->getLoop();
+  BasicBlock *EB = L->getExitBlock();
+  if (!EB || !EB->getSinglePredecessor() ||
+      !SE.DT.dominates(EB, Builder.GetInsertBlock()))
+    return nullptr;
+
+  for (auto &PN : EB->phis()) {
+    if (!SE.isSCEVable(PN.getType()))
+      continue;
+    auto *ExitSCEV = SE.getSCEV(&PN);
+    if (!isa<SCEVAddRecExpr>(ExitSCEV))
+      continue;
+    Type *PhiTy = PN.getType();
+    if (STy->isIntegerTy() && PhiTy->isPointerTy()) {
+      ExitSCEV = SE.getPtrToIntExpr(ExitSCEV, STy);
+      if (isa<SCEVCouldNotCompute>(ExitSCEV))
+        continue;
+    } else if (S->getType() != PN.getType()) {
+      continue;
+    }
+
+    // Check if we can re-use the existing PN by adjusting it with an expanded
+    // offset, if the offset is simpler.
+    const SCEV *Diff = SE.getMinusSCEV(S, ExitSCEV);
+    const SCEV *Op = Diff;
+    match(Op, m_scev_Add(m_SCEVConstant(), m_SCEV(Op)));
+    match(Op, m_scev_Mul(m_scev_AllOnes(), m_SCEV(Op)));
+    match(Op, m_scev_PtrToInt(m_SCEV(Op)));
+    if (!isa<SCEVConstant, SCEVUnknown>(Op))
+      continue;
+
+    assert(Diff->getType()->isIntegerTy() &&
+           "difference must be of integer type");
+    Value *DiffV = expand(Diff);
+    Value *BaseV = fixupLCSSAFormFor(&PN);
+    if (PhiTy->isPointerTy()) {
+      if (STy->isPointerTy())
+        return Builder.CreatePtrAdd(BaseV, DiffV);
+      BaseV = Builder.CreatePtrToInt(BaseV, DiffV->getType());
+    }
+    return Builder.CreateAdd(BaseV, DiffV);
+  }
+
+  return nullptr;
+}
+
 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   // In canonical mode we compute the addrec as an expression of a canonical IV
   // using evaluateAtIteration and expand the resulting SCEV expression. This
@@ -1262,6 +1312,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       return V;
   }
 
+  // If S is expanded outside the defining loop, check if there is a
+  // matching LCSSA phi node for it.
+  if (Value *V = tryToReuseLCSSAPhi(S))
+    return V;
+
   // {X,+,F} --> X + {0,+,F}
   if (!S->getStart()->isZero()) {
    if (isa<PointerType>(S->getType())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 703cfe969577d..b9cd3635047ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -994,7 +994,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
     // Returns a scalar boolean value, which is true if any lane of its
     // (boolean) vector operands is true. It produces the reduced value across
     // all unrolled iterations. Unrolling will add all copies of its original
-    // operand as additional operands.
+    // operand as additional operands. AnyOf is poison-safe as all operands
+    // will be frozen.
     AnyOf,
     // Calculates the first active lane index of the vector predicate operands.
     // It produces the lane index across all unrolled iterations.
Unrolling will diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 57b713d3dfcb9..8c424e10a74a5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -858,9 +858,9 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } case VPInstruction::AnyOf: { - Value *Res = State.get(getOperand(0)); + Value *Res = Builder.CreateFreeze(State.get(getOperand(0))); for (VPValue *Op : drop_begin(operands())) - Res = Builder.CreateOr(Res, State.get(Op)); + Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op))); return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res); } case VPInstruction::FirstActiveLane: { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll index 0d1b0829c09da..311de84993001 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll @@ -106,10 +106,43 @@ exit: ret void } +define void @backward_dep_known_safe_due_to_backedge_taken_count(ptr %A) { +; CHECK-LABEL: 'backward_dep_known_safe_due_to_backedge_taken_count' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.510 = getelementptr inbounds i32, ptr %A, i64 510 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.510, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + define void @backward_dep_known_distance_less_than_btc(ptr %A) { ; CHECK-LABEL: 'backward_dep_known_distance_less_than_btc' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 8160 bits +; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 4064 bits ; CHECK-NEXT: Dependences: ; CHECK-NEXT: BackwardVectorizable: ; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> @@ -130,10 +163,10 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %iv.mul.2 = shl nuw nsw i64 %iv, 1 - %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %gep = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2 %l = load i32, ptr %gep, align 4 %add = add nsw i32 %l, 5 - %gep.mul.2 = getelementptr inbounds i32, ptr %A.510, i64 %iv.mul.2 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.510, i64 %iv store i32 %add, ptr %gep.mul.2, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 256 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll index 1dc8d4a7e73f8..b7f2ae0529d32 100644 --- 
a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll @@ -505,7 +505,7 @@ e.1: ret i32 1 } -define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) { +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree { ; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Memory dependences are safe with run-time checks @@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: (Low: %B High: (2000 + %B)) ; CHECK-NEXT: Member: {%B,+,4}<%loop.header> ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: (Low: %A High: (2000 + %A)) ; CHECK-NEXT: Member: {%A,+,4}<%loop.header> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. @@ -565,7 +565,7 @@ e.2: ret void } -define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) { +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree { ; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Memory dependences are safe with run-time checks @@ -624,3 +624,129 @@ e.1: e.2: ret void } + +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_nofree_via_context(ptr %A, ptr %B) nosync { +; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_nofree_via_context' +; CHECK-NEXT: loop.header: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %B High: (2000 + %B)) +; CHECK-NEXT: Member: {%B,+,4}<%loop.header> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %A High: (2000 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop.header> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv + %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv + %l = load i32, ptr %gep.A, align 4 + store i32 0, ptr %gep.B, align 4 + %cntable.c.1 = icmp ult i64 %iv, 1000 + %iv.next = add nuw nsw i64 %iv, 1 + br i1 %cntable.c.1, label %b2, label %e.1 + +b2: + %uncntable.c.0 = icmp eq i32 %l, 0 + br i1 %uncntable.c.0, label %e.2, label %b3 + +b3: + %cntable.c.2 = icmp eq i64 %iv.next, 500 + br i1 %cntable.c.2, label %cleanup4, label %latch + +latch: + br label %loop.header + +cleanup4: + ret void + +e.1: + ret void + +e.2: + ret void +} + +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_missing_nofree_multiple_predecessors(ptr %A, ptr %B, i1 %c) nosync { +; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_missing_nofree_multiple_predecessors' +; CHECK-NEXT: loop.header: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: Member: {%B,+,4}<%loop.header> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: Member: {%A,+,4}<%loop.header> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ] + br i1 %c, label %then, label %else + +then: + br label %loop.header + +else: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %latch ] + %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv + %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv + %l = load i32, ptr %gep.A, align 4 + store i32 0, ptr %gep.B, align 4 + %cntable.c.1 = icmp ult i64 %iv, 1000 + %iv.next = add nuw nsw i64 %iv, 1 + br i1 %cntable.c.1, label %b2, label %e.1 + +b2: + %uncntable.c.0 = icmp eq i32 %l, 0 + br i1 %uncntable.c.0, label %e.2, label %b3 + +b3: + %cntable.c.2 = icmp eq i64 %iv.next, 500 + br i1 %cntable.c.2, label %cleanup4, label %latch + +latch: + br label %loop.header + +cleanup4: + ret void + +e.1: + ret void + +e.2: + ret void +} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll index 1a6e25859f085..468b22568277e 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll @@ -8,21 +8,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @test_distance_positive_independent_via_trip_count(ptr %A) { ; CHECK-LABEL: 'test_distance_positive_independent_via_trip_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Check 0: -; CHECK-NEXT: Comparing group GRP0: -; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.400, i64 %iv -; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv ; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: (400 + %A) High: (804 + %A)) -; CHECK-NEXT: Member: {(400 + %A),+,4}<%loop> -; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %A High: (101 + %A)) -; CHECK-NEXT: Member: {%A,+,1}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll new file mode 100644 index 0000000000000..6b2c78cebc44a --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes='print' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s + +define void @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr(ptr %start, ptr %end) { +; CHECK-LABEL: 'ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr' +; CHECK-NEXT: Determining loop execution counts for: @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr +; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0 +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %end.i = ptrtoint ptr %end to i64 + %start.i = ptrtoint ptr %start to i64 + %sub = sub i64 %end.i, %start.i + %pre.1 = icmp eq i64 %sub, 4 + call void @llvm.assume(i1 %pre.1) + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] + store i32 0, ptr %iv + %iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 4 + %ec = icmp eq ptr %iv.next, %end + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +declare void @clobber() + +define void @test_add_sub_1_guard(ptr %src, i32 %n) { +; CHECK-LABEL: 'test_add_sub_1_guard' +; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2)) to i64) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2)) to i64) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %shr = lshr i32 %n, 1 + %sub.1 = add i32 %shr, -1 + %sub.ext = zext i32 %sub.1 to i64 + %pre = icmp eq i32 %shr, 1 + %end = getelementptr i8, ptr %src, i64 %sub.ext + br i1 %pre, label %loop, label %exit + +loop: + %iv = phi ptr [ %src, %entry ], [ %iv.next, %loop ] + call void @clobber() + %iv.next = getelementptr i8, ptr %iv, i64 1 + %ec = icmp eq ptr %iv, %end + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +declare void @use(ptr) + +define i32 @test_3_op_add(i32 %x, i32 %y, ptr %A) { +; CHECK-LABEL: 'test_3_op_add' +; CHECK-NEXT: Determining loop execution counts for: @test_3_op_add +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (zext i32 (1 + (-1 * %x) + %y) to i64)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2147483647 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (zext i32 (1 + (-1 * %x) + %y) to i64)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %pre.0 = icmp ugt i32 %x, 0 + br i1 %pre.0, label %then, label %exit + +then: + %y.sub.x = sub i32 %y, %x + %pre.1 = icmp slt i32 %y.sub.x, 0 + %add.1 = add i32 %y.sub.x, 1 + %add.ext = zext i32 %add.1 to i64 + br i1 %pre.1, label %exit, label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %then ] + %and = and i64 %iv, 1 + %gep = getelementptr i8, ptr %A, i64 %and + call void @use(ptr %gep) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %add.ext 
+ br i1 %ec, label %exit, label %loop + +exit: + ret i32 0 +} diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll index 3c3748a6a5f02..1964fca603e23 100644 --- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll @@ -104,3 +104,56 @@ exit: } declare void @clobber() + + +declare void @clobber.i32(i32) + +define void @test_guards_across_loops(i32 %N) { +; CHECK-LABEL: 'test_guards_across_loops' +; CHECK-NEXT: Classifying expressions for: @test_guards_across_loops +; CHECK-NEXT: %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop.1 ] +; CHECK-NEXT: --> {0,+,1}<%loop.1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop.1: Computable } +; CHECK-NEXT: %iv.1.next = add i32 %iv.1, 1 +; CHECK-NEXT: --> {1,+,1}<%loop.1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop.1: Computable } +; CHECK-NEXT: %iv.2 = phi i32 [ 0, %loop.1 ], [ %iv.2.next, %loop.2 ] +; CHECK-NEXT: --> {0,+,1}<%loop.2> U: full-set S: full-set Exits: (1 + %N) LoopDispositions: { %loop.2: Computable } +; CHECK-NEXT: %iv.2.next = add i32 %iv.2, 1 +; CHECK-NEXT: --> {1,+,1}<%loop.2> U: full-set S: full-set Exits: (2 + %N) LoopDispositions: { %loop.2: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guards_across_loops +; CHECK-NEXT: Loop %loop.2: backedge-taken count is (1 + (zext i32 %N to i64)) +; CHECK-NEXT: Loop %loop.2: constant max backedge-taken count is i64 4294967296 +; CHECK-NEXT: Loop %loop.2: symbolic max backedge-taken count is (1 + (zext i32 %N to i64)) +; CHECK-NEXT: Loop %loop.2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop.1: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop.1: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop.1: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %loop.1: Predicated backedge-taken count is (1 + (zext i32 %N to i64)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%loop.1> Added Flags: +; CHECK-NEXT: Loop %loop.1: Predicated constant max backedge-taken count is i64 4294967296 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%loop.1> Added Flags: +; CHECK-NEXT: Loop %loop.1: Predicated symbolic max backedge-taken count is (1 + (zext i32 %N to i64)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%loop.1> Added Flags: +; +entry: + br label %loop.1 + +loop.1: + %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop.1 ] + call void @clobber.i32(i32 %iv.1) + %ec.1 = icmp ugt i32 %iv.1, %N + %iv.1.next = add i32 %iv.1, 1 + br i1 %ec.1, label %loop.2, label %loop.1 + +loop.2: + %iv.2 = phi i32 [ 0, %loop.1 ], [ %iv.2.next, %loop.2 ] + call void @clobber.i32(i32 %iv.2) + %ec.2 = icmp ugt i32 %iv.2, %N + %iv.2.next = add i32 %iv.2, 1 + br i1 %ec.2, label %exit, label %loop.2 + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll new file mode 100644 index 0000000000000..951b07272dd4b --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes='print' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s + +define void @max_btc_improved_by_applying_guards_to_add_subexpr(i32 %low, i32 %high) { +; CHECK-LABEL: 'max_btc_improved_by_applying_guards_to_add_subexpr' +; CHECK-NEXT: Determining loop execution counts for: @max_btc_improved_by_applying_guards_to_add_subexpr +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 7 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %sub = sub i32 %high, %low + %pre.1 = icmp slt i32 %sub, 8 + br i1 %pre.1, label %if.then, label %exit + +if.then: + %pre.2 = icmp slt i32 %sub, 0 + br i1 %pre.2, label %exit, label %ph + +ph: + %add.1 = add i32 %sub, 1 + %wide.trip.count = zext i32 %add.1 to i64 + br label %loop + +loop: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %wide.trip.count + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll index 8c77d704eac6a..4e5033b7a2f7f 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll @@ -12,9 +12,9 @@ define void @rewrite_zext(i32 %n) { ; CHECK-NEXT: %n.vec = and i64 %ext, -8 ; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8)) U: [0,4294967289) S: [0,4294967289) ; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,17) S: [0,17) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,17) S: 
[0,17) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8 -; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,25) S: [8,25) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,25) S: [8,25) Exits: (8 * ((zext i32 %n to i64) /u 8)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2 @@ -52,11 +52,11 @@ define i32 @rewrite_zext_min_max(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_min_max ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -98,11 +98,11 @@ define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %umin, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_zext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant 
max backedge-taken count is i64 3 @@ -144,11 +144,11 @@ define i32 @rewrite_sext_min_max(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nsw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_min_max ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -190,11 +190,11 @@ define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %smin, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nsw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_sext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -288,9 +288,9 @@ define i32 @rewrite_zext_no_icmp_ne(i32 %N) { ; CHECK-NEXT: %n.vec = and i64 %n.rnd.up, 8589934588 
; CHECK-NEXT: --> (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4)) U: [4,4294967297) S: [4,4294967297) ; CHECK-NEXT: %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,4294967293) S: [0,4294967293) Exits: (-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 + (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_no_icmp_ne ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 1073741823 @@ -328,9 +328,9 @@ define void @rewrite_zext_and_base_1(i32 %n) { ; CHECK-NEXT: %n.vec = and i64 %ext, -8 ; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8)) U: [0,4294967289) S: [0,4294967289) ; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8 -; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_1 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -371,9 +371,9 @@ define void @rewrite_zext_and_base_2(i32 %n) { ; CHECK-NEXT: %n.vec = and i64 %ext, -8 ; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8)) U: [0,4294967289) S: [0,4294967289) ; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8 -; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_2 ; CHECK-NEXT: 
Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -412,9 +412,9 @@ define void @guard_pessimizes_analysis_step2(i1 %c, i32 %N) { ; CHECK-NEXT: %init = phi i64 [ 2, %entry ], [ 4, %bb1 ] ; CHECK-NEXT: --> %init U: [2,5) S: [2,5) ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %init, %loop.ph ] -; CHECK-NEXT: --> {%init,+,2}<%loop> U: [2,17) S: [2,17) Exits: ((2 * ((14 + (-1 * %init)) /u 2)) + %init) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%init,+,2}<%loop> U: [2,17) S: [2,17) Exits: 14 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 2 -; CHECK-NEXT: --> {(2 + %init),+,2}<%loop> U: [4,19) S: [4,19) Exits: (2 + (2 * ((14 + (-1 * %init)) /u 2)) + %init) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(2 + %init),+,2}<%loop> U: [4,19) S: [4,19) Exits: 16 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @guard_pessimizes_analysis_step2 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((14 + (-1 * %init)) /u 2) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 6 @@ -456,11 +456,11 @@ define i32 @rewrite_sext_slt_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_slt_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -500,11 +500,11 @@ define i32 @rewrite_zext_ult_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) 
to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ult_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -544,11 +544,11 @@ define i32 @rewrite_zext_ule_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ule_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -588,11 +588,11 @@ define i32 @rewrite_zext_sle_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; 
CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_sle_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -632,11 +632,11 @@ define i32 @rewrite_zext_uge_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_uge_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -676,11 +676,11 @@ define i32 @rewrite_sext_sge_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 
((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_sge_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -720,11 +720,11 @@ define i32 @rewrite_zext_ugt_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ugt_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -764,11 +764,11 @@ define i32 @rewrite_sext_sgt_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: 
@rewrite_sext_sgt_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -808,9 +808,9 @@ define void @rewrite_add_rec() { ; CHECK-NEXT: %n.vec = and i64 %sub, -2 ; CHECK-NEXT: --> (2 * ({9,+,-1}<%outer.header> /u 2)) U: [0,9) S: [0,9) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant } ; CHECK-NEXT: %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ] -; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Variant } +; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) LoopDispositions: { %inner: Computable, %outer.header: Variant } ; CHECK-NEXT: %inner.iv.next = add i64 %inner.iv, 2 -; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 + (2 * ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2))) LoopDispositions: { %inner: Computable, %outer.header: Variant } +; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ({9,+,-1}<%outer.header> /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%outer.header> U: [1,11) S: [1,11) Exits: 10 LoopDispositions: { %outer.header: Computable, %inner: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_add_rec diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 9bf2427eddb9c..d5a2181e9bc5e 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) { ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,3) S: [0,3) Exits: (-1 + %N) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i16, ptr %pred, i32 %iv -; CHECK-NEXT: --> {%pred,+,2}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64)) + %pred) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%pred,+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 (-2 + (2 * %N)) to i64) + %pred) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned3 @@ -1390,9 +1390,9 @@ define void @ptr_induction_eq_2(ptr %a, i64 %n) { ; CHECK-NEXT: %b = getelementptr inbounds ptr, ptr %a, i64 %n ; CHECK-NEXT: --> ((8 * %n) + %a) U: full-set S: full-set ; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,8}<%loop> U: full-set S: full-set Exits: ((8 * ((-8 + (8 * %n)) /u 8)) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,8}<%loop> U: full-set S: full-set Exits: (-8 + (8 * %n) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 
8 -; CHECK-NEXT: --> {(8 + %a),+,8}<%loop> U: full-set S: full-set Exits: (8 + (8 * ((-8 + (8 * %n)) /u 8)) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(8 + %a),+,8}<%loop> U: full-set S: full-set Exits: ((8 * %n) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_eq_2 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * %n)) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951 @@ -1431,7 +1431,7 @@ define void @ptr_induction_early_exit_eq_1_with_align_on_load(ptr %a, ptr %b, pt ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) @@ -1470,7 +1470,7 @@ define void @ptr_induction_early_exit_eq_1_with_align_on_arguments(ptr align 8 % ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8) ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8) @@ -1511,7 +1511,7 @@ define void @ptr_induction_early_exit_eq_1_align_assumption_1(ptr %a, ptr %b, pt ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) @@ -1556,7 +1556,7 @@ define void @ptr_induction_early_exit_eq_1_align_assumption_2(ptr %a, ptr %b, pt ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. 
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8) diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll new file mode 100644 index 0000000000000..1e21fbf08a92f --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s + +declare void @use(ptr) + +define void @udiv4_and_udiv2(i1 %c, ptr %A) { +; CHECK-LABEL: 'udiv4_and_udiv2' +; CHECK-NEXT: Classifying expressions for: @udiv4_and_udiv2 +; CHECK-NEXT: %start = select i1 %c, i32 512, i32 0 +; CHECK-NEXT: --> %start U: [0,513) S: [0,513) +; CHECK-NEXT: %div.2 = lshr i32 %start, 1 +; CHECK-NEXT: --> (%start /u 2) U: [0,257) S: [0,257) +; CHECK-NEXT: %div.4 = lshr i32 %start, 2 +; CHECK-NEXT: --> (%start /u 4) U: [0,129) S: [0,129) +; CHECK-NEXT: %iv.start = zext i32 %div.4 to i64 +; CHECK-NEXT: --> ((zext i32 %start to i64) /u 4) U: [0,129) S: [0,129) +; CHECK-NEXT: %wide.trip.count = zext i32 %div.2 to i64 +; CHECK-NEXT: --> ((zext i32 %start to i64) /u 2) U: [0,257) S: [0,257) +; CHECK-NEXT: %iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {((zext i32 %start to i64) /u 4),+,1}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) /u 2) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.8 = getelementptr i8, ptr %A, i64 %iv +; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 4) + %A),+,1}<%loop> U: full-set S: full-set Exits: (((zext i32 %start to i64) /u 2) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.16 = getelementptr i16, ptr %A, i64 %iv +; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 2) + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.32 = getelementptr i32, ptr %A, i64 %iv +; CHECK-NEXT: --> {((zext i32 %start to i64) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 %start to i64)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.40 = getelementptr <{ i32, i8 }>, ptr %A, i64 %iv +; CHECK-NEXT: --> {((5 * ((zext i32 %start to i64) /u 4)) + %A),+,5}<%loop> U: full-set S: full-set Exits: ((5 * ((zext i32 %start to i64) /u 2)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.48 = getelementptr <{ i32, i16 }>, ptr %A, i64 %iv +; CHECK-NEXT: --> {((6 * ((zext i32 %start to i64) /u 4)) + %A),+,6}<%loop> U: full-set S: full-set Exits: ((6 * ((zext i32 %start to i64) /u 2)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add i64 %iv, 1 +; CHECK-NEXT: --> {(1 + ((zext i32 %start to i64) /u 4)),+,1}<%loop> U: full-set S: full-set Exits: (1 + ((zext i32 %start to i64) /u 2)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT:
Determining loop execution counts for: @udiv4_and_udiv2 +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * ((zext i32 %start to i64) /u 4)) + ((zext i32 %start to i64) /u 2)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -1 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-1 * ((zext i32 %start to i64) /u 4)) + ((zext i32 %start to i64) /u 2)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %start = select i1 %c, i32 512, i32 0 + %div.2 = lshr i32 %start, 1 + %div.4 = lshr i32 %start, 2 + %iv.start = zext i32 %div.4 to i64 + %wide.trip.count = zext i32 %div.2 to i64 + br label %loop + +loop: + %iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ] + %gep.8 = getelementptr i8, ptr %A, i64 %iv + call void @use(ptr %gep.8) + %gep.16 = getelementptr i16, ptr %A, i64 %iv + call void @use(ptr %gep.16) + %gep.32 = getelementptr i32, ptr %A, i64 %iv + call void @use(ptr %gep.32) + %gep.40 = getelementptr <{i32, i8}>, ptr %A, i64 %iv + call void @use(ptr %gep.40) + %gep.48 = getelementptr <{i32, i16}>, ptr %A, i64 %iv + call void @use(ptr %gep.48) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %wide.trip.count + br i1 %ec, label %exit, label %loop + +exit: + ret void +} +define void @udiv3_and_udiv5_mul_4(i1 %c, ptr %A) { +; CHECK-LABEL: 'udiv3_and_udiv5_mul_4' +; CHECK-NEXT: Classifying expressions for: @udiv3_and_udiv5_mul_4 +; CHECK-NEXT: %start = select i1 %c, i32 512, i32 0 +; CHECK-NEXT: --> %start U: [0,513) S: [0,513) +; CHECK-NEXT: %div.3 = udiv i32 %start, 3 +; CHECK-NEXT: --> (%start /u 3) U: [0,171) S: [0,171) +; CHECK-NEXT: %div.5 = udiv i32 %start, 5 +; CHECK-NEXT: --> (%start /u 5) U: [0,103) S: [0,103) +; CHECK-NEXT: %iv.start = zext i32 %div.5 to i64 +; CHECK-NEXT: --> ((zext i32 %start to i64) /u 5) U: [0,103) S: [0,103) +; CHECK-NEXT: %wide.trip.count = zext i32 %div.3 to i64 +; CHECK-NEXT: --> ((zext i32 %start to i64) /u 3) U: [0,171) S: [0,171) +; CHECK-NEXT: %iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {((zext i32 %start to i64) /u 5),+,1}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) /u 3) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.8 = getelementptr i8, ptr %A, i64 %iv +; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 5) + %A),+,1}<%loop> U: full-set S: full-set Exits: (((zext i32 %start to i64) /u 3) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.16 = getelementptr i16, ptr %A, i64 %iv +; CHECK-NEXT: --> {((2 * ((zext i32 %start to i64) /u 5)) + %A),+,2}<%loop> U: full-set S: full-set Exits: ((2 * ((zext i32 %start to i64) /u 3)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.32 = getelementptr i32, ptr %A, i64 %iv +; CHECK-NEXT: --> {((4 * ((zext i32 %start to i64) /u 5)) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((zext i32 %start to i64) /u 3)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.40 = getelementptr <{ i32, i8 }>, ptr %A, i64 %iv +; CHECK-NEXT: --> {((5 * ((zext i32 %start to i64) /u 5)) + %A),+,5}<%loop> U: full-set S: full-set Exits: ((5 * ((zext i32 %start to i64) /u 3)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.48 = getelementptr <{ i32, i16 }>, ptr %A, i64 %iv +; CHECK-NEXT: --> {((6 * ((zext i32 %start to i64) /u 5)) + %A),+,6}<%loop> U: full-set S: full-set Exits: ((6 * ((zext i32 %start to i64) /u 3)) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add i64 %iv, 1 +; CHECK-NEXT: --> {(1 + ((zext i32 %start to 
i64) /u 5)),+,1}<%loop> U: full-set S: full-set Exits: (1 + ((zext i32 %start to i64) /u 3)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @udiv3_and_udiv5_mul_4 +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * ((zext i32 %start to i64) /u 5)) + ((zext i32 %start to i64) /u 3)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -1 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-1 * ((zext i32 %start to i64) /u 5)) + ((zext i32 %start to i64) /u 3)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %start = select i1 %c, i32 512, i32 0 + %div.3 = udiv i32 %start, 3 + %div.5 = udiv i32 %start, 5 + %iv.start = zext i32 %div.5 to i64 + %wide.trip.count = zext i32 %div.3 to i64 + br label %loop + +loop: + %iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ] + %gep.8 = getelementptr i8, ptr %A, i64 %iv + call void @use(ptr %gep.8) + %gep.16 = getelementptr i16, ptr %A, i64 %iv + call void @use(ptr %gep.16) + %gep.32 = getelementptr i32, ptr %A, i64 %iv + call void @use(ptr %gep.32) + %gep.40 = getelementptr <{i32, i8}>, ptr %A, i64 %iv + call void @use(ptr %gep.40) + %gep.48 = getelementptr <{i32, i16}>, ptr %A, i64 %iv + call void @use(ptr %gep.48) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %wide.trip.count + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +declare void @use.i64(i64) + +define void @dividend_not_known_multiple_of_divisor(i64 %x) { +; CHECK-LABEL: 'dividend_not_known_multiple_of_divisor' +; CHECK-NEXT: Classifying expressions for: @dividend_not_known_multiple_of_divisor +; CHECK-NEXT: %mul.2 = shl i64 %x, 1 +; CHECK-NEXT: --> (2 * %x) U: [0,-1) S: [-9223372036854775808,9223372036854775807) +; CHECK-NEXT: %div.16 = lshr exact i64 %mul.2, 4 +; CHECK-NEXT: --> ((2 * %x) /u 16) U: [0,1152921504606846976) S: [0,1152921504606846976) +; CHECK-NEXT: %m2 = and i64 %div.16, 1152921504606846974 +; CHECK-NEXT: --> (2 * ((2 * %x) /u 32)) U: [0,1152921504606846975) S: [0,1152921504606846975) +; CHECK-NEXT: %m3 = mul i64 %div.16, 2 +; CHECK-NEXT: --> (2 * ((2 * %x) /u 16)) U: [0,2305843009213693951) S: [0,2305843009213693951) +; CHECK-NEXT: %m4 = udiv i64 %m3, 4 +; CHECK-NEXT: --> ((2 * ((2 * %x) /u 16)) /u 4) U: [0,576460752303423488) S: [0,576460752303423488) +; CHECK-NEXT: Determining loop execution counts for: @dividend_not_known_multiple_of_divisor +; +entry: + %mul.2 = shl i64 %x, 1 + %div.16 = lshr exact i64 %mul.2, 4 + %m2 = and i64 %div.16, 1152921504606846974 + call void @use.i64(i64 %m2) + + %m3 = mul i64 %div.16, 2 + %m4 = udiv i64 %m3, 4 + call void @use.i64(i64 %m4) + ret void +} + +define void @btc_depends_on_div_mul(i64 %x) { +; CHECK-LABEL: 'btc_depends_on_div_mul' +; CHECK-NEXT: Classifying expressions for: @btc_depends_on_div_mul +; CHECK-NEXT: %mul.2 = shl i64 %x, 1 +; CHECK-NEXT: --> (2 * %x) U: [0,-1) S: [-9223372036854775808,9223372036854775807) +; CHECK-NEXT: %div.16 = lshr exact i64 %mul.2, 4 +; CHECK-NEXT: --> ((2 * %x) /u 16) U: [0,1152921504606846976) S: [0,1152921504606846976) +; CHECK-NEXT: %masked = and i64 %div.16, 1152921504606846974 +; CHECK-NEXT: --> (2 * ((2 * %x) /u 32)) U: [0,1152921504606846975) S: [0,1152921504606846975) +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {0,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ((2 * %x) /u 32))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add i64 %iv, 2 +; CHECK-NEXT: --> {2,+,2}<%loop> U: 
[0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((2 * %x) /u 32)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @btc_depends_on_div_mul +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (2 * ((2 * %x) /u 32))) /u 2) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-2 + (2 * ((2 * %x) /u 32))) /u 2) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %mul.2 = shl i64 %x, 1 + %div.16 = lshr exact i64 %mul.2, 4 + %masked = and i64 %div.16, 1152921504606846974 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + call void @use.i64(i64 %iv) + %iv.next = add i64 %iv, 2 + %ec = icmp eq i64 %iv.next, %masked + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/ne-guard-multiple-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/ne-guard-multiple-trip-count.ll new file mode 100644 index 0000000000000..220c5a1deb1a0 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/ne-guard-multiple-trip-count.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s + +declare void @foo() + +; Tests with multiple guards for the same value and different values. + +define void @test_guard_order_b_then_c_and_d(ptr %a, ptr %b, ptr %c, ptr %d) { +; CHECK-LABEL: 'test_guard_order_b_then_c_and_d' +; CHECK-NEXT: Classifying expressions for: @test_guard_order_b_then_c_and_d +; CHECK-NEXT: %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%a,+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_order_b_then_c_and_d +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.eq.b = icmp ne ptr %a, %b + %cmp.eq.c = icmp ne ptr %a, %c + %cmp.eq.d = icmp ne ptr %b, %d + call void @llvm.assume(i1 %cmp.eq.b) + call void @llvm.assume(i1 %cmp.eq.c) + call void @llvm.assume(i1 %cmp.eq.d) + br label %loop + +loop: + %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] + %iv.next = getelementptr i8, ptr %iv, i64 1 + call void @foo() + %ec = icmp eq ptr %iv.next, %b + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_guard_order_d_then_c_and_b(ptr %a, ptr %b, ptr %c, ptr %d) { +; CHECK-LABEL: 'test_guard_order_d_then_c_and_b' +; CHECK-NEXT: Classifying expressions for: @test_guard_order_d_then_c_and_b +; CHECK-NEXT: %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%a,+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8,
ptr %iv, i64 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_order_d_then_c_and_b +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.eq.b = icmp ne ptr %a, %b + %cmp.eq.c = icmp ne ptr %a, %c + %cmp.eq.d = icmp ne ptr %b, %d + call void @llvm.assume(i1 %cmp.eq.d) + call void @llvm.assume(i1 %cmp.eq.c) + call void @llvm.assume(i1 %cmp.eq.b) + br label %loop + +loop: + %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] + %iv.next = getelementptr i8, ptr %iv, i64 1 + call void @foo() + %ec = icmp eq ptr %iv.next, %b + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll index c79befac2fb1d..1c108bd7318e9 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll @@ -17,67 +17,67 @@ define i32 @pr58402_large_number_of_zext(ptr %dst) { ; CHECK-NEXT: %add7 = add i32 %i, 4 ; CHECK-NEXT: --> (4 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [4,5) S: [4,5) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i1 = and i32 %add7, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2)) U: [4,5) S: [4,5) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (4 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [4,5) S: [4,5) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.1 = add i32 %i1, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) U: [8,9) S: [8,9) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (8 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [8,9) S: [8,9) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i2 = and i32 %add7.1, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2)) U: [8,9) S: [8,9) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (8 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [8,9) S: [8,9) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.2 = add i32 %i2, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) U: [12,13) S: [12,13) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (12 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [12,13) S: [12,13) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i3 = and i32 %add7.2, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2)) U: [12,13) S: [12,13) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (12 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [12,13) S: [12,13) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.3 = add i32 %i3, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) U: [16,17) S: [16,17) Exits: <<Unknown>> LoopDispositions: {
%header: Variant } +; CHECK-NEXT: --> (16 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [16,17) S: [16,17) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i4 = and i32 %add7.3, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [16,17) S: [16,17) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (16 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [16,17) S: [16,17) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.4 = add i32 %i4, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [20,21) S: [20,21) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (20 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [20,21) S: [20,21) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i5 = and i32 %add7.4, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [20,21) S: [20,21) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (20 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [20,21) S: [20,21) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.5 = add i32 %i5, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [24,25) S: [24,25) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (24 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [24,25) S: [24,25) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i6 = and i32 %add7.5, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [24,25) S: [24,25) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (24 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [24,25) S: [24,25) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.6 = add i32 %i6, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [28,29) S: [28,29) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (28 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [28,29) S: [28,29) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i7 = and i32 %add7.6, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [28,29) S: [28,29) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (28 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [28,29) S: [28,29) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.7 = add i32 %i7, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [32,33) S: [32,33) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (32 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [32,33) S: [32,33) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i8 = and i32 %add7.7, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to
i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [32,33) S: [32,33) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (32 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [32,33) S: [32,33) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.8 = add i32 %i8, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [36,37) S: [36,37) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (36 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [36,37) S: [36,37) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i9 = and i32 %add7.8, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [36,37) S: [36,37) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (36 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [36,37) S: [36,37) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.9 = add i32 %i9, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [40,41) S: [40,41) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (40 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [40,41) S: [40,41) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i10 = and i32 %add7.9, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [40,41) S: [40,41) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (40 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [40,41) S: [40,41) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.10 = add i32 %i10, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [44,45) S: [44,45) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (44 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [44,45) S: [44,45) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i11 = and i32 %add7.10, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [44,45) S: [44,45) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (44 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [44,45) S: [44,45) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.11 = add i32 %i11, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [48,49) S: [48,49) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +;
CHECK-NEXT: --> (48 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [48,49) S: [48,49) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i12 = and i32 %add7.11, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [48,49) S: [48,49) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (48 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [48,49) S: [48,49) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.12 = add i32 %i12, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [52,53) S: [52,53) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (52 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [52,53) S: [52,53) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i13 = and i32 %add7.12, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [52,53) S: [52,53) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (52 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [52,53) S: [52,53) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.13 = add i32 %i13, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [56,57) S: [56,57) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (56 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [56,57) S: [56,57) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i14 = and i32 %add7.13, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [56,57) S: [56,57) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (56 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [56,57) S: [56,57) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.14 = add i32 %i14, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [60,61) S: [60,61) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (60 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [60,61) S: [60,61) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i15 = and i32 %add7.14, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 *
((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [60,61) S: [60,61) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (60 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [60,61) S: [60,61) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.15 = add i32 %i15, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [64,65) S: [64,65) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (64 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [64,65) S: [64,65) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i16 = and i32 %add7.15, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [64,65) S: [64,65) Exits: <<Unknown>> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (64 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [64,65) S: [64,65) Exits: <<Unknown>> LoopDispositions: { %header: Variant } ; CHECK-NEXT: Determining loop execution counts for: @pr58402_large_number_of_zext ; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count.
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index e784d25385980..0c1f37bf58601 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -382,7 +382,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -393,7 +393,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -406,9 +406,9 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11,
align 1 @@ -419,7 +419,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -447,6 +447,84 @@ bb5: ret void } +define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { +; X64-LABEL: 'pr46786_c26_char_cmp_ops_swapped' +; X64-NEXT: Classifying expressions for: @pr46786_c26_char_cmp_ops_swapped +; X64-NEXT: %i4 = ptrtoint ptr %arg to i64 +; X64-NEXT: --> (ptrtoint ptr %arg to i64) U: full-set S: full-set +; X64-NEXT: %i7 = phi ptr [ %arg, %bb3 ], [ %i14, %bb6 ] +; X64-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: %i8 = load i8, ptr %i7, align 1 +; X64-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 +; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: %i10 = sub i64 %i9, %i4 +; X64-NEXT: --> {0,+,1}<%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 +; X64-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } +; X64-NEXT: %i12 = load i8, ptr %i11, align 1 +; X64-NEXT: --> %i12 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X64-NEXT: %i13 = add i8 %i12, %i8 +; X64-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X64-NEXT: %i14 = getelementptr inbounds i8, ptr %i7, i64 1 +; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped +; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 +; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) +; X64-NEXT: Loop %bb6: Trip multiple is 1 +; +; X32-LABEL: 'pr46786_c26_char_cmp_ops_swapped' +; X32-NEXT: Classifying expressions for: @pr46786_c26_char_cmp_ops_swapped +; X32-NEXT: %i4 = ptrtoint ptr %arg to i64 +; X32-NEXT: --> (zext i32 (ptrtoint ptr %arg to i32) to i64) U: [0,4294967296) S: [0,4294967296) +; X32-NEXT: %i7 = phi ptr [ %arg, %bb3 ], [ %i14, %bb6 ] +; X32-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr
%arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: %i8 = load i8, ptr %i7, align 1 +; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: %i10 = sub i64 %i9, %i4 +; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 +; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } +; X32-NEXT: %i12 = load i8, ptr %i11, align 1 +; X32-NEXT: --> %i12 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X32-NEXT: %i13 = add i8 %i12, %i8 +; X32-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X32-NEXT: %i14 = getelementptr inbounds i8, ptr %i7, i64 1 +; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped +; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 +; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) +; X32-NEXT: Loop %bb6: Trip multiple is 1 +; + %i = icmp eq ptr %arg1, %arg + br i1 %i, label %bb5, label %bb3 + +bb3: + %i4 = ptrtoint ptr %arg to i64 + br label %bb6 + +bb6: + %i7 = phi ptr [ %arg, %bb3 ], [ %i14, %bb6 ] + %i8 = load i8, ptr %i7 + %i9 = ptrtoint ptr %i7 to i64 + %i10 = sub i64 %i9, %i4 + %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 + %i12 = load i8, ptr %i11 + %i13 = add i8 %i12, %i8 + store i8 %i13, ptr %i11 + %i14 = getelementptr inbounds i8, ptr %i7, i64 1 + %i15 = icmp eq ptr %i14, %arg1 + br i1 %i15, label %bb5, label %bb6 + +bb5: + ret void +} + + ; void pr46786_c26_int(int* start, int *end, int *other) { ; for (int* cur = start; cur != end; ++cur) ; other[cur - start] += *cur; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll index 8d091a00ed4b9..d38010403dad7 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll @@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void umin(unsigned a, unsigned b) { ; a *= 2; @@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void smin(signed a, signed b) { ; a *= 2; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index bf140c7fa216a..f35c48b3e6fc5 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -574,5 +574,189 @@ exit: ret void } +define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) { +; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption' +; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption +; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] + call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 4) ] + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] + store ptr %iv, ptr %iv + %iv.next = getelementptr i8, ptr %iv, i64 4 + %ec = icmp ne ptr %iv.next, %end + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) { +; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption' +; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption +; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-3) S:
[-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 4) ] + call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 4) ] + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] + store ptr %iv, ptr %iv + %iv.next = getelementptr i8, ptr %iv, i64 4 + %ec = icmp ne ptr %iv.next, %end + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) { +; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption' +; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption +; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ] + call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ] + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] + store ptr %iv, ptr %iv + %iv.next = getelementptr i8, ptr %iv, i64 4 + %ec = icmp ne ptr %iv.next, %end + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +declare i1 @cond() + +define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr %start, ptr %end) { +; CHECK-LABEL: 'test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors' +; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors +; CHECK-NEXT: %c = call i1 @cond() +; CHECK-NEXT: --> %c U: full-set S: full-set +; CHECK-NEXT: %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { 
%loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] + call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 4) ] + %c = call i1 @cond() + br i1 %c, label %then, label %else + +then: + br label %loop + +else: + br label %loop + +loop: + %iv = phi ptr [ %start, %then] , [ %start, %else ], [ %iv.next, %loop ] + store ptr %iv, ptr %iv + %iv.next = getelementptr i8, ptr %iv, i64 4 + %ec = icmp ne ptr %iv.next, %end + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +define void @test_urem_non_constant(ptr %dst, i32 %a, i32 %b) { +; CHECK-LABEL: 'test_urem_non_constant' +; CHECK-NEXT: Classifying expressions for: @test_urem_non_constant +; CHECK-NEXT: %rem = urem i32 %a, %b +; CHECK-NEXT: --> ((-1 * (%a /u %b) * %b) + %a) U: full-set S: full-set +; CHECK-NEXT: %and.0 = and i1 %pre.0, %pre.1 +; CHECK-NEXT: --> (%pre.1 umin %pre.0) U: full-set S: full-set +; CHECK-NEXT: %and.1 = and i1 %and.0, %pre.2 +; CHECK-NEXT: --> (%pre.1 umin %pre.2 umin %pre.0) U: full-set S: full-set +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {0,+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.dst = getelementptr inbounds i8, ptr %dst, i32 %b +; CHECK-NEXT: --> ((sext i32 %b to i64) + %dst) U: full-set S: full-set Exits: ((sext i32 %b to i64) + %dst) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: %iv.next = add i32 %iv, %b +; CHECK-NEXT: --> {%b,+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_urem_non_constant +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
+; +entry: + %rem = urem i32 %a, %b + %pre.0 = icmp eq i32 %rem, 0 + %pre.1 = icmp ne i32 %a, 0 + %pre.2 = icmp ne i32 %b, 0 + %and.0 = and i1 %pre.0, %pre.1 + %and.1 = and i1 %and.0, %pre.2 + br i1 %and.1, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] + %gep.dst = getelementptr inbounds i8, ptr %dst, i32 %b + store i8 0, ptr %gep.dst + %iv.next = add i32 %iv, %b + %ec = icmp ne i32 %iv.next, %a + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + declare void @llvm.assume(i1) declare void @llvm.experimental.guard(i1, ...) diff --git a/llvm/test/Analysis/ScalarEvolution/zext-add.ll b/llvm/test/Analysis/ScalarEvolution/zext-add.ll new file mode 100644 index 0000000000000..a08feef7098ea --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/zext-add.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +declare i1 @cond() + +define void @test_push_constant_into_zext(ptr %dst, ptr %src, i32 %n, i64 %offset) { +; CHECK-LABEL: 'test_push_constant_into_zext' +; CHECK-NEXT: Classifying expressions for: @test_push_constant_into_zext +; CHECK-NEXT: %outer.ptr = phi ptr [ %src, %entry ], [ %ptr.iv.next, %inner.loop ] +; CHECK-NEXT: --> %outer.ptr U: full-set S: full-set Exits: <> LoopDispositions: { %outer.loop: Variant, %inner.loop: Invariant } +; CHECK-NEXT: %c = call i1 @cond() +; CHECK-NEXT: --> %c U: full-set S: full-set Exits: <> LoopDispositions: { %outer.loop: Variant, %inner.loop: Invariant } +; CHECK-NEXT: %iv = phi i32 [ 0, %outer.loop ], [ %iv.next, %inner.loop ] +; CHECK-NEXT: --> {0,+,1}<%inner.loop> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + (1 smax %n)) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: %ptr.iv = phi ptr [ %src, %outer.loop ], [ %ptr.iv.next, %inner.loop ] +; CHECK-NEXT: --> {%src,+,%offset}<%inner.loop> U: full-set S: full-set Exits: (((zext i32 (-1 + (1 smax %n)) to i64) * %offset) + %src) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: %l = load i8, ptr %outer.ptr, align 1 +; CHECK-NEXT: --> %l U: full-set S: full-set Exits: <> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant } +; CHECK-NEXT: %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 %offset +; CHECK-NEXT: --> {(%offset + %src),+,%offset}<%inner.loop> U: full-set S: full-set Exits: (((zext i32 (1 smax %n) to i64) * %offset) + %src) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: %iv.next = add i32 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%inner.loop> U: [1,-2147483648) S: [1,-2147483648) Exits: (1 smax %n) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: Determining loop execution counts for: @test_push_constant_into_zext +; CHECK-NEXT: Loop %inner.loop: backedge-taken count is (-1 + (1 smax %n)) +; CHECK-NEXT: Loop %inner.loop: constant max backedge-taken count is i32 2147483646 +; CHECK-NEXT: Loop %inner.loop: symbolic max backedge-taken count is (-1 + (1 smax %n)) +; CHECK-NEXT: Loop %inner.loop: Trip multiple is 1 +; CHECK-NEXT: Loop %outer.loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %outer.loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %outer.loop: Unpredictable symbolic max backedge-taken count. 
+; +entry: + br label %outer.loop + +outer.loop: + %outer.ptr = phi ptr [ %src, %entry ], [ %ptr.iv.next, %inner.loop ] + %c = call i1 @cond() + br i1 %c, label %inner.loop, label %exit + +inner.loop: + %iv = phi i32 [ 0, %outer.loop ], [ %iv.next, %inner.loop ] + %ptr.iv = phi ptr [ %src, %outer.loop ], [ %ptr.iv.next, %inner.loop ] + %l = load i8, ptr %outer.ptr, align 1 + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 %offset + store i8 %l, ptr %dst, align 2 + %iv.next = add i32 %iv, 1 + %ec = icmp slt i32 %iv.next, %n + br i1 %ec, label %inner.loop, label %outer.loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll deleted file mode 100644 index 899d31d266e51..0000000000000 --- a/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll +++ /dev/null @@ -1,81 +0,0 @@ -; RUN: opt -loop-reduce -S < %s | FileCheck %s -; PR18000 - -target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@a = global i32 0, align 4 -@b = common global i32 0, align 4 -@e = common global i8 0, align 1 -@d = common global i32 0, align 4 -@c = common global i32 0, align 4 -@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 - -; Function Attrs: nounwind optsize uwtable -; CHECK-LABEL: foo -define i32 @foo() { -entry: - %.pr = load i32, ptr @b, align 4 - %cmp10 = icmp slt i32 %.pr, 1 - br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge - -entry.for.end9_crit_edge: ; preds = %entry - %.pre = load i32, ptr @c, align 4 - br label %for.end9 - -for.cond1.preheader.lr.ph: ; preds = %entry - %0 = load i32, ptr @a, align 4 - %tobool = icmp eq i32 %0, 0 - br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split - -for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8 - %1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ] - br label %if.end - -; CHECK-LABEL: if.end -if.end: ; preds = %if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge - -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ] - %indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ] - - %2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ] - %conv7 = mul i32 %indvars.iv, 258 - %shl = and i32 %conv7, 510 - store i32 %shl, ptr @c, align 4 - -; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -258 - %dec = add i8 %2, -1 - - %cmp2 = icmp sgt i8 %dec, -1 - %indvars.iv.next = add i32 %indvars.iv, -1 - br i1 %cmp2, label %if.end, label %for.inc8 - -for.inc8: ; preds = %if.end - store i32 0, ptr @d, align 4 - %inc = add nsw i32 %1, 1 - store i32 %inc, ptr @b, align 4 - %cmp = icmp slt i32 %1, 0 - br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge - -for.cond.for.end9_crit_edge: ; preds = %for.inc8 - store i8 %dec, ptr @e, align 1 - br label %for.end9 - -for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge - %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ] - %call = tail call i32 (ptr, ...) 
@printf(ptr @.str, i32 %3) #2 - br label %return - -return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph - store i8 1, ptr @e, align 1 - store i32 0, ptr @d, align 4 - br label %return - -return: ; preds = %return.loopexit.split, %for.end9 - %retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ] - ret i32 %retval.0 -} - -; Function Attrs: nounwind optsize -declare i32 @printf(ptr nocapture readonly, ...) - diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index b71a360d1be12..8283e7bac3457 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -721,6 +721,13 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-LABEL: spill_reduce_succ: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 +; CHECK-NEXT: ble cr0, .LBB7_9 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: sldi r6, r6, 2 +; CHECK-NEXT: li r11, 1 +; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r26, r10 +; CHECK-NEXT: cmpdi r6, 1 ; CHECK-NEXT: std r14, -144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r15, -136(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r16, -128(r1) # 8-byte Folded Spill @@ -733,231 +740,232 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r2, -152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-NEXT: ble cr0, .LBB7_7 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r6, r6, 2 -; CHECK-NEXT: li r7, 1 -; CHECK-NEXT: mr r30, r10 -; CHECK-NEXT: cmpdi r6, 1 -; CHECK-NEXT: iselgt r7, r6, r7 -; CHECK-NEXT: addi r8, r7, -1 -; CHECK-NEXT: clrldi r6, r7, 63 -; CHECK-NEXT: cmpldi r8, 3 -; CHECK-NEXT: blt cr0, .LBB7_4 +; CHECK-NEXT: iselgt r11, r6, r11 +; CHECK-NEXT: addi r12, r11, -1 +; CHECK-NEXT: cmpldi r12, 3 +; CHECK-NEXT: clrldi r6, r11, 63 +; CHECK-NEXT: blt cr0, .LBB7_5 ; CHECK-NEXT: # %bb.2: # %for.body.preheader.new -; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: mulli r24, r30, 24 -; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: rldicl r0, r7, 62, 2 -; CHECK-NEXT: sldi r11, r30, 5 -; CHECK-NEXT: sldi r19, r30, 4 -; CHECK-NEXT: sldi r7, r14, 3 -; CHECK-NEXT: add r14, r30, r14 -; CHECK-NEXT: sldi r10, r16, 3 -; CHECK-NEXT: sldi r12, r15, 3 -; CHECK-NEXT: add r16, r30, r16 -; CHECK-NEXT: add r15, r30, r15 -; CHECK-NEXT: add r27, r11, r7 -; CHECK-NEXT: add r22, r24, r7 -; CHECK-NEXT: add r17, r19, r7 -; CHECK-NEXT: sldi r2, r14, 3 -; CHECK-NEXT: add r26, r24, r10 -; CHECK-NEXT: add r25, r24, r12 -; CHECK-NEXT: add r21, r19, r10 -; CHECK-NEXT: add r20, r19, r12 -; CHECK-NEXT: add r8, r11, r10 -; CHECK-NEXT: sldi r16, r16, 3 -; CHECK-NEXT: add 
r29, r5, r27 -; CHECK-NEXT: add r28, r4, r27 -; CHECK-NEXT: add r27, r3, r27 -; CHECK-NEXT: add r24, r5, r22 -; CHECK-NEXT: add r23, r4, r22 -; CHECK-NEXT: add r22, r3, r22 -; CHECK-NEXT: add r19, r5, r17 -; CHECK-NEXT: add r18, r4, r17 -; CHECK-NEXT: add r17, r3, r17 -; CHECK-NEXT: add r14, r5, r2 -; CHECK-NEXT: add r31, r4, r2 -; CHECK-NEXT: add r2, r3, r2 -; CHECK-NEXT: add r9, r5, r8 -; CHECK-NEXT: add r8, r11, r12 +; CHECK-NEXT: rldicl r11, r11, 62, 2 +; CHECK-NEXT: sldi r20, r8, 3 +; CHECK-NEXT: mr r14, r7 +; CHECK-NEXT: sldi r7, r7, 3 +; CHECK-NEXT: sldi r21, r9, 3 +; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r9, -208(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r8, -184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r5, -200(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r4, -168(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r11, -192(r1) # 8-byte Folded Spill +; CHECK-NEXT: sldi r11, r10, 5 +; CHECK-NEXT: add r0, r11, r20 +; CHECK-NEXT: add r12, r11, r21 +; CHECK-NEXT: add r30, r5, r0 +; CHECK-NEXT: add r0, r11, r7 +; CHECK-NEXT: std r21, -216(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, -224(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r12, r5, r12 +; CHECK-NEXT: add r29, r5, r0 +; CHECK-NEXT: add r28, r4, r0 +; CHECK-NEXT: add r27, r3, r0 +; CHECK-NEXT: mulli r0, r10, 24 +; CHECK-NEXT: std r14, -176(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r26, r0, r21 +; CHECK-NEXT: add r25, r0, r20 +; CHECK-NEXT: add r0, r0, r7 +; CHECK-NEXT: add r24, r5, r0 +; CHECK-NEXT: add r23, r4, r0 +; CHECK-NEXT: add r22, r3, r0 +; CHECK-NEXT: sldi r0, r10, 4 ; CHECK-NEXT: add r26, r5, r26 ; CHECK-NEXT: add r25, r5, r25 +; CHECK-NEXT: add r21, r0, r21 +; CHECK-NEXT: add r20, r0, r20 +; CHECK-NEXT: add r0, r0, r7 +; CHECK-NEXT: add r19, r5, r0 +; CHECK-NEXT: add r18, r4, r0 +; CHECK-NEXT: add r17, r3, r0 +; CHECK-NEXT: add r0, r10, r9 ; CHECK-NEXT: add r21, r5, r21 ; CHECK-NEXT: add r20, r5, r20 -; CHECK-NEXT: add r16, r5, r16 -; CHECK-NEXT: add r8, r5, r8 -; CHECK-NEXT: rldicl r3, r0, 2, 1 -; CHECK-NEXT: addi r3, r3, -4 -; CHECK-NEXT: sub r0, r12, r7 -; CHECK-NEXT: sub r12, r10, r7 -; CHECK-NEXT: li r7, 0 -; CHECK-NEXT: mr r10, r30 -; CHECK-NEXT: sldi r15, r15, 3 -; CHECK-NEXT: add r15, r5, r15 -; CHECK-NEXT: rldicl r3, r3, 62, 2 -; CHECK-NEXT: addi r3, r3, 1 -; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r16, r5, r0 +; CHECK-NEXT: add r0, r10, r8 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r15, r5, r0 +; CHECK-NEXT: add r0, r10, r14 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r2, r3, r0 +; CHECK-NEXT: ld r3, -224(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r14, r5, r0 +; CHECK-NEXT: add r31, r4, r0 +; CHECK-NEXT: sub r0, r3, r7 +; CHECK-NEXT: ld r3, -192(r1) # 8-byte Folded Reload +; CHECK-NEXT: rldicl r9, r3, 2, 1 +; CHECK-NEXT: ld r3, -216(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r8, r9, -4 +; CHECK-NEXT: rldicl r8, r8, 62, 2 +; CHECK-NEXT: sub r7, r3, r7 +; CHECK-NEXT: ori r3, r9, 1 +; CHECK-NEXT: addi r8, r8, 1 +; CHECK-NEXT: mulld r3, r10, r3 +; CHECK-NEXT: mtctr r8 +; CHECK-NEXT: li r8, 0 +; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r3, -216(r1) # 8-byte Folded Spill ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_3: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: lfd f0, 0(r2) ; CHECK-NEXT: lfd f1, 0(r31) -; CHECK-NEXT: add r3, r10, r30 -; CHECK-NEXT: add r3, r3, r30 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r14) -; CHECK-NEXT: add r3, r3, r30 -; CHECK-NEXT: add r10, r3, r30 ; 
CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r14) ; CHECK-NEXT: add r14, r14, r11 ; CHECK-NEXT: lfdx f0, r2, r0 ; CHECK-NEXT: lfdx f1, r31, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r15, r7 +; CHECK-NEXT: lfdx f1, r15, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r15, r7 -; CHECK-NEXT: lfdx f0, r2, r12 -; CHECK-NEXT: lfdx f1, r31, r12 +; CHECK-NEXT: stfdx f0, r15, r8 +; CHECK-NEXT: lfdx f0, r2, r7 +; CHECK-NEXT: lfdx f1, r31, r7 ; CHECK-NEXT: add r2, r2, r11 ; CHECK-NEXT: add r31, r31, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r16, r7 +; CHECK-NEXT: lfdx f1, r16, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r16, r7 +; CHECK-NEXT: stfdx f0, r16, r8 ; CHECK-NEXT: lfd f0, 0(r17) ; CHECK-NEXT: lfd f1, 0(r18) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r19, r7 +; CHECK-NEXT: lfdx f1, r19, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r19, r7 +; CHECK-NEXT: stfdx f0, r19, r8 ; CHECK-NEXT: lfdx f0, r17, r0 ; CHECK-NEXT: lfdx f1, r18, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r20, r7 +; CHECK-NEXT: lfdx f1, r20, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r20, r7 -; CHECK-NEXT: lfdx f0, r17, r12 -; CHECK-NEXT: lfdx f1, r18, r12 +; CHECK-NEXT: stfdx f0, r20, r8 +; CHECK-NEXT: lfdx f0, r17, r7 +; CHECK-NEXT: lfdx f1, r18, r7 ; CHECK-NEXT: add r17, r17, r11 ; CHECK-NEXT: add r18, r18, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r21, r7 +; CHECK-NEXT: lfdx f1, r21, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r21, r7 +; CHECK-NEXT: stfdx f0, r21, r8 ; CHECK-NEXT: lfd f0, 0(r22) ; CHECK-NEXT: lfd f1, 0(r23) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r24, r7 +; CHECK-NEXT: lfdx f1, r24, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r24, r7 +; CHECK-NEXT: stfdx f0, r24, r8 ; CHECK-NEXT: lfdx f0, r22, r0 ; CHECK-NEXT: lfdx f1, r23, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r25, r7 +; CHECK-NEXT: lfdx f1, r25, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r25, r7 -; CHECK-NEXT: lfdx f0, r22, r12 -; CHECK-NEXT: lfdx f1, r23, r12 +; CHECK-NEXT: stfdx f0, r25, r8 +; CHECK-NEXT: lfdx f0, r22, r7 +; CHECK-NEXT: lfdx f1, r23, r7 ; CHECK-NEXT: add r22, r22, r11 ; CHECK-NEXT: add r23, r23, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r26, r7 +; CHECK-NEXT: lfdx f1, r26, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r26, r7 +; CHECK-NEXT: stfdx f0, r26, r8 ; CHECK-NEXT: lfd f0, 0(r27) ; CHECK-NEXT: lfd f1, 0(r28) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r29, r7 +; CHECK-NEXT: lfdx f1, r29, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r29, r7 +; CHECK-NEXT: stfdx f0, r29, r8 ; CHECK-NEXT: lfdx f0, r27, r0 ; CHECK-NEXT: lfdx f1, r28, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r8, r7 +; CHECK-NEXT: lfdx f1, r30, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r8, r7 -; CHECK-NEXT: lfdx f0, r27, r12 -; CHECK-NEXT: lfdx f1, r28, r12 +; CHECK-NEXT: stfdx f0, r30, r8 +; CHECK-NEXT: lfdx f0, r27, r7 +; CHECK-NEXT: lfdx f1, r28, r7 ; CHECK-NEXT: add r27, r27, r11 ; CHECK-NEXT: add r28, r28, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r9, r7 +; CHECK-NEXT: lfdx f1, r12, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r9, r7 -; CHECK-NEXT: add r7, r7, r11 +; CHECK-NEXT: stfdx f0, r12, r8 +; CHECK-NEXT: add r8, r8, r11 ; CHECK-NEXT: bdnz .LBB7_3 -; CHECK-NEXT: .LBB7_4: # 
%for.cond.cleanup.loopexit.unr-lcssa +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r7, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r8, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r10, -192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r5, -200(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r9, -208(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, -216(r1) # 8-byte Folded Reload +; CHECK-NEXT: .LBB7_5: # %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: cmpldi r6, 0 -; CHECK-NEXT: beq cr0, .LBB7_7 -; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader -; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: sldi r8, r30, 3 -; CHECK-NEXT: add r3, r10, r3 -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: add r7, r5, r3 -; CHECK-NEXT: add r9, r4, r3 -; CHECK-NEXT: add r11, r0, r3 -; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r3, r10, r3 -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: add r12, r5, r3 -; CHECK-NEXT: add r30, r4, r3 -; CHECK-NEXT: add r29, r0, r3 -; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r3, r10, r3 -; CHECK-NEXT: li r10, 0 -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: add r5, r5, r3 -; CHECK-NEXT: add r4, r4, r3 -; CHECK-NEXT: add r3, r0, r3 +; CHECK-NEXT: beq cr0, .LBB7_8 +; CHECK-NEXT: # %bb.6: # %for.body.epil.preheader +; CHECK-NEXT: add r11, r26, r9 +; CHECK-NEXT: add r12, r26, r8 +; CHECK-NEXT: add r9, r26, r7 +; CHECK-NEXT: sldi r27, r10, 3 +; CHECK-NEXT: sldi r11, r11, 3 +; CHECK-NEXT: sldi r0, r12, 3 +; CHECK-NEXT: sldi r9, r9, 3 +; CHECK-NEXT: add r28, r5, r11 +; CHECK-NEXT: add r10, r4, r11 +; CHECK-NEXT: add r11, r3, r11 +; CHECK-NEXT: add r12, r5, r0 +; CHECK-NEXT: add r30, r4, r0 +; CHECK-NEXT: add r29, r3, r0 +; CHECK-NEXT: add r5, r5, r9 +; CHECK-NEXT: add r4, r4, r9 +; CHECK-NEXT: add r3, r3, r9 +; CHECK-NEXT: li r9, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB7_6: # %for.body.epil +; CHECK-NEXT: .LBB7_7: # %for.body.epil ; CHECK-NEXT: # -; CHECK-NEXT: lfdx f0, r3, r10 -; CHECK-NEXT: lfdx f1, r4, r10 +; CHECK-NEXT: lfdx f0, r3, r9 +; CHECK-NEXT: lfdx f1, r4, r9 ; CHECK-NEXT: addi r6, r6, -1 ; CHECK-NEXT: cmpldi r6, 0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r5) ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r5) -; CHECK-NEXT: add r5, r5, r8 -; CHECK-NEXT: lfdx f0, r29, r10 -; CHECK-NEXT: lfdx f1, r30, r10 +; CHECK-NEXT: add r5, r5, r27 +; CHECK-NEXT: lfdx f0, r29, r9 +; CHECK-NEXT: lfdx f1, r30, r9 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r12, r10 +; CHECK-NEXT: lfdx f1, r12, r9 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r12, r10 -; CHECK-NEXT: lfdx f0, r11, r10 -; CHECK-NEXT: lfdx f1, r9, r10 +; CHECK-NEXT: stfdx f0, r12, r9 +; CHECK-NEXT: lfdx f0, r11, r9 +; CHECK-NEXT: lfdx f1, r10, r9 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r7, r10 +; CHECK-NEXT: lfdx f1, r28, r9 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r7, r10 -; CHECK-NEXT: add r10, r10, r8 -; CHECK-NEXT: bne cr0, .LBB7_6 -; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup +; CHECK-NEXT: stfdx f0, r28, r9 +; CHECK-NEXT: add r9, r9, r27 +; CHECK-NEXT: bne cr0, .LBB7_7 +; CHECK-NEXT: .LBB7_8: ; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; 
CHECK-NEXT: li r3, 0 ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload @@ -973,6 +981,8 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, -144(r1) # 8-byte Folded Reload +; CHECK-NEXT: .LBB7_9: # %for.cond.cleanup +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: blr entry: %cmp49 = icmp sgt i64 %m, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 07aa05f609c40..6f4f03caf8f37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -912,7 +912,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: addi a6, a4, 1 ; CHECK-NEXT: andi a7, a6, -32 ; CHECK-NEXT: add a4, a7, a2 -; CHECK-NEXT: add a2, a4, a0 +; CHECK-NEXT: add a2, a0, a4 ; CHECK-NEXT: li t1, 5 ; CHECK-NEXT: vsetvli zero, t2, e8, m1, ta, ma ; CHECK-NEXT: .LBB14_3: # %bb15 @@ -1019,10 +1019,7 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: beqz a2, .LBB15_3 ; CHECK-NEXT: # %bb.1: # %bb2 -; CHECK-NEXT: addi a2, a2, -16 -; CHECK-NEXT: andi a2, a2, -16 -; CHECK-NEXT: add a2, a2, a0 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: li a3, 5 ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma ; CHECK-NEXT: .LBB15_2: # %bb4 diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/fold-ext-add.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/fold-ext-add.ll new file mode 100644 index 0000000000000..640c910be4a82 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/fold-ext-add.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p indvars -S %s | FileCheck %s + +target triple = "arm64-apple-macosx15.0.0" + +declare i1 @cond() + +define void @pred_mip_12(ptr %dst, ptr %src, i32 %n, i64 %offset) { +; CHECK-LABEL: define void @pred_mip_12( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[SMAX]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[OFFSET]], [[TMP0]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: br label %[[OUTER_LOOP:.*]] +; CHECK: [[OUTER_LOOP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[OUTER_LOOP]] +; CHECK: [[OUTER_LOOP]]: +; CHECK-NEXT: [[OUTER_PTR:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[OUTER_LOOP_LOOPEXIT]] ] +; CHECK-NEXT: [[C:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[INNER_LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_LOOP:.*]] +; CHECK: [[INNER_LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 0, %[[INNER_LOOP_PREHEADER]] ] +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[OUTER_PTR]], align 1 +; CHECK-NEXT: store i8 [[L]], ptr [[DST]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br 
i1 [[EXITCOND]], label %[[INNER_LOOP]], label %[[OUTER_LOOP_LOOPEXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %outer.loop + +outer.loop: + %outer.ptr = phi ptr [ %src, %entry ], [ %ptr.iv.next, %inner.loop ] + %c = call i1 @cond() + br i1 %c, label %inner.loop, label %exit + +inner.loop: + %iv = phi i32 [ 0, %outer.loop ], [ %iv.next, %inner.loop ] + %ptr.iv = phi ptr [ %src, %outer.loop ], [ %ptr.iv.next, %inner.loop ] + %l = load i8, ptr %outer.ptr, align 1 + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 %offset + store i8 %l, ptr %dst, align 2 + %iv.next = add i32 %iv, 1 + %ec = icmp slt i32 %iv.next, %n + br i1 %ec, label %inner.loop, label %outer.loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll index 99baa6105655d..4b52479fc6c4d 100644 --- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll +++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll @@ -334,4 +334,88 @@ out_of_bounds: ret i32 -1 } +define void @slt_no_smax_needed(i64 %n, ptr %dst) { +; CHECK-LABEL: @slt_no_smax_needed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N_TRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[N_TRUNC]], 1 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD_1]], 1 +; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8 +; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]] +; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %n.trunc = trunc i64 %n to i32 + %add.1 = add i32 %n.trunc, 1 + %shr = lshr i32 %add.1, 1 + %pre = icmp ult i32 %add.1, 8 + br i1 %pre, label %exit, label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i8, ptr %dst, i32 %iv + store i8 0, ptr %gep, align 1 + %iv.next = add i32 %iv, 1 + %ec = icmp slt i32 %iv.next, %shr + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +define void @ult_no_umax_needed(i64 %n, ptr %dst) { +; CHECK-LABEL: @ult_no_umax_needed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N_TRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[N_TRUNC]], 1 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD_1]], 1 +; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8 +; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]] +; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label 
[[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %n.trunc = trunc i64 %n to i32 + %add.1 = add i32 %n.trunc, 1 + %shr = lshr i32 %add.1, 1 + %pre = icmp ult i32 %add.1, 8 + br i1 %pre, label %exit, label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i8, ptr %dst, i32 %iv + store i8 0, ptr %gep, align 1 + %iv.next = add i32 %iv, 1 + %ec = icmp ult i32 %iv.next, %shr + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + !0 = !{i32 1, i32 2147483648} diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll index b946bbf747083..69713e006ebb9 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll @@ -96,3 +96,201 @@ loop: exit: ret void } + +define i32 @urem_order1(i32 %n) { +; CHECK-LABEL: define i32 @urem_order1( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[N]], 3 +; CHECK-NEXT: [[UREM_ZERO:%.*]] = icmp eq i32 [[UREM]], 0 +; CHECK-NEXT: br i1 [[UREM_ZERO]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N_NON_ZERO:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: br i1 [[N_NON_ZERO]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 3 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[PH]] ], [ 3, %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %urem = urem i32 %n, 3 + %urem.zero = icmp eq i32 %urem, 0 + br i1 %urem.zero, label %ph, label %exit + +ph: + %n.non.zero = icmp ne i32 %n, 0 + br i1 %n.non.zero, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ] + call void @foo() + %iv.next = add i32 %iv, 3 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ 1, %entry ], [ 2, %ph ], [ 3, %loop ] + ret i32 %res +} + +define i32 @urem_order2(i32 %n) { +; CHECK-LABEL: define i32 @urem_order2( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[N_NON_ZERO:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: br i1 [[N_NON_ZERO]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[N]], 3 +; CHECK-NEXT: [[UREM_ZERO:%.*]] = icmp eq i32 [[UREM]], 0 +; CHECK-NEXT: br i1 [[UREM_ZERO]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 3 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[PH]] ], [ 3, %[[EXIT_LOOPEXIT]] ] 
+; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %n.non.zero = icmp ne i32 %n, 0 + br i1 %n.non.zero, label %ph, label %exit + +ph: + %urem = urem i32 %n, 3 + %urem.zero = icmp eq i32 %urem, 0 + br i1 %urem.zero, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ] + call void @foo() + %iv.next = add i32 %iv, 3 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ 1, %entry ], [ 2, %ph ], [ 3, %loop ] + ret i32 %res +} + +define i64 @test_loop_with_div_order_1(i64 %n) { +; CHECK-LABEL: define i64 @test_loop_with_div_order_1( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[IS_ZERO]], label %[[EXIT:.*]], label %[[CHECK_BOUNDS:.*]] +; CHECK: [[CHECK_BOUNDS]]: +; CHECK-NEXT: [[N_PLUS_63:%.*]] = add i64 [[N]], 63 +; CHECK-NEXT: [[UPPER_BOUND:%.*]] = lshr i64 [[N_PLUS_63]], 6 +; CHECK-NEXT: [[BOUNDS_CHECK:%.*]] = icmp ult i64 [[N_PLUS_63]], 64 +; CHECK-NEXT: br i1 [[BOUNDS_CHECK]], label %[[EXIT]], label %[[CHECK_PARITY:.*]] +; CHECK: [[CHECK_PARITY]]: +; CHECK-NEXT: [[IS_ODD:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 +; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i64 [[IV_NEXT]], [[UPPER_BOUND]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %is_zero = icmp eq i64 %n, 0 + br i1 %is_zero, label %exit, label %check_bounds + +check_bounds: + %n_plus_63 = add i64 %n, 63 + %upper_bound = lshr i64 %n_plus_63, 6 + %bounds_check = icmp ult i64 %n_plus_63, 64 + br i1 %bounds_check, label %exit, label %check_parity + +check_parity: + %is_odd = and i64 %n, 1 + %parity_check = icmp eq i64 %is_odd, 0 + br i1 %parity_check, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv_next, %loop ], [ 0, %check_parity ] + %dummy = load volatile i64, ptr null, align 8 + %iv_next = add i64 %iv, 1 + %exit_cond = icmp ult i64 %iv_next, %upper_bound + br i1 %exit_cond, label %loop, label %exit + +exit: + ret i64 0 +} + +define i64 @test_loop_with_div_order_2(i64 %n) { +; CHECK-LABEL: define i64 @test_loop_with_div_order_2( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[N_PLUS_63:%.*]] = add i64 [[N]], 63 +; CHECK-NEXT: [[UPPER_BOUND:%.*]] = lshr i64 [[N_PLUS_63]], 6 +; CHECK-NEXT: [[BOUNDS_CHECK:%.*]] = icmp ult i64 [[N_PLUS_63]], 64 +; CHECK-NEXT: br i1 [[BOUNDS_CHECK]], label %[[EXIT:.*]], label %[[CHECK_BOUNDS:.*]] +; CHECK: [[CHECK_BOUNDS]]: +; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[IS_ZERO]], label %[[EXIT]], label %[[CHECK_PARITY:.*]] +; CHECK: [[CHECK_PARITY]]: +; CHECK-NEXT: [[IS_ODD:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 +; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, 
%[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i64 [[IV_NEXT]], [[UPPER_BOUND]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %n_plus_63 = add i64 %n, 63 + %upper_bound = lshr i64 %n_plus_63, 6 + %bounds_check = icmp ult i64 %n_plus_63, 64 + br i1 %bounds_check, label %exit, label %check_bounds + +check_bounds: + %is_zero = icmp eq i64 %n, 0 + br i1 %is_zero, label %exit, label %check_parity + +check_parity: + %is_odd = and i64 %n, 1 + %parity_check = icmp eq i64 %is_odd, 0 + br i1 %parity_check, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv_next, %loop ], [ 0, %check_parity ] + %dummy = load volatile i64, ptr null, align 8 + %iv_next = add i64 %iv, 1 + %exit_cond = icmp ult i64 %iv_next, %upper_bound + br i1 %exit_cond, label %loop, label %exit + +exit: + ret i64 0 +} diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll new file mode 100644 index 0000000000000..3bf1c2ef81cb9 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -p indvars -S %s | FileCheck %s + +declare i1 @cond() + +define i64 @test_ptr_compare_guard(ptr %start, ptr %end) { +; CHECK-LABEL: define i64 @test_ptr_compare_guard( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[START]], [[END]] +; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[I64_IV:%.*]] = phi i64 [ [[I64_IV_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 +; CHECK-NEXT: [[I64_IV_NEXT]] = add nuw i64 [[I64_IV]], 1 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] +; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[I64_IV]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RES_PH]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + %c.0 = icmp eq ptr %start, %end + br i1 %c.0, label %exit, label %loop.header + +loop.header: + %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ] + %i64.iv = phi i64 [ 0, %entry ], [ %i64.iv.next, %loop.latch ] + %c.1 = call i1 @cond() + br i1 %c.1, label %loop.latch, label %exit + +loop.latch: + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %i64.iv.next = add i64 %i64.iv, 1 + %c.2 = icmp eq ptr %ptr.iv.next, %end + br i1 %c.2, label %exit, label %loop.header + +exit: + %res = phi i64 [ 0, %entry ], [ %i64.iv, %loop.latch ], [ 0, %loop.header ] + 
ret i64 %res +} + +define void @test_sub_cmp(ptr align 8 %start, ptr %end) { +; CHECK-LABEL: define void @test_sub_cmp( +; CHECK-SAME: ptr align 8 [[START:%.*]], ptr [[END:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[START_INT:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[END_INT:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]] +; CHECK-NEXT: [[CMP_ENTRY:%.*]] = icmp eq ptr [[START]], [[END]] +; CHECK-NEXT: br i1 [[CMP_ENTRY]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_EARLY:.*]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 +; CHECK-NEXT: [[CMP_LATCH:%.*]] = icmp ult i64 [[IV_NEXT]], [[PTR_DIFF]] +; CHECK-NEXT: br i1 [[CMP_LATCH]], label %[[LOOP_HEADER]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_EARLY]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +; N32-LABEL: define void @test_sub_cmp( +; N32-SAME: ptr align 8 [[START:%.*]], ptr [[END:%.*]]) { +; N32-NEXT: [[ENTRY:.*:]] +; N32-NEXT: [[START_INT:%.*]] = ptrtoint ptr [[START]] to i64 +; N32-NEXT: [[END_INT:%.*]] = ptrtoint ptr [[END]] to i64 +; N32-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]] +; N32-NEXT: [[CMP_ENTRY:%.*]] = icmp eq ptr [[START]], [[END]] +; N32-NEXT: br i1 [[CMP_ENTRY]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] +; N32: [[LOOP_HEADER_PREHEADER]]: +; N32-NEXT: br label %[[LOOP_HEADER:.*]] +; N32: [[LOOP_HEADER]]: +; N32-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] +; N32-NEXT: [[C_1:%.*]] = call i1 @cond() +; N32-NEXT: br i1 [[C_1]], label %[[EXIT_EARLY:.*]], label %[[LOOP_LATCH]] +; N32: [[LOOP_LATCH]]: +; N32-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 +; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[PTR_DIFF]] +; N32-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT_LOOPEXIT:.*]] +; N32: [[EXIT_EARLY]]: +; N32-NEXT: br label %[[EXIT]] +; N32: [[EXIT_LOOPEXIT]]: +; N32-NEXT: br label %[[EXIT]] +; N32: [[EXIT]]: +; N32-NEXT: ret void +; +entry: + %start.int = ptrtoint ptr %start to i64 + %end.int = ptrtoint ptr %end to i64 + %ptr.diff = sub i64 %start.int, %end.int + %cmp.entry = icmp eq ptr %start, %end + br i1 %cmp.entry, label %exit, label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %c.1 = call i1 @cond() + br i1 %c.1, label %exit.early, label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 1 + %cmp.latch = icmp ult i64 %iv.next, %ptr.diff + br i1 %cmp.latch, label %loop.header, label %exit + +exit.early: + br label %exit + +exit: + ret void +} + + +define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr %P) { +; CHECK-LABEL: define void @test_ptr_diff_with_assume( +; CHECK-SAME: ptr align 8 [[START:%.*]], ptr align 8 [[END:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[START_INT:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[END_INT:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]] +; CHECK-NEXT: 
[[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2
+; CHECK-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]])
+; CHECK-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]]
+; CHECK-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]]
+; CHECK-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]]
+; CHECK: [[LOOP_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond()
+; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1
+; CHECK-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]]
+; CHECK-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
+; CHECK: [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+; N32-LABEL: define void @test_ptr_diff_with_assume(
+; N32-SAME: ptr align 8 [[START:%.*]], ptr align 8 [[END:%.*]], ptr [[P:%.*]]) {
+; N32-NEXT: [[ENTRY:.*:]]
+; N32-NEXT: [[START_INT:%.*]] = ptrtoint ptr [[START]] to i64
+; N32-NEXT: [[END_INT:%.*]] = ptrtoint ptr [[END]] to i64
+; N32-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]]
+; N32-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2
+; N32-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]])
+; N32-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]]
+; N32-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]]
+; N32-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]]
+; N32: [[LOOP_BODY_PREHEADER]]:
+; N32-NEXT: br label %[[LOOP_BODY:.*]]
+; N32: [[LOOP_BODY]]:
+; N32-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ]
+; N32-NEXT: [[TMP0:%.*]] = call i1 @cond()
+; N32-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1
+; N32-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]]
+; N32-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
+; N32: [[EXIT_LOOPEXIT]]:
+; N32-NEXT: br label %[[EXIT]]
+; N32: [[EXIT]]:
+; N32-NEXT: ret void
+;
+entry:
+ %start.int = ptrtoint ptr %start to i64
+ %end.int = ptrtoint ptr %end to i64
+ %ptr.diff = sub i64 %start.int, %end.int
+ %diff.cmp = icmp ult i64 %ptr.diff, 2
+ call void @llvm.assume(i1 %diff.cmp)
+ %computed.end = getelementptr i8, ptr %start, i64 %ptr.diff
+ %entry.cmp = icmp eq ptr %start, %end
+ br i1 %entry.cmp, label %exit, label %loop.body
+
+loop.body:
+ %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop.body ]
+ call i1 @cond()
+ %iv.next = getelementptr i8, ptr %iv, i64 1
+ %loop.cmp = icmp eq ptr %iv.next, %computed.end
+ br i1 %loop.cmp, label %exit, label %loop.body
+
+exit:
+ ret void
+} diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll index b1043b48d40ac..84ae79d53e25e 100644 --- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll @@ -68,3 +68,44 @@ latch: ; preds = %inner_exit
 end: ; preds = %header
 ret void
 }
+
+
+declare void @foo()
+
+define i64 @narrow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
+; CHECK-LABEL: @narrow_canonical_iv_wide_multiplied_iv(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT: br label
[[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: ret i64 [[TMP6]] +; +entry: + %smax = tail call i32 @llvm.smax.i32(i32 %x, i32 1) + %mul.y = shl i64 %y, 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.mul = phi i64 [ 1, %entry ], [ %iv.mul.next, %loop ] + %iv.mul.next = add i64 %iv.mul, %mul.y + call void @foo() + %iv.next = add i32 %iv, 1 + %ec = icmp ult i32 %iv.next, %smax + br i1 %ec, label %loop, label %exit + +exit: + ret i64 %iv.mul.next +} + +declare i32 @llvm.smax.i32(i32, i32) diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll index d24f9a4e40e38..17921afc5ff06 100644 --- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll +++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll @@ -15,11 +15,9 @@ define void @_Z3fn1v() { ; CHECK-NEXT: [[J_SROA_0_0_COPYLOAD:%.*]] = load i8, ptr [[X5]], align 1 ; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]] ; CHECK: .preheader4.lr.ph: -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[X4]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]] ; CHECK-NEXT: br label [[DOTPREHEADER4:%.*]] ; CHECK: .preheader4: ; CHECK-NEXT: [[K_09:%.*]] = phi ptr [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[X25:%.*]], [[X22:%.*]] ] diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll index f28262b2a77e0..0023a10312451 100644 --- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll +++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll @@ -30,6 +30,226 @@ define i32 @g(i32 %x) { ret i32 %b } +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10 +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %c = icmp eq ptr %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_operands_swapped(ptr %src, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_operands_swapped( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10 +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 10, %i + %p = inttoptr i64 %a to ptr + %c = icmp eq ptr %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset(ptr %src, i64 %off, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr 
[[SRC:%.*]], i64 [[OFF:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, %off + %p = inttoptr i64 %a to ptr + %c = icmp eq ptr %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset_operands_swapped(ptr %src, i64 %off, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset_operands_swapped( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFF:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %off, %i + %p = inttoptr i64 %a to ptr + %c = icmp eq ptr %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_different_src_address_spaces(ptr addrspace(1) %src, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_different_src_address_spaces( +; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr addrspace(1) [[SRC:%.*]] to i64 +; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10 +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr addrspace(1) %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %c = icmp eq ptr %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_different_dst_address_spaces(ptr %src, ptr addrspace(1) %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_different_dst_address_spaces( +; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64 +; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10 +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr addrspace(1) +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr addrspace(1) [[P2:%.*]], [[P]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr addrspace(1) + %c = icmp eq ptr addrspace(1) %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_int_type_does_not_match_ptr_ty(ptr %src, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_int_type_does_not_match_ptr_ty( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64 +; CHECK-NEXT: [[I:%.*]] = trunc i64 [[TMP1]] to i8 +; CHECK-NEXT: [[A:%.*]] = add i8 [[I]], 10 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[A]] to i64 +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i8 + %a = add i8 %i, 10 + %p = inttoptr i8 %a to ptr + %c = icmp eq ptr %p, %p2 + ret i1 %c +} + +define i1 @inttoptr_add_multiple_users_ptrtoint_used_by_single_icmp(ptr %src, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_multiple_users_ptrtoint_used_by_single_icmp( +; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64 +; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10 +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]] +; CHECK-NEXT: call void @bar(i64 [[A]]) +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %c = icmp eq ptr %p, %p2 + call void @bar(i64 %a) + ret i1 %c +} + +define i1 @multiple_inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src) { +; CHECK-LABEL: @multiple_inttoptr_add_ptrtoint_used_by_single_icmp( +; CHECK-NEXT: ret i1 false +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %a.2 = add i64 %i, 11 + 
%p.2 = inttoptr i64 %a.2 to ptr + %c = icmp eq ptr %p, %p.2 + ret i1 %c +} + +define i1 @multiple_inttoptr_add_ptrtoint_used_by_single_icmp_non_constant_offset(ptr %src, i64 %off.1) { +; CHECK-LABEL: @multiple_inttoptr_add_ptrtoint_used_by_single_icmp_non_constant_offset( +; CHECK-NEXT: ret i1 false +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %a.2 = add i64 %i, 11 + %p.2 = inttoptr i64 %a.2 to ptr + %c = icmp eq ptr %p, %p.2 + ret i1 %c +} + +define i1 @multiple_inttoptr_add_ptrtoint_used_by_single_icmp_multiple_non_constant_offset(ptr %src, i64 %off.1, i64 %off.2) { +; CHECK-LABEL: @multiple_inttoptr_add_ptrtoint_used_by_single_icmp_multiple_non_constant_offset( +; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[OFF_1:%.*]], [[OFF_2:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, %off.1 + %p = inttoptr i64 %a to ptr + %a.2 = add i64 %i, %off.2 + %p.2 = inttoptr i64 %a.2 to ptr + %c = icmp eq ptr %p, %p.2 + ret i1 %c +} + +define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(i1 %bc, ptr %src, ptr %p2) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb( +; CHECK-NEXT: br i1 [[BC:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10 +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; CHECK: else: +; CHECK-NEXT: ret i1 false +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + br i1 %bc, label %then, label %else + +then: + %c = icmp eq ptr %p, %p2 + ret i1 %c + +else: + ret i1 false +} + +define i1 @inttoptr_add_ptrtoint_used_by_multiple_icmps(ptr %src, ptr %p2, ptr %p3) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_multiple_icmps( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq ptr [[P2]], [[P:%.*]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[P2]], [[P3:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[C_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[XOR]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %c.1 = icmp eq ptr %p, %p2 + %c.2 = icmp eq ptr %p, %p3 + %xor = xor i1 %c.1, %c.2 + ret i1 %xor +} + +declare void @foo(ptr) +declare void @bar(i64) + +define i1 @inttoptr_add_ptrtoint_used_by_multiple_icmps_and_other_user(ptr %src, ptr %p2, ptr %p3) { +; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_multiple_icmps_and_other_user( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 10 +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: [[C_1:%.*]] = icmp eq ptr [[P2:%.*]], [[P]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[P3:%.*]], [[P]] +; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[C_1]], [[C_2]] +; CHECK-NEXT: call void @foo(ptr [[P]]) +; CHECK-NEXT: ret i1 [[XOR]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + %c.1 = icmp eq ptr %p, %p2 + %c.2 = icmp eq ptr %p, %p3 + %xor = xor i1 %c.1, %c.2 + call void @foo(ptr %p) + ret i1 %xor +} + define i32 @h(i1 %A, i32 %B) { ; CHECK-LABEL: @h( ; CHECK-NEXT: EntryBlock: diff --git a/llvm/test/Transforms/InstCombine/inttoptr-add-phi.ll b/llvm/test/Transforms/InstCombine/inttoptr-add-phi.ll new file mode 100644 index 0000000000000..acceead23a5e5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/inttoptr-add-phi.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instcombine -S %s | FileCheck %s + +define i64 @inttoptr_used_by_phi_with_ptrtoint(i1 %c, ptr %src, ptr %p2) { +; CHECK-LABEL: define i64 @inttoptr_used_by_phi_with_ptrtoint( +; CHECK-SAME: i1 [[C:%.*]], ptr [[SRC:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[P:%.*]] = getelementptr i8, ptr [[SRC]], i64 10 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], %[[THEN]] ], [ 0, %[[ELSE]] ] +; CHECK-NEXT: ret i64 [[PHI]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + br i1 %c, label %then, label %else + +then: + br label %exit + +else: + br label %exit + +exit: + %phi = phi ptr [ %p, %then ], [ null, %else ] + %i.2 = ptrtoint ptr %phi to i64 + ret i64 %i.2 +} + +declare void @foo(ptr) + +define i64 @inttoptr_used_by_phi_with_other_users(i1 %c, ptr %src, ptr %p2) { +; CHECK-LABEL: define i64 @inttoptr_used_by_phi_with_other_users( +; CHECK-SAME: i1 [[C:%.*]], ptr [[SRC:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10 +; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[A]], %[[THEN]] ], [ 0, %[[ELSE]] ] +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr +; CHECK-NEXT: call void @foo(ptr [[P]]) +; CHECK-NEXT: ret i64 [[PHI]] +; + %i = ptrtoint ptr %src to i64 + %a = add i64 %i, 10 + %p = inttoptr i64 %a to ptr + br i1 %c, label %then, label %else + +then: + br label %exit + +else: + br label %exit + +exit: + %phi = phi ptr [ %p, %then ], [ null, %else ] + call void @foo(ptr %p) + %i.2 = ptrtoint ptr %phi to i64 + ret i64 %i.2 +} diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll index ea2cfe74be264..0123f15334281 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll @@ -16,10 +16,10 @@ define void @test(ptr %ptr) { ; CHECK-NEXT: [[LIM_0:%.*]] = phi i32 [ 65, [[ENTRY:%.*]] ], [ 1, [[DEAD:%.*]] ] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8 ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[UMAX]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[TMP2]], -8 ; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP1]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll index df32e60d5065a..bc1543d8361a7 100644 --- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll +++ 
b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -p loop-idiom -S %s | FileCheck %s define void @fold_add_zext_to_sext(ptr %dst, i1 %start) { @@ -9,10 +9,10 @@ define void @fold_add_zext_to_sext(ptr %dst, i1 %start) { ; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[START]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 25, [[START_EXT]] +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i32 [[START_EXT]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 100, [[TMP4]] ; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP3]], i1 false) ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START_EXT]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -40,3 +40,56 @@ loop: exit: ret void } + +declare i16 @get() + +define void @test_memset_size_can_use_info_from_guards(i32 %x, ptr %dst) { +; CHECK-LABEL: define void @test_memset_size_can_use_info_from_guards( +; CHECK-SAME: i32 [[X:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP1:.*]] +; CHECK: [[LOOP1_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[LOOP1_BACKEDGE:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[X]], %[[LOOP1_BACKEDGE]] ] +; CHECK-NEXT: [[L:%.*]] = call i16 @get() +; CHECK-NEXT: [[L_EXT:%.*]] = zext i16 [[L]] to i32 +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[L_EXT]], [[P]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[SUB]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[SUB]], 2 +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP1_BACKEDGE]], label %[[LOOP2_PREHEADER:.*]] +; CHECK: [[LOOP1_BACKEDGE]]: +; CHECK-NEXT: br label %[[LOOP1]] +; CHECK: [[LOOP2_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[UMAX]], i1 false) +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP2]] ], [ 0, %[[LOOP2_PREHEADER]] ] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i32 [[IV_2]] +; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 +; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i32 [[IV_2_NEXT]], [[SHR]] +; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP2]], label %[[LOOP1_LOOPEXIT]] +; +entry: + br label %loop1 + +loop1: + %p = phi i32 [ 0, %entry ], [ %x, %loop1 ], [ %x, %loop2 ] + %l = call i16 @get() + %l.ext = zext i16 %l to i32 + %sub = sub i32 %l.ext, %p + %shr = lshr i32 %sub, 1 + %ec = icmp ult i32 %sub, 2 + br i1 %ec, label %loop1, label %loop2 + +loop2: + %iv.2 = phi i32 [ 0, %loop1 ], [ %iv.2.next, %loop2 ] + %gep.dst = getelementptr i8, ptr %dst, i32 %iv.2 + store i8 0, ptr %gep.dst, align 1 + %iv.2.next = add i32 %iv.2, 1 + %ec.2 = icmp ult i32 %iv.2.next, %shr + br i1 %ec.2, label %loop2, label %loop1 +} diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll 
b/llvm/test/Transforms/LoopIdiom/basic.ll index 8fdaac3fdffe3..e8ea912246728 100644 --- a/llvm/test/Transforms/LoopIdiom/basic.ll +++ b/llvm/test/Transforms/LoopIdiom/basic.ll @@ -682,10 +682,7 @@ define void @PR14241(ptr %s, i64 %size) { ; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, ptr [[S:%.*]], i64 [[END_IDX]] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[S]], i64 4 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -8 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -4 ; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 4 [[S]], ptr align 4 [[SCEVGEP]], i64 [[TMP4]], i1 false) ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: diff --git a/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll b/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll new file mode 100644 index 0000000000000..9a59e5a8ccabb --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-idiom -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + +define void @test_simplify_scev_during_expansion_flags(i64 %start) { +; CHECK-LABEL: define void @test_simplify_scev_during_expansion_flags( +; CHECK-SAME: i64 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[START_NEG:%.*]] = sub i64 0, [[START]] +; CHECK-NEXT: [[START_MUL:%.*]] = ashr exact i64 [[START_NEG]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[START_MUL]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 404, [[TMP0]] +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START_MUL]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %start.neg = sub i64 0, %start + %start.mul = ashr exact i64 %start.neg, 2 + br label %loop + +loop: + %iv = phi i64 [ %start.mul, %entry ], [ %iv.next, %loop ] + %ptr.iv = phi ptr [ null, %entry ], [ %ptr.iv.next, %loop ] + store i32 0, ptr %ptr.iv, align 4 + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopIdiom/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopIdiom/reuse-lcssa-phi-scev-expansion.ll new file mode 100644 index 0000000000000..a15db620e0082 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/reuse-lcssa-phi-scev-expansion.ll @@ -0,0 +1,224 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-idiom -S %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" + +declare void @foo() +declare void @bar() + +define 
void @scev_expand_ptrtoint(i8 %x, ptr %start) { +; CHECK-LABEL: define void @scev_expand_ptrtoint( +; CHECK-SAME: i8 [[X:%.*]], ptr [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: br label %[[LOOP_1_HEADER:.*]] +; CHECK: [[LOOP_1_HEADER]]: +; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1_LATCH:.*]] ] +; CHECK-NEXT: [[C:%.*]] = icmp ule i8 [[X]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_1_LATCH]], label %[[MIDDLE:.*]] +; CHECK: [[LOOP_1_LATCH]]: +; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, ptr [[PTR_IV_1]], i64 1 +; CHECK-NEXT: br label %[[LOOP_1_HEADER]] +; CHECK: [[MIDDLE]]: +; CHECK-NEXT: [[PTR_IV_1_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1]], %[[LOOP_1_HEADER]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[CMP_EXT:%.*]] = zext i1 [[CMP]] to i64 +; CHECK-NEXT: [[GEP_START:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 [[CMP_EXT]] +; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]] +; CHECK: [[LOOP_2_HEADER]]: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], %[[LOOP_2_LATCH:.*]] ], [ 0, %[[MIDDLE]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[GEP_START]], %[[MIDDLE]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ] +; CHECK-NEXT: switch i8 [[X]], label %[[LOOP_2_LATCH]] [ +; CHECK-NEXT: i8 1, label %[[LOOP_3_PREHEADER:.*]] +; CHECK-NEXT: i8 4, label %[[LOOP_3_PREHEADER]] +; CHECK-NEXT: ] +; CHECK: [[LOOP_3_PREHEADER]]: +; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], %[[LOOP_2_HEADER]] ], [ [[INDVAR]], %[[LOOP_2_HEADER]] ] +; CHECK-NEXT: [[PTR_IV_2_LCSSA:%.*]] = phi ptr [ [[PTR_IV_2]], %[[LOOP_2_HEADER]] ], [ [[PTR_IV_2]], %[[LOOP_2_HEADER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START1]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR_IV_1_LCSSA]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[CMP_EXT]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDVAR_LCSSA]], [[TMP4]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]]) +; CHECK-NEXT: br label %[[LOOP_3:.*]] +; CHECK: [[LOOP_2_LATCH]]: +; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1 +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br label %[[LOOP_2_HEADER]] +; CHECK: [[LOOP_3]]: +; CHECK-NEXT: [[IV_3:%.*]] = phi i64 [ [[IV_3_NEXT:%.*]], %[[LOOP_3]] ], [ 1, %[[LOOP_3_PREHEADER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV_2_LCSSA]], i64 [[IV_3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[GEP]], align 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[TMP6]], 0 +; CHECK-NEXT: [[IV_3_NEXT]] = add i64 [[IV_3]], 1 +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_3]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.1.header + +loop.1.header: + %ptr.iv.1 = phi ptr [ %start, %entry ], [ %ptr.iv.1.next, %loop.1.latch ] + %c = icmp ule i8 %x, 1 + br i1 %c, label %loop.1.latch, label %middle + +loop.1.latch: + %ptr.iv.1.next = getelementptr i8, ptr %ptr.iv.1, i64 1 + br label %loop.1.header + +middle: + %cmp = icmp eq i8 %x, 0 + %cmp.ext = zext i1 %cmp to i64 + %gep.start = getelementptr i8, ptr %ptr.iv.1, i64 %cmp.ext + br label %loop.2.header + +loop.2.header: + %ptr.iv.2 = phi ptr [ %gep.start, %middle ], [ %ptr.iv.2.next, %loop.2.latch ] + switch i8 %x, label %loop.2.latch [ + i8 1, label %loop.3 + i8 4, 
label %loop.3 + ] + +loop.2.latch: + %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i64 1 + br label %loop.2.header + +loop.3: + %iv.3 = phi i64 [ 1, %loop.2.header ], [ 1, %loop.2.header ], [ %iv.3.next, %loop.3 ] + %gep = getelementptr i8, ptr %ptr.iv.2, i64 %iv.3 + %1 = load i8, ptr %gep, align 1 + %ec = icmp eq i8 %1, 0 + %iv.3.next = add i64 %iv.3, 1 + br i1 %ec, label %exit, label %loop.3 + +exit: + ret void +} + +declare i1 @cond() + +define ptr @test_lcssa_reuse_preserve_lcssa() { +; CHECK-LABEL: define ptr @test_lcssa_reuse_preserve_lcssa() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP_0_HEADER:.*]] +; CHECK: [[LOOP_0_HEADER]]: +; CHECK-NEXT: br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[IV_1:%.*]] = phi ptr [ null, %[[LOOP_0_HEADER]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[IV_1_NEXT]] = getelementptr i8, ptr [[IV_1]], i64 1 +; CHECK-NEXT: [[EC_1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[EC_1]], label %[[THEN:.*]], label %[[LOOP_1]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[IV_1_LCSSA1:%.*]] = phi ptr [ [[IV_1]], %[[LOOP_1]] ] +; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_2]], label %[[LOOP_2_PREHEADER:.*]], label %[[LOOP_0_LATCH:.*]] +; CHECK: [[LOOP_2_PREHEADER]]: +; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi ptr [ [[IV_1_LCSSA1]], %[[THEN]] ] +; CHECK-NEXT: [[IV_1_LCSSA_LCSSA:%.*]] = phi ptr [ [[IV_1_LCSSA1]], %[[THEN]] ] +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr null) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[IV_1_LCSSA]], i64 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[STRLEN]] +; CHECK-NEXT: br label %[[LOOP_2:.*]] +; CHECK: [[LOOP_2]]: +; CHECK-NEXT: [[IV_2:%.*]] = phi ptr [ [[RES:%.*]], %[[LOOP_2]] ], [ [[IV_1_LCSSA_LCSSA]], %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[RES]] = getelementptr i8, ptr [[IV_2]], i64 1 +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[IV_1_LCSSA_LCSSA]], align 1 +; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i8 [[L]], 0 +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_2]] +; CHECK: [[LOOP_0_LATCH]]: +; CHECK-NEXT: br label %[[LOOP_0_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret ptr [[SCEVGEP]] +; +entry: + br label %loop.0.header + +loop.0.header: + br label %loop.1 + +loop.1: + %iv.1 = phi ptr [ null, %loop.0.header ], [ %iv.1.next, %loop.1 ] + %iv.1.next = getelementptr i8, ptr %iv.1, i64 1 + %ec.1 = call i1 @cond() + br i1 %ec.1, label %then, label %loop.1 + +then: + %c.2 = call i1 @cond() + br i1 %c.2, label %loop.2, label %loop.0.latch + +loop.2: + %iv.2 = phi ptr [ %res, %loop.2 ], [ %iv.1, %then ] + %res = getelementptr i8, ptr %iv.2, i64 1 + %l = load i8, ptr %iv.1, align 1 + %ec.2 = icmp eq i8 %l, 0 + br i1 %ec.2, label %exit, label %loop.2 + +loop.0.latch: + br label %loop.0.header + +exit: + ret ptr %res +} + +; Test case for https://github.com/llvm/llvm-project/issues/155287. +; Make sure we don't crash when trying to construct a PtrToInt SCEV expression +; that results in SCEVCouldNotCompute. 
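+; (Here the pointer phi is in addrspace(1), which the datalayout above marks as +; non-integral via ni:1, so a PtrToInt SCEV cannot be formed for it.)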
+define void @phi_ptr_addressspace_ptrtoint_fail(ptr addrspace(1) %arg) { +; CHECK-LABEL: define void @phi_ptr_addressspace_ptrtoint_fail( +; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[ADD5:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[ADD5]] = add i64 [[IV_1]], 1 +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i16, ptr addrspace(1) [[ARG]], i64 [[ADD5]] +; CHECK-NEXT: br i1 false, label %[[LOOP_1]], label %[[LOOP_2_PH:.*]] +; CHECK: [[LOOP_2_PH]]: +; CHECK-NEXT: [[IV_1_LCSSA1:%.*]] = phi i64 [ [[IV_1]], %[[LOOP_1]] ] +; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i64 [ [[IV_1]], %[[LOOP_1]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], %[[LOOP_1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[IV_1_LCSSA1]], 2 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[ARG]], i64 [[TMP0]] +; CHECK-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[SCEVGEP]], i8 0, i64 8, i1 false) +; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]] +; CHECK: [[LOOP_2_HEADER]]: +; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_1_LCSSA]], %[[LOOP_2_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_2_LATCH:.*]] ] +; CHECK-NEXT: [[GREP_ARG:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i64 [[IV_2]] +; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_2]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_2_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_2_LATCH]]: +; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], 1 +; CHECK-NEXT: br label %[[LOOP_2_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +bb: + br label %loop.1 + +loop.1: + %iv.1 = phi i64 [ 0, %bb ], [ %add5, %loop.1] + %add5 = add i64 %iv.1, 1 + %getelementptr = getelementptr i16, ptr addrspace(1) %arg, i64 %add5 + br i1 false, label %loop.1, label %loop.2.ph + +loop.2.ph: + %phi = phi ptr addrspace(1) [ %getelementptr, %loop.1] + br label %loop.2.header + +loop.2.header: + %iv.2 = phi i64 [ %iv.1, %loop.2.ph ], [ %iv.2.next, %loop.2.latch ] + %grep.arg = getelementptr i32, ptr addrspace(1) %arg, i64 %iv.2 + store i32 0, ptr addrspace(1) %grep.arg, align 4 + %ec = icmp ult i64 %iv.2, 1 + br i1 %ec, label %loop.2.latch, label %exit + +loop.2.latch: + %iv.2.next = add i64 %iv.2, 1 + br label %loop.2.header + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll index 10e10653a431d..037851f364e24 100644 --- a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll +++ b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll @@ -59,19 +59,14 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_1]], i64 1 ; CHECK-NEXT: br label [[INNER_2:%.*]] ; CHECK: inner.2: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[INNER_2]] ], [ 0, [[INNER_1_EXIT]] ] ; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[GEP_5]], [[INNER_1_EXIT]] ], [ [[PTR_IV_2_NEXT:%.*]], [[INNER_2]] ] ; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_2]], i64 1 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: br i1 false, label [[INNER_3_LVER_CHECK:%.*]], label [[INNER_2]] ; CHECK: inner.3.lver.check: -; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], [[INNER_2]] ] ; CHECK-NEXT: [[LCSSA_PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2]], [[INNER_2]] ] ; CHECK-NEXT: 
[[GEP_6:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 1 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_2]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 24 -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_2]], i64 16 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP3]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -104,7 +99,7 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT4:%.*]], label [[INNER_3]] ; CHECK: outer.latch.loopexit: ; CHECK-NEXT: br label [[OUTER_LATCH]] -; CHECK: outer.latch.loopexit4: +; CHECK: outer.latch.loopexit3: ; CHECK-NEXT: br label [[OUTER_LATCH]] ; CHECK: outer.latch: ; CHECK-NEXT: br label [[INNER_1_LVER_CHECK]] diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll new file mode 100644 index 0000000000000..f2fa771ac6f29 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -loop-reduce -S %s | FileCheck %s +; PR18000 + +target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i8 0, align 1 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +define i32 @foo() { +; CHECK-LABEL: define i32 @foo() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @b, align 4 +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[DOTPR]], 1 +; CHECK-NEXT: br i1 [[CMP10]], label %[[OUTER_PH:.*]], label %[[ENTRY_ELSE:.*]] +; CHECK: [[ENTRY_ELSE]]: +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr @c, align 4 +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[OUTER_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @a, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[OUTER_HEADER_PREHEADER:.*]], label %[[P_ELSE:.*]] +; CHECK: [[OUTER_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC:%.*]], %[[OUTER_LATCH:.*]] ], [ [[DOTPR]], %[[OUTER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[INNER_LOOP:.*]] +; CHECK: [[INNER_LOOP]]: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 516, %[[OUTER_HEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i8 [ 1, %[[OUTER_HEADER]] ], [ [[DEC:%.*]], %[[INNER_LOOP]] ] +; CHECK-NEXT: [[SHL:%.*]] = add i32 [[LSR_IV]], -258 +; CHECK-NEXT: store i32 [[SHL]], ptr @c, align 4 +; CHECK-NEXT: [[DEC]] = add i8 [[TMP2]], -1 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], -258 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[DEC]], -1 +; CHECK-NEXT: br i1 [[CMP2]], label %[[INNER_LOOP]], label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[LSR_IV_NEXT_LCSSA:%.*]] = phi i32 [ [[LSR_IV_NEXT]], %[[INNER_LOOP]] ] +; CHECK-NEXT: store i32 0, ptr @d, align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 
[[INC]], ptr @b, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[OUTER_HEADER]], label %[[OUTER_EXIT:.*]] +; CHECK: [[OUTER_EXIT]]: +; CHECK-NEXT: [[LSR_IV_NEXT_LCSSA_LCSSA:%.*]] = phi i32 [ [[LSR_IV_NEXT_LCSSA]], %[[OUTER_LATCH]] ] +; CHECK-NEXT: store i8 [[DEC]], ptr @e, align 1 +; CHECK-NEXT: br label %[[MERGE]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[DOTPRE]], %[[ENTRY_ELSE]] ], [ [[LSR_IV_NEXT_LCSSA_LCSSA]], %[[OUTER_EXIT]] ] +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar(i32 [[TMP3]]) +; CHECK-NEXT: br label %[[RETURN:.*]] +; CHECK: [[P_ELSE]]: +; CHECK-NEXT: store i8 1, ptr @e, align 1 +; CHECK-NEXT: store i32 0, ptr @d, align 4 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, %[[MERGE]] ], [ 1, %[[P_ELSE]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %.pr = load i32, ptr @b, align 4 + %cmp10 = icmp slt i32 %.pr, 1 + br i1 %cmp10, label %outer.ph, label %entry.else + +entry.else: + %.pre = load i32, ptr @c, align 4 + br label %merge + +outer.ph: + %0 = load i32, ptr @a, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %outer.header, label %p.else + +outer.header: + %1 = phi i32 [ %.pr, %outer.ph ], [ %inc, %outer.latch ] + br label %inner.loop + +inner.loop: + %iv = phi i32 [ 1, %outer.header ], [ %iv.next, %inner.loop ] + %2 = phi i8 [ 1, %outer.header ], [ %dec, %inner.loop ] + %conv7 = mul i32 %iv, 258 + %shl = and i32 %conv7, 510 + store i32 %shl, ptr @c, align 4 + %dec = add i8 %2, -1 + %cmp2 = icmp sgt i8 %dec, -1 + %iv.next = add i32 %iv, -1 + br i1 %cmp2, label %inner.loop, label %outer.latch + +outer.latch: + store i32 0, ptr @d, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr @b, align 4 + %cmp = icmp slt i32 %1, 0 + br i1 %cmp, label %outer.header, label %outer.exit + +outer.exit: + store i8 %dec, ptr @e, align 1 + br label %merge + +merge: + %3 = phi i32 [ %.pre, %entry.else ], [ %shl, %outer.exit ] + %call = tail call i32 @bar(i32 %3) + br label %return + +p.else: + store i8 1, ptr @e, align 1 + store i32 0, ptr @d, align 4 + br label %return + +return: + %retval.0 = phi i32 [ 0, %merge ], [ 1, %p.else ] + ret i32 %retval.0 +} + +declare i32 @bar(i32) + diff --git a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll index cee8c8abdb450..43389b5df8f00 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll @@ -18,8 +18,7 @@ define i64 @test_duplicated_phis(i64 noundef %N) { ; CHECK: [[FOR_BODY_PREHEADER_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UNROLL_ITER]], -4 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP5]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP4]], 1 ; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = sub i64 -3, [[TMP3]] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: @@ -84,3 +83,41 @@ for.end: %res.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.end.loopexit.unr-lcssa ] ret i64 %res.0.lcssa } + +define i64 @duplicated_phis_compare_uses_mul_udiv(i64 %x) { +; CHECK-LABEL: define i64 @duplicated_phis_compare_uses_mul_udiv( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MUL_2:%.*]] = shl i64 [[X]], 1 +; CHECK-NEXT: [[DIV_16:%.*]] = lshr i64 [[MUL_2]], 4 +; CHECK-NEXT: [[MASKED:%.*]] = and i64 [[DIV_16]], 
1152921504606846974 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 2 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[MASKED]], [[IV_1_NEXT]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 [[IV_1_NEXT]] +; +entry: + %mul.2 = shl i64 %x, 1 + %div.16 = lshr exact i64 %mul.2, 4 + %masked = and i64 %div.16, 1152921504606846974 + br label %loop + +loop: + %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ] + %iv.2 = phi i64 [ 0, %entry ], [ %iv.2.next, %loop ] + call void @clobber() + %iv.1.next = add i64 %iv.1, 2 + %iv.2.next = add i64 %iv.2, 2 + %ec = icmp eq i64 %iv.2.next, %masked + br i1 %ec, label %exit, label %loop + +exit: + ret i64 %iv.1.next +} + +declare void @clobber() diff --git a/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll new file mode 100644 index 0000000000000..f74fb14e397f3 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-unroll -S %s | FileCheck %s + +target triple = "hexagon-unknown-linux" + +declare void @foo() + +define void @preserve_lcssa_when_reusing_existing_phi() { +; CHECK-LABEL: define void @preserve_lcssa_when_reusing_existing_phi() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP_1_HEADER:.*]] +; CHECK: [[LOOP_1_HEADER]]: +; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]] +; CHECK: [[LOOP_2_HEADER]]: +; CHECK-NEXT: br label %[[LOOP_3:.*]] +; CHECK: [[LOOP_3]]: +; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_3]] ], [ 0, %[[LOOP_2_HEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_3_NEXT]] = add i32 [[IV_3]], 1 +; CHECK-NEXT: br i1 false, label %[[PH:.*]], label %[[LOOP_3]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[IV_3_LCSSA:%.*]] = phi i32 [ [[IV_3]], %[[LOOP_3]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_2_LATCH:.*]], label %[[LOOP_4_PREHEADER:.*]] +; CHECK: [[LOOP_4_PREHEADER]]: +; CHECK-NEXT: [[IV_3_LCSSA_LCSSA1:%.*]] = phi i32 [ [[IV_3_LCSSA]], %[[PH]] ] +; CHECK-NEXT: [[IV_3_LCSSA_LCSSA:%.*]] = phi i32 [ [[IV_3_LCSSA]], %[[PH]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV_3_LCSSA_LCSSA1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP1]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label %[[LOOP_1_LATCH_UNR_LCSSA:.*]], label %[[LOOP_4_PREHEADER_NEW:.*]] +; CHECK: [[LOOP_4_PREHEADER_NEW]]: +; CHECK-NEXT: br label %[[LOOP_4:.*]] +; CHECK: [[LOOP_2_LATCH]]: +; CHECK-NEXT: br label %[[LOOP_2_HEADER]] +; CHECK: [[LOOP_4]]: +; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ 0, %[[LOOP_4_PREHEADER_NEW]] ], [ [[INC_I_7:%.*]], %[[LOOP_4]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[LOOP_4_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[LOOP_4]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[INC_I_7]] = add nuw nsw i32 [[IV_4]], 8 +; CHECK-NEXT: 
[[NITER_NEXT_7]] = add nuw nsw i32 [[NITER]], 8 +; CHECK-NEXT: br i1 true, label %[[LOOP_1_LATCH_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_4]] +; CHECK: [[LOOP_1_LATCH_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[IV_4_UNR_PH:%.*]] = phi i32 [ [[INC_I_7]], %[[LOOP_4]] ] +; CHECK-NEXT: br label %[[LOOP_1_LATCH_UNR_LCSSA]] +; CHECK: [[LOOP_1_LATCH_UNR_LCSSA]]: +; CHECK-NEXT: [[IV_4_UNR:%.*]] = phi i32 [ 0, %[[LOOP_4_PREHEADER]] ], [ [[IV_4_UNR_PH]], %[[LOOP_1_LATCH_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_4_EPIL_PREHEADER:.*]], label %[[LOOP_1_LATCH:.*]] +; CHECK: [[LOOP_4_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_4_EPIL:.*]] +; CHECK: [[LOOP_4_EPIL]]: +; CHECK-NEXT: [[IV_4_EPIL:%.*]] = phi i32 [ [[INC_I_EPIL:%.*]], %[[LOOP_4_EPIL]] ], [ [[IV_4_UNR]], %[[LOOP_4_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i32 [ 0, %[[LOOP_4_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_4_EPIL]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[INC_I_EPIL]] = add i32 [[IV_4_EPIL]], 1 +; CHECK-NEXT: [[EC_EPIL:%.*]] = icmp eq i32 [[IV_4_EPIL]], [[IV_3_LCSSA_LCSSA]] +; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i32 [[EPIL_ITER]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_4_EPIL]], label %[[LOOP_1_LATCH_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[LOOP_1_LATCH_EPILOG_LCSSA]]: +; CHECK-NEXT: br label %[[LOOP_1_LATCH]] +; CHECK: [[LOOP_1_LATCH]]: +; CHECK-NEXT: br label %[[LOOP_1_HEADER]] +; +entry: + br label %loop.1.header + +loop.1.header: + br label %loop.2.header + +loop.2.header: + br label %loop.3 + +loop.3: + %iv.3 = phi i32 [ %iv.3.next, %loop.3 ], [ 0, %loop.2.header ] + call void @foo() + %iv.3.next = add i32 %iv.3, 1 + br i1 false, label %ph, label %loop.3 + +ph: + br i1 true, label %loop.2.latch, label %loop.4 + +loop.2.latch: + br label %loop.2.header + +loop.4: + %iv.4 = phi i32 [ 0, %ph ], [ %inc.i, %loop.4 ] + call void @foo() + %inc.i = add i32 %iv.4, 1 + %ec = icmp eq i32 %iv.4, %iv.3 + br i1 %ec, label %loop.1.latch, label %loop.4 + +loop.1.latch: + br label %loop.1.header +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"} +;. 
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll index 824e23fcf3e6e..c1d8395965577 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll @@ -201,6 +201,78 @@ loop.latch: %ec = icmp eq i32 %iv.next, %n br i1 %ec, label %exit, label %loop.header +exit: + ret void +} + +define void @test_peel_guard_sub_1_btc(i32 %n) { +; CHECK-LABEL: define void @test_peel_guard_sub_1_btc( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[PRE:%.*]] = icmp eq i32 [[SUB]], 0 +; CHECK-NEXT: br i1 [[PRE]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_HEADER_PREHEADER_SPLIT:.*]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN:.*]] +; CHECK: [[LOOP_HEADER_PREHEADER_SPLIT]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 1, %[[LOOP_HEADER_PREHEADER_SPLIT]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP_LATCH]], label %[[THEN:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[CALL136:%.*]] = load volatile ptr, ptr null, align 4294967296 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]]: +; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_PEEL_BEGIN]] +; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 1, %[[LOOP_HEADER_PREHEADER]] ], [ [[DOTPH]], %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] +; CHECK: [[LOOP_HEADER_PEEL]]: +; CHECK-NEXT: [[CMP115_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] +; CHECK-NEXT: br i1 [[CMP115_PEEL]], label %[[LOOP_LATCH_PEEL:.*]], label %[[THEN_PEEL:.*]] +; CHECK: [[THEN_PEEL]]: +; CHECK-NEXT: [[CALL136_PEEL:%.*]] = load volatile ptr, ptr null, align 4294967296 +; CHECK-NEXT: br label %[[LOOP_LATCH_PEEL]] +; CHECK: [[LOOP_LATCH_PEEL]]: +; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw i32 [[TMP3]], 1 +; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[N]] +; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_LOOPEXIT_PEEL_NEXT:.*]], label %[[EXIT_LOOPEXIT_PEEL_NEXT]] +; CHECK: [[EXIT_LOOPEXIT_PEEL_NEXT]]: +; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]] +; CHECK: [[LOOP_HEADER_PEEL_NEXT]]: +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %sub = add i32 %n, -1 + %pre = icmp eq i32 %sub, 0 + br i1 %pre, label %exit, label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i32 [ %iv.next, %loop.latch ], [ 1, %entry ] + %cmp115 = icmp eq i32 %iv, %sub + br i1 %cmp115, label %loop.latch, label %then + +then: + %call136 = load volatile ptr, ptr null, align 4294967296 + br label %loop.latch + +loop.latch: + %iv.next = add nuw i32 %iv, 1 + 
%ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop.header + exit: ret void } @@ -208,4 +280,5 @@ exit: ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll index b6b14e365cc1d..995eb0c52ff36 100644 --- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -435,3 +435,61 @@ exit: } declare void @fn(i32) + +define void @peel_int_eq_condition(i32 %start) { +; CHECK-LABEL: @peel_int_eq_condition( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] +; CHECK: loop.peel.begin: +; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] +; CHECK: loop.peel: +; CHECK-NEXT: [[C_0_PEEL:%.*]] = icmp eq i32 [[START:%.*]], [[START]] +; CHECK-NEXT: br i1 [[C_0_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[LOOP_LATCH_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @fn(i32 [[START]]) +; CHECK-NEXT: br label [[LOOP_LATCH_PEEL]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[EXITCOND_PEEL:%.*]] = icmp slt i32 [[START]], 100 +; CHECK-NEXT: br i1 [[EXITCOND_PEEL]], label [[LOOP_PEEL_NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: loop.peel.next: +; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] +; CHECK: loop.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[LOOP_LATCH]] +; CHECK: if.then: +; CHECK-NEXT: call void @fn(i32 [[IV]]) +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp slt i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] + %c.0 = icmp eq i32 %iv, %start + br i1 %c.0, label %if.then, label %loop.latch + +if.then: + call void @fn(i32 %iv) + br label %loop.latch + +loop.latch: + %iv.next = add i32 %iv, 1 + %ec = icmp slt i32 %iv, 100 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 427a05cc1c843..2b60480afa476 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -63,7 +63,9 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]] ; CHECK-NEXT: 
[[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index 1a8e5940d88e7..3c091ceb17144 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -63,7 +63,9 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 3bb20e2a81d83..bd84494d6c6c5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -37,7 +37,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP14]], align 1 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP16]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP8]]) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -118,7 +119,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i64> [[VEC_IND]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <2 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) @@ -194,7 +196,8 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK-NEXT: [[TMP3:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast oge <4 x float> [[TMP3]], splat (float 3.000000e+00) ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: 
[[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -281,7 +284,8 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[TMP13:%.*]] = udiv [[WIDE_LOAD]], splat (i32 20000) ; CHECK-NEXT: [[TMP15:%.*]] = icmp ne [[TMP13]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) +; CHECK-NEXT: [[TMP9:%.*]] = freeze [[TMP15]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]] ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -363,7 +367,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -464,7 +469,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) +; CHECK-NEXT: [[TMP20:%.*]] = freeze <4 x i1> [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]]) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index 61ef3cef603fa..21cecea735ad5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -29,24 +29,25 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; 
CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne [[WIDE_LOAD4]], [[WIDE_LOAD8]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP28]], align 1 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP26]], align 1 +; CHECK-NEXT: [[TMP59:%.*]] = icmp ne [[WIDE_LOAD4]], [[WIDE_LOAD8]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP32]]) -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP34:%.*]] = freeze [[TMP59]] +; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP34]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] +; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP12]], [[TMP35]] +; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP12]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_INC:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 510, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP61]] ; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]] ; CHECK-NEXT: br label [[LOOP_END]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index c4fc60908c7e0..132e77951abd6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -597,7 +597,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-LABEL: @test_gather_not_profitable_pr48429( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64 -; AVX512-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] +; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] ; AVX512-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[D]], 0 ; AVX512-NEXT: br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[ITER_CHECK:%.*]] ; AVX512: iter.check: @@ -616,17 +616,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 6 ; AVX512-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8 ; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]] -; AVX512-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2 -; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 4 -; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP10]] ; AVX512-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4 ; AVX512-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]] -; 
AVX512-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP4]] -; AVX512-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP12]] ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]] ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[SCEVGEP]] ; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; AVX512-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP3]] +; AVX512-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[PTR]] ; AVX512-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]] ; AVX512-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]] ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] @@ -651,9 +646,9 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP18]], align 4, !alias.scope [[META8:![0-9]+]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr float, ptr [[TMP16]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope [[META15:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope [[META15:![0-9]+]] ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP14]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD6]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -687,16 +682,16 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX21]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD17]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META8]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD14]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr float, ptr [[TMP28]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD18:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD18]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20]], !noalias [[META22]] +; AVX512-NEXT: [[WIDE_LOAD15:%.*]] 
= load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META15]] +; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD15]], <8 x ptr> [[TMP27]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] ; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8 ; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512 -; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] -; AVX512-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; AVX512-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] +; AVX512-NEXT: br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]] ; AVX512-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] @@ -715,15 +710,15 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: store float [[TMP35]], ptr [[ARRAYIDX5]], align 4 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16 -; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]] -; AVX512-NEXT: br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP1]] +; AVX512-NEXT: br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; ; FVW2-LABEL: @test_gather_not_profitable_pr48429( ; FVW2-NEXT: entry: ; FVW2-NEXT: [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64 -; FVW2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] +; FVW2-NEXT: [[SCEVGEP1:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] ; FVW2-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[D]], 0 ; FVW2-NEXT: br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]] ; FVW2: for.body.lr.ph: @@ -742,17 +737,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 6 ; FVW2-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8 ; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]] -; FVW2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2 -; FVW2-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 4 -; FVW2-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP10]] ; FVW2-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4 ; FVW2-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]] -; FVW2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP4]] -; FVW2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP12]] ; FVW2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]] ; FVW2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[SCEVGEP]] ; FVW2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; FVW2-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP3]] +; FVW2-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[PTR]] ; FVW2-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]] ; FVW2-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]] ; FVW2-NEXT: 
[[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] @@ -810,7 +800,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: store float [[TMP32]], ptr [[ARRAYIDX5]], align 4 ; FVW2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1 ; FVW2-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16 -; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]] +; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP1]] ; FVW2-NEXT: br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; FVW2: for.end: ; FVW2-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size-needs-loop-guards.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size-needs-loop-guards.ll new file mode 100644 index 0000000000000..37f0d5418b427 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size-needs-loop-guards.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s + +define void @loop_guard_on_assume_needed_to_prove_dereferenceable_ptr_arg_noundef(i64 %x, ptr noalias noundef %A, ptr noalias %B, ptr noalias %C) nofree nosync { +; CHECK-LABEL: define void @loop_guard_on_assume_needed_to_prove_dereferenceable_ptr_arg_noundef( +; CHECK-SAME: i64 [[X:%.*]], ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[X]]) ] +; CHECK-NEXT: [[C_X:%.*]] = icmp uge i64 [[X]], 128 +; CHECK-NEXT: br i1 [[C_X]], label %[[LOOP_HEADER_PREHEADER:.*]], [[EXIT:label %.*]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; 
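+; The dominating guard (icmp uge i64 %x, 128) is what makes this test
+; interesting: the assume only marks %A dereferenceable for %x bytes, while
+; the loop reads 32 i32 elements (128 bytes) from %A. Only with the guard
+; can the conditional load of %A be widened to the unpredicated
+; [[WIDE_LOAD1]] above. %A must also be known noundef, via the parameter
+; attribute in this test and via a "noundef" bundle in the following one.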
+entry: + call void @llvm.assume(i1 true) [ "align"(ptr %A, i64 4), "dereferenceable"(ptr %A, i64 %x) ] + %c.x = icmp uge i64 %x, 128 + br i1 %c.x, label %loop.header, label %exit + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.b = getelementptr i32, ptr %B, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp eq i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %gep.a = getelementptr i32, ptr %A, i64 %iv + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %C, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 32 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @loop_guard_on_assume_needed_to_prove_dereferenceable(i64 %x, ptr noalias %A, ptr noalias %B, ptr noalias %C) nofree nosync { +; CHECK-LABEL: define void @loop_guard_on_assume_needed_to_prove_dereferenceable( +; CHECK-SAME: i64 [[X:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "noundef"(ptr [[A]]), "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[X]]) ] +; CHECK-NEXT: [[C_X:%.*]] = icmp uge i64 [[X]], 128 +; CHECK-NEXT: br i1 [[C_X]], label %[[LOOP_HEADER_PREHEADER:.*]], [[EXIT:label %.*]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[TMP16]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> 
[[TMP16]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP18]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + call void @llvm.assume(i1 true) [ "noundef"(ptr %A), "align"(ptr %A, i64 4), "dereferenceable"(ptr %A, i64 %x) ] + %c.x = icmp uge i64 %x, 128 + br i1 %c.x, label %loop.header, label %exit + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.b = getelementptr i32, ptr %B, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp eq i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %gep.a = getelementptr i32, ptr %A, i64 %iv + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %C, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 32 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @loop_guard_on_trip_count_needed_to_prove_dereferenceable(i32 %x, ptr noalias dereferenceable(128) align 4 %A, ptr noalias %B, ptr noalias %C) { +; CHECK-LABEL: define void @loop_guard_on_trip_count_needed_to_prove_dereferenceable( +; CHECK-SAME: i32 [[X:%.*]], ptr noalias align 4 dereferenceable(128) [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[C_X:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: br i1 [[C_X]], label %[[PH:.*]], [[EXIT:label %.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N:%.*]] = tail call i32 @llvm.smin.i32(i32 [[X]], i32 31) +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N_EXT]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_EXT]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_EXT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add 
nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_EXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %c.x = icmp sgt i32 %x, 0 + br i1 %c.x, label %ph, label %exit + +ph: + %n = tail call i32 @llvm.smin.i32(i32 %x, i32 31) + %n.ext = zext i32 %n to i64 + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop.latch ] + %gep.b = getelementptr i32, ptr %B, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp eq i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %gep.a = getelementptr i32, ptr %A, i64 %iv + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %C, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n.ext + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + + + +declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 7a54519c7cdf8..27f956144aabd 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -1587,6 +1587,385 @@ exit: declare ptr @get_ptr() declare void @may_free() +define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nosync { +; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_nofree_via_context( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH1:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD1]], <2 x i32> [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq 
i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT1:.*]], label %[[SCALAR_PH1]] +; CHECK: [[SCALAR_PH1]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH1]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[SCALAR_PH]] ], [ [[L_B]], %[[EXIT]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT1]], label %[[EXIT]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nosync { +; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_may_free( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] +; CHECK-NEXT: call void @may_free() +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH1:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement 
<2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP19:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP20]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT1:.*]], label %[[SCALAR_PH1]] +; CHECK: [[SCALAR_PH1]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH1]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[SCALAR_PH]] ], [ [[L_B]], %[[EXIT]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT1]], label %[[EXIT]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] + call void @may_free() + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b 
= load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_but_missing_nosync(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) { +; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_but_missing_nosync( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH1:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[TMP16]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP18]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP40:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT1:.*]], label %[[SCALAR_PH1]] +; CHECK: [[SCALAR_PH1]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH1]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[SCALAR_PH]] ], [ [[L_B]], %[[EXIT]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT1]], label %[[EXIT]], !llvm.loop [[LOOP41:![0-9]+]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predecessors(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i1 %pre) nosync { +; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predecessors( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i1 [[PRE:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] +; CHECK-NEXT: br i1 [[PRE]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: store i32 0, ptr [[B]], align 4 +; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH1:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[TMP16]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP18]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT1:.*]], label %[[SCALAR_PH1]] +; CHECK: [[SCALAR_PH1]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH1]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[SCALAR_PH]] ], [ [[L_B]], %[[EXIT]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT1]], label %[[EXIT]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), 
"dereferenceable"(ptr %a, i64 4000) ] + br i1 %pre, label %then, label %else + +then: + store i32 0, ptr %a + br label %loop.header + +else: + store i32 0, ptr %b + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -1624,4 +2003,12 @@ declare void @may_free() ; CHECK: [[LOOP33]] = distinct !{[[LOOP33]], [[META2]], [[META1]]} ; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[META1]], [[META2]]} ; CHECK: [[LOOP35]] = distinct !{[[LOOP35]], [[META2]], [[META1]]} +; CHECK: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]} +; CHECK: [[LOOP37]] = distinct !{[[LOOP37]], [[META2]], [[META1]]} +; CHECK: [[LOOP38]] = distinct !{[[LOOP38]], [[META1]], [[META2]]} +; CHECK: [[LOOP39]] = distinct !{[[LOOP39]], [[META2]], [[META1]]} +; CHECK: [[LOOP40]] = distinct !{[[LOOP40]], [[META1]], [[META2]]} +; CHECK: [[LOOP41]] = distinct !{[[LOOP41]], [[META2]], [[META1]]} +; CHECK: [[LOOP42]] = distinct !{[[LOOP42]], [[META1]], [[META2]]} +; CHECK: [[LOOP43]] = distinct !{[[LOOP43]], [[META2]], [[META1]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index a2eddad179216..8b95df4b96c48 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -63,7 +63,9 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll index 1ca5586942d7c..74ea22e0f8d6f 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll @@ -208,7 +208,8 @@ define float @fmaxnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] @@ -276,7 +277,8 @@ define float @fmaxnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] @@ -346,7 +348,8 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: 
[[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] @@ -417,7 +420,8 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll index 68bc8d0640a3f..6b1e411c3ca68 100644 --- a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll @@ -208,7 +208,8 @@ define float @fminnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] @@ -276,7 +277,8 @@ define float @fminnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/miniters.ll b/llvm/test/Transforms/LoopVectorize/miniters.ll index 0b4c002045186..6d06a03d0d018 100644 --- a/llvm/test/Transforms/LoopVectorize/miniters.ll +++ b/llvm/test/Transforms/LoopVectorize/miniters.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "vector.ph:" --version 5 ; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s 
; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL @@ -8,37 +9,89 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @a = common global [1000 x i32] zeroinitializer, align 16 ; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF. -; CHECK-LABEL: foo( -; CHECK: %min.iters.check = icmp ult i64 %N, 4 -; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph -; UNROLL-LABEL: foo( -; UNROLL: %min.iters.check = icmp ult i64 %N, 8 -; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph - define void @foo(i64 %N) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0 +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; +; UNROLL-LABEL: define void @foo( +; UNROLL-SAME: i64 [[N:%.*]]) { +; UNROLL-NEXT: [[ENTRY:.*:]] +; UNROLL-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0 +; UNROLL-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]] +; UNROLL: [[LOOP_PREHEADER]]: +; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] +; UNROLL: [[VECTOR_PH]]: +; entry: - %cmp.8 = icmp sgt i64 %N, 0 - br i1 %cmp.8, label %for.body.preheader, label %for.end - -for.body.preheader: ; preds = %entry - br label %for.body + %c = icmp sgt i64 %N, 0 + br i1 %c, label %loop, label %exit -for.body: ; preds = %for.body, %for.body.preheader - %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %i.09 +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %iv %tmp = load i32, ptr %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %i.09 + %arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %iv %tmp1 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %tmp1, %tmp - %arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %i.09 + %arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %iv store i32 %add, ptr %arrayidx2, align 4 - %inc = add nuw nsw i64 %i.09, 1 - %exitcond = icmp eq i64 %inc, %N - br i1 %exitcond, label %for.end.loopexit, label %for.body + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %N + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @min_iters_known_via_loop_guards_add(i32 %start, i32 %end, ptr %src) { +; CHECK-LABEL: define void @min_iters_known_via_loop_guards_add( +; CHECK-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]] +; CHECK-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100 +; CHECK-NEXT: call void @llvm.assume(i1 [[PRE]]) +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1 +; CHECK-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]] +; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; +; UNROLL-LABEL: define void 
@min_iters_known_via_loop_guards_add( +; UNROLL-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) { +; UNROLL-NEXT: [[ENTRY:.*:]] +; UNROLL-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]] +; UNROLL-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100 +; UNROLL-NEXT: call void @llvm.assume(i1 [[PRE]]) +; UNROLL-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1 +; UNROLL-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64 +; UNROLL-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]] +; UNROLL-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] +; UNROLL: [[VECTOR_PH]]: +; +entry: + %sub = sub i32 %end, %start + %pre = icmp sgt i32 %sub, 100 + call void @llvm.assume(i1 %pre) + %add.1 = add i32 %sub, 1 + %iv.start = zext i32 %add.1 to i64 + br label %loop -for.end.loopexit: ; preds = %for.body - br label %for.end +loop: + %iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i64, ptr %src, i64 %iv + store i64 %iv, ptr %gep + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop -for.end: ; preds = %for.end.loopexit, %entry +exit: ret void } diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index 2747895f06a7b..3eac7678bbc95 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -18,11 +18,9 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) { ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: br i1 [[EC_1]], label %[[PH:.*]], label %[[LOOP_1]] ; CHECK: [[PH]]: -; CHECK-NEXT: [[IV_2_LCSSA:%.*]] = phi i32 [ [[IV_2]], %[[LOOP_1]] ] ; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_1]] ] -; CHECK-NEXT: [[IV_2_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_2_NEXT]], %[[LOOP_1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[IV_2_NEXT]], %[[LOOP_1]] ] ; CHECK-NEXT: [[SRC_2:%.*]] = tail call noalias noundef dereferenceable_or_null(8) ptr @calloc(i64 1, i64 8) -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV_2_LCSSA]], 1 ; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]] ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 @@ -106,27 +104,23 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) { ; CHECK-LABEL: define void @runtime_checks_ptr_inductions( ; CHECK-SAME: ptr [[DST_1:%.*]], ptr [[DST_2:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[DST_11:%.*]] = ptrtoint ptr [[DST_1]] to i64 ; CHECK-NEXT: br label %[[LOOP_1:.*]] ; CHECK: [[LOOP_1]]: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], %[[LOOP_1]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[DST_1]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1]] ] ; CHECK-NEXT: [[CALL:%.*]] = call i32 @val() ; CHECK-NEXT: [[SEL_DST:%.*]] = select i1 [[C]], ptr [[DST_1]], ptr [[DST_2]] ; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, ptr [[PTR_IV_1]], i64 1 ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i32 [[CALL]], 0 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: br i1 [[EC_1]], label %[[LOOP_2_HEADER_PREHEADER:.*]], label %[[LOOP_1]] ; CHECK: [[LOOP_2_HEADER_PREHEADER]]: -; CHECK-NEXT: [[SEL_DST_LCSSA2:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ] -; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], %[[LOOP_1]] ] +; CHECK-NEXT: [[SEL_DST_LCSSA1:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ] ; 
CHECK-NEXT: [[PTR_IV_1_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1]], %[[LOOP_1]] ] ; CHECK-NEXT: [[SEL_DST_LCSSA:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ] -; CHECK-NEXT: [[SEL_DST_LCSSA23:%.*]] = ptrtoint ptr [[SEL_DST_LCSSA2]] to i64 +; CHECK-NEXT: [[SEL_DST_LCSSA12:%.*]] = ptrtoint ptr [[SEL_DST_LCSSA1]] to i64 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDVAR_LCSSA]], [[DST_11]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA23]] +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[PTR_IV_1_LCSSA]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA12]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -148,13 +142,13 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) { ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1023, %[[MIDDLE_BLOCK]] ], [ 1, %[[LOOP_2_HEADER_PREHEADER]] ], [ 1, %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]] ; CHECK: [[LOOP_2_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i32 [[IV]], 1024 ; CHECK-NEXT: br i1 [[EC_2]], label %[[EXIT:.*]], label %[[LOOP_2_LATCH]] ; CHECK: [[LOOP_2_LATCH]]: @@ -196,3 +190,288 @@ loop.2.latch: exit: ret void } + + +declare void @foo() +declare void @bar() + +define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprogress { +; CHECK-LABEL: define void @expand_diff_scev_unknown( +; CHECK-SAME: ptr [[DST:%.*]], i1 [[INVAR_C:%.*]], i32 [[STEP:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], %[[LOOP_1]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[STEP]], %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 +; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[INVAR_C]], label %[[LOOP_2_PREHEADER:.*]], label 
%[[LOOP_1]] +; CHECK: [[LOOP_2_PREHEADER]]: +; CHECK-NEXT: [[INDVAR_LCSSA1:%.*]] = phi i32 [ [[INDVAR]], %[[LOOP_1]] ] +; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i32 [ [[IV_1]], %[[LOOP_1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[IV_1_LCSSA]], [[STEP]] +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[STEP]], -2 +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[INDVAR_LCSSA1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SMAX]], [[TMP4]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP5]], i32 1) +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[UMIN]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMIN]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[STEP]], i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[UMAX]] +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], 1 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[IV_1_LCSSA]], [[N_VEC]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IV_1_LCSSA]], [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[IV_1_LCSSA]], %[[LOOP_2_PREHEADER]] ], [ [[IV_1_LCSSA]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label %[[LOOP_2:.*]] +; CHECK: [[LOOP_2]]: +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2]], [[STEP]] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV_2]] +; CHECK-NEXT: store i32 0, ptr [[GEP_DST]], align 4 +; CHECK-NEXT: [[EC_2:%.*]] = icmp slt i32 [[IV_2_NEXT]], 0 +; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.1 + +loop.1: + %iv.1 = phi i32 [ %step, %entry ], [ %iv.1.next, %loop.1 ] + call void @foo() + %iv.1.next = add i32 %iv.1, 1 + br i1 %invar.c, label %loop.2, label %loop.1 + +loop.2: + %iv.2 = phi i32 [ %iv.1, %loop.1 ], [ %iv.2.next, %loop.2 ] + %iv.2.next = add nsw i32 %iv.2, %step + %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv.2 + store i32 0, ptr %gep.dst + %ec.2 = icmp slt i32 
%iv.2.next, 0 + br i1 %ec.2, label %loop.2, label %exit + +exit: + ret void +} + +define void @expand_diff_neg_ptrtoint_expr(ptr %src, ptr %start) { +; CHECK-LABEL: define void @expand_diff_neg_ptrtoint_expr( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, ptr [[PTR_IV_1]], i64 8 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT]], 32 +; CHECK-NEXT: br i1 [[EC_1]], label %[[LOOP_2_PREHEADER:.*]], label %[[LOOP_1]] +; CHECK: [[LOOP_2_PREHEADER]]: +; CHECK-NEXT: [[PTR_IV_1_NEXT_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1_NEXT]], %[[LOOP_1]] ] +; CHECK-NEXT: br label %[[LOOP_2:.*]] +; CHECK: [[LOOP_2]]: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[IV_NEXT_1:%.*]], %[[LOOP_2]] ], [ 1, %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ [[PTR_IV_1_NEXT_LCSSA]], %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 8 +; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_1]], 1 +; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[IV_NEXT_1]], 32 +; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_3_PREHEADER:.*]], label %[[LOOP_2]] +; CHECK: [[LOOP_3_PREHEADER]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi ptr [ [[PTR_IV_2_NEXT]], %[[LOOP_2]] ] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[SRC2]] +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP5]], [[TMP0]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -16 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] +; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[OFFSET_IDX5]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP10]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], -2 +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 1, %[[LOOP_3_PREHEADER]] ], [ 1, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: 
[[BC_RESUME_VAL3:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[TMP1]], %[[LOOP_3_PREHEADER]] ], [ [[TMP1]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[LOOP_3:.*]] +; CHECK: [[LOOP_3]]: +; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_NEXT_2:%.*]], %[[LOOP_3]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_3]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[IV_2]], -1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP12]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP_SRC]], align 8 +; CHECK-NEXT: [[PTR_IV_3_NEXT]] = getelementptr i8, ptr [[PTR_IV_3]], i64 8 +; CHECK-NEXT: store i64 [[L]], ptr [[PTR_IV_3]], align 8 +; CHECK-NEXT: [[IV_NEXT_2]] = add i64 [[IV_2]], 1 +; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_2]], 0 +; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT]], label %[[LOOP_3]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.1 + +loop.1: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.1 ] + %ptr.iv.1 = phi ptr [ %start, %entry ], [ %ptr.iv.1.next, %loop.1 ] + %ptr.iv.1.next = getelementptr i8, ptr %ptr.iv.1, i64 8 + call void @foo() + %iv.next = add i64 %iv, 1 + %ec.1 = icmp eq i64 %iv.next, 32 + br i1 %ec.1, label %loop.2, label %loop.1 + +loop.2: + %iv.1 = phi i64 [ 1, %loop.1 ], [ %iv.next.1, %loop.2 ] + %ptr.iv.2 = phi ptr [ %ptr.iv.1.next, %loop.1 ], [ %ptr.iv.2.next, %loop.2 ] + call void @bar() + %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i64 8 + %iv.next.1 = add i64 %iv.1, 1 + %ec.2 = icmp eq i64 %iv.next.1, 32 + br i1 %ec.2, label %loop.3, label %loop.2 + +loop.3: + %iv.2 = phi i64 [ 1, %loop.2 ], [ %iv.next.2, %loop.3 ] + %ptr.iv.3 = phi ptr [ %ptr.iv.2.next, %loop.2 ], [ %ptr.iv.3.next, %loop.3 ] + %6 = add i64 %iv.2, -1 + %gep.src = getelementptr double, ptr %src, i64 %6 + %l = load i64, ptr %gep.src, align 8 + %ptr.iv.3.next = getelementptr i8, ptr %ptr.iv.3, i64 8 + store i64 %l, ptr %ptr.iv.3, align 8 + %iv.next.2 = add i64 %iv.2, 1 + %ec.3 = icmp eq i64 %iv.next.2, 0 + br i1 %ec.3, label %exit, label %loop.3 + +exit: + ret void +} + + +declare i1 @cond() + +define void @scev_exp_reuse_const_add(ptr %dst, ptr %src) { +; CHECK-LABEL: define void @scev_exp_reuse_const_add( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[DST]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, ptr [[PTR_IV_1]], i64 2 +; CHECK-NEXT: [[C:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_2_PH:.*]], label %[[LOOP_1]] +; CHECK: [[LOOP_2_PH]]: +; CHECK-NEXT: [[PTR_IV_1_NEXT_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1_NEXT]], %[[LOOP_1]] ] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -2, [[SRC2]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR_IV_1_NEXT_LCSSA]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_IV_1_NEXT_LCSSA]], i64 80 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: 
[[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_IV_1_NEXT_LCSSA]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP6]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <2 x i16> [[WIDE_LOAD]], ptr [[TMP7]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 40 +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_2_PH]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_NEXT_LCSSA]], %[[LOOP_2_PH]] ], [ [[PTR_IV_1_NEXT_LCSSA]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[LOOP_2:.*]] +; CHECK: [[LOOP_2]]: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2]] ] +; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_1]], 1 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV_2_NEXT]] +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP_SRC_1]], align 2 +; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 2 +; CHECK-NEXT: store i16 [[L]], ptr [[PTR_IV_2]], align 2 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_1]], 40 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_2]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.1 + +loop.1: + %ptr.iv.1 = phi ptr [ %dst, %entry ], [ %ptr.iv.1.next, %loop.1 ] + %ptr.iv.1.next = getelementptr i8, ptr %ptr.iv.1, i64 2 + %c = call i1 @cond() + br i1 %c, label %loop.2.ph, label %loop.1 + +loop.2.ph: + br label %loop.2 + +loop.2: + %iv.1 = phi i64 [ 0, %loop.2.ph ], [ %iv.2.next, %loop.2 ] + %ptr.iv.2 = phi ptr [ %ptr.iv.1.next, %loop.2.ph ], [ %ptr.iv.2.next, %loop.2 ] + %iv.2.next = add i64 %iv.1, 1 + %gep.src.1 = getelementptr i16, ptr %src, i64 %iv.2.next + %l = load i16, ptr %gep.src.1, align 2 + %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i64 2 + store i16 %l, ptr %ptr.iv.2, align 2 + %ec = icmp eq i64 %iv.1, 40 + br i1 %ec, label %exit, label %loop.2 + +exit: + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare double @llvm.cos.f64(double) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 22d9a5363bee6..0e9d3bdae5c6f 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -475,6 +475,105 @@ for.body: br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !12 } +declare i1 @cond() + +define void 
@test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 %d) { +; CHECK-LABEL: @test_scev_check_mul_add_expansion( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PRE_1:%.*]] = icmp samesign ugt i32 [[D:%.*]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[PRE_1]]) +; CHECK-NEXT: [[PRE_2:%.*]] = icmp ult i32 [[D]], 7 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[PRE_2]]) +; CHECK-NEXT: [[PRE_3:%.*]] = icmp slt i32 [[D]], [[LEN:%.*]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[PRE_3]]) +; CHECK-NEXT: [[SMAX3:%.*]] = call i32 @llvm.smax.i32(i32 [[LEN]], i32 7) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX3]], -6 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i32 [[LEN]], 10 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 12 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[LEN]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[TMP4]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[N_VEC]], 6 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6 +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]] +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META37:![0-9]+]], !noalias [[META40:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META40]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 6, [[ENTRY:%.*]] ], [ 6, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[IV]] to i64 +; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP9]] +; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX80]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: store i32 0, ptr [[IN]], align 4 +; CHECK-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; FORCED_OPTSIZE-LABEL: @test_scev_check_mul_add_expansion( +; FORCED_OPTSIZE-NEXT: entry: +; FORCED_OPTSIZE-NEXT: [[PRE_1:%.*]] = icmp sgt i32 [[D:%.*]], 5 +; FORCED_OPTSIZE-NEXT: tail call void @llvm.assume(i1 
[[PRE_1]]) +; FORCED_OPTSIZE-NEXT: [[PRE_2:%.*]] = icmp samesign ule i32 [[D]], 6 +; FORCED_OPTSIZE-NEXT: tail call void @llvm.assume(i1 [[PRE_2]]) +; FORCED_OPTSIZE-NEXT: [[PRE_3:%.*]] = icmp slt i32 [[D]], [[LEN:%.*]] +; FORCED_OPTSIZE-NEXT: tail call void @llvm.assume(i1 [[PRE_3]]) +; FORCED_OPTSIZE-NEXT: br label [[LOOP:%.*]] +; FORCED_OPTSIZE: loop: +; FORCED_OPTSIZE-NEXT: [[IV:%.*]] = phi i32 [ 6, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; FORCED_OPTSIZE-NEXT: [[ARRAYIDX80:%.*]] = getelementptr i16, ptr [[OUT:%.*]], i32 [[IV]] +; FORCED_OPTSIZE-NEXT: store i16 0, ptr [[ARRAYIDX80]], align 2 +; FORCED_OPTSIZE-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; FORCED_OPTSIZE-NEXT: store i32 0, ptr [[IN:%.*]], align 4 +; FORCED_OPTSIZE-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]] +; FORCED_OPTSIZE-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT:%.*]] +; FORCED_OPTSIZE: exit: +; FORCED_OPTSIZE-NEXT: ret void +; +entry: + %pre.1 = icmp sgt i32 %d, 5 + tail call void @llvm.assume(i1 %pre.1) + %pre.2 = icmp samesign ule i32 %d, 6 + tail call void @llvm.assume(i1 %pre.2) + %pre.3 = icmp slt i32 %d, %len + tail call void @llvm.assume(i1 %pre.3) + br label %loop + +loop: + %iv = phi i32 [ 6, %entry ], [ %iv.next, %loop ] + %arrayidx80 = getelementptr i16, ptr %out, i32 %iv + store i16 0, ptr %arrayidx80, align 2 + %iv.next = add nuw nsw i32 %iv, 1 + store i32 0, ptr %in, align 4 + %cmp7.not = icmp sgt i32 %len, %iv.next + br i1 %cmp7.not, label %loop, label %exit + +exit: + ret void +} + ; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}}) !llvm.module.flags = !{!0, !1} diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll index b640c1911cb0d..99a86c94b23dd 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll @@ -434,3 +434,62 @@ loop: exit: ret void } + + +define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) { +; CHECK-LABEL: define void @remove_diff_checks_via_guards( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OFFSET:%.*]] = sub i32 [[X]], [[Y]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[OFFSET]], 0 +; CHECK-NEXT: br i1 [[CMP]], [[EXIT:label %.*]], label %[[LOOP_PREHEADER:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[X]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[X]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[SMAX]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], [[OFFSET]] +; CHECK-NEXT: [[TMP13:%.*]] 
= icmp ugt i64 [[SMAX]], 4294967295 +; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]] +; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]] +; +entry: + %offset = sub i32 %x, %y + %cmp = icmp sge i32 %offset, 0 + br i1 %cmp, label %exit, label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.ext = sext i32 %iv to i64 + %gep.A = getelementptr i32, ptr %A, i64 %iv.ext + %l = load i32, ptr %gep.A, align 1 + %iv.offset = add i32 %iv, %offset + %iv.offset.ext = sext i32 %iv.offset to i64 + %gep.A.offset = getelementptr i32, ptr %A, i64 %iv.offset.ext + store i32 %l, ptr %gep.A.offset, align 1 + %iv.next = add i32 %iv, 1 + %ec = icmp sgt i32 %iv, %x + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll new file mode 100644 index 0000000000000..194d16e41f921 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefix=VF4IC2 %s +; RUN: opt -p loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefix=VF8IC1 %s + +; Test case from https://github.com/llvm/llvm-project/issues/153946. +; %shr and thus %early.cond will be poison from %iv == 4 onwards. +; Make sure the mask being poison does not propagate across lanes in the +; OR reduction when computing the early exit condition in the vector loop. 
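+;
+; A minimal illustrative sketch (not part of the test input; @any_lane_set is
+; a hypothetical name) of the freeze-before-reduction pattern the vectorizer
+; is expected to emit. Without the freeze, a single poison lane would make
+; the reduced i1 itself poison, and branching on it would be immediate UB;
+; freeze turns each poison lane into an arbitrary but fixed i1 first.
+;
+;   define i1 @any_lane_set(<4 x i1> %mask) {
+;     %frozen = freeze <4 x i1> %mask   ; poison lanes become arbitrary i1 values
+;     %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %frozen)
+;     ret i1 %any                       ; well-defined even if %mask had poison lanes
+;   }
+;   declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)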
+define noundef i32 @f(i32 noundef %g) {
+; VF4IC2-LABEL: define noundef i32 @f(
+; VF4IC2-SAME: i32 noundef [[G:%.*]]) {
+; VF4IC2-NEXT:  [[ENTRY:.*]]:
+; VF4IC2-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF4IC2:       [[VECTOR_PH]]:
+; VF4IC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[G]], i64 0
+; VF4IC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; VF4IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF4IC2:       [[VECTOR_BODY]]:
+; VF4IC2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4IC2-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4IC2-NEXT:    [[TMP0:%.*]] = shl nuw nsw <4 x i32> [[VEC_IND]], splat (i32 3)
+; VF4IC2-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
+; VF4IC2-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
+; VF4IC2-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; VF4IC2-NEXT:    [[TMP18:%.*]] = freeze <4 x i1> [[TMP5]]
+; VF4IC2-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
+; VF4IC2-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
+; VF4IC2-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP7]]
+; VF4IC2-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; VF4IC2-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF4IC2:       [[MIDDLE_SPLIT]]:
+; VF4IC2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
+; VF4IC2-NEXT:    br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF4IC2:       [[MIDDLE_BLOCK]]:
+; VF4IC2-NEXT:    br i1 true, label %[[RETURN:.*]], label %[[SCALAR_PH]]
+; VF4IC2:       [[VECTOR_EARLY_EXIT]]:
+; VF4IC2-NEXT:    [[TMP14:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
+; VF4IC2-NEXT:    [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
+; VF4IC2-NEXT:    [[TMP10:%.*]] = add i32 [[INDEX]], [[TMP15]]
+; VF4IC2-NEXT:    br label %[[RETURN]]
+; VF4IC2:       [[SCALAR_PH]]:
+; VF4IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF4IC2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; VF4IC2:       [[LOOP_HEADER]]:
+; VF4IC2-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF4IC2-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3
+; VF4IC2-NEXT:    [[SHR:%.*]] = ashr i32 [[G]], [[MUL]]
+; VF4IC2-NEXT:    [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0
+; VF4IC2-NEXT:    br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]]
+; VF4IC2:       [[LOOP_LATCH]]:
+; VF4IC2-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; VF4IC2-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8
+; VF4IC2-NEXT:    br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF4IC2:       [[RETURN]]:
+; VF4IC2-NEXT:    [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT]] ]
+; VF4IC2-NEXT:    ret i32 [[RES]]
+;
+; VF8IC1-LABEL: define noundef i32 @f(
+; VF8IC1-SAME: i32 noundef [[G:%.*]]) {
+; VF8IC1-NEXT:  [[ENTRY:.*]]:
+; VF8IC1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8IC1:       [[VECTOR_PH]]:
+; VF8IC1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[G]], i64 0
+; VF8IC1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+; VF8IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF8IC1:       [[VECTOR_BODY]]:
+; VF8IC1-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8IC1-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8IC1-NEXT:    [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 3)
+; VF8IC1-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
+; VF8IC1-NEXT:    [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; VF8IC1-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; VF8IC1-NEXT:    [[TMP8:%.*]] = freeze <8 x i1> [[TMP2]]
+; VF8IC1-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
+; VF8IC1-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
+; VF8IC1-NEXT:    br i1 true, label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF8IC1:       [[MIDDLE_SPLIT]]:
+; VF8IC1-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
+; VF8IC1-NEXT:    br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8IC1:       [[MIDDLE_BLOCK]]:
+; VF8IC1-NEXT:    br i1 true, label %[[RETURN:.*]], label %[[SCALAR_PH]]
+; VF8IC1:       [[VECTOR_EARLY_EXIT]]:
+; VF8IC1-NEXT:    [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true)
+; VF8IC1-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; VF8IC1-NEXT:    [[TMP9:%.*]] = add i32 [[INDEX]], [[TMP6]]
+; VF8IC1-NEXT:    br label %[[RETURN]]
+; VF8IC1:       [[SCALAR_PH]]:
+; VF8IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8IC1-NEXT:    br label %[[LOOP_HEADER:.*]]
+; VF8IC1:       [[LOOP_HEADER]]:
+; VF8IC1-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8IC1-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3
+; VF8IC1-NEXT:    [[SHR:%.*]] = ashr i32 [[G]], [[MUL]]
+; VF8IC1-NEXT:    [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0
+; VF8IC1-NEXT:    br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]]
+; VF8IC1:       [[LOOP_LATCH]]:
+; VF8IC1-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; VF8IC1-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8
+; VF8IC1-NEXT:    br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF8IC1:       [[RETURN]]:
+; VF8IC1-NEXT:    [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT]] ]
+; VF8IC1-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %mul = shl nuw nsw i32 %iv, 3
+  %shr = ashr i32 %g, %mul
+  %early.cond = icmp eq i32 %shr, 0
+  br i1 %early.cond, label %loop.latch, label %return
+
+loop.latch:
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 8
+  br i1 %ec, label %return, label %loop.header
+
+return:
+  %res = phi i32 [ %shr, %loop.latch ], [ %iv, %loop.header ]
+  ret i32 %res
+}
+
+
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index e79995f673fb4..d949c77bb7ece 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -7,21 +7,49 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @llvm.assume(i1 
true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]] +; CHECK-NEXT: br label [[LOOP_END]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -95,8 +123,94 @@ loop.end: ret i64 %retval } -define i64 @early_exit_alignment_and_deref_known_via_assumption(ptr noalias %p1, ptr noalias %p2, i64 %n) nofree nosync { -; CHECK-LABEL: define i64 
@early_exit_alignment_and_deref_known_via_assumption( +define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero(ptr noalias %p1, ptr noalias %p2, i64 %n) nofree nosync { +; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero( +; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 [[N]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 [[N]]) ] +; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: br i1 [[C]], label [[LOOP_PREHEADER:%.*]], label [[LOOP_END:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] +; CHECK-NEXT: br label [[LOOP_END_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END_LOOPEXIT]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: loop.end.loopexit: +; CHECK-NEXT: [[RETVAL_PH:%.*]] = phi i64 [ -1, [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP8]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: br label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[RETVAL_PH]], [[LOOP_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %p1, i64 4), "dereferenceable"(ptr %p1, i64 %n) ] + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 4), "dereferenceable"(ptr %p2, i64 %n) ] + %c = icmp ne i64 %n, 0 + br i1 %c, label %loop, label %loop.end + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, %n + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %entry ], [ %index, %loop ], [ -1, %loop.inc ] + ret i64 %retval +} + +define i64 @early_exit_alignment_and_deref_known_via_assumption_n_may_be_zero(ptr noalias %p1, ptr noalias %p2, i64 %n) nofree nosync { +; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_n_may_be_zero( ; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 [[N]]) ] @@ -141,3 +255,419 @@ loop.end: %retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ] ret i64 %retval } + +define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16(ptr noalias %p1, ptr noalias %p2, i32 %n) nofree nosync { +; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16( +; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[N_SCALED:%.*]] = shl nuw nsw i64 [[N_EXT]], 1 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 [[N_SCALED]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 [[N_SCALED]]) ] +; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[N_EXT]], 0 +; CHECK-NEXT: br i1 [[C]], label [[LOOP_PREHEADER:%.*]], label [[LOOP_END:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END_LOOPEXIT]] +; CHECK: loop.end.loopexit: +; CHECK-NEXT: [[RETVAL_PH:%.*]] = phi i64 [ -1, [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[RETVAL_PH]], [[LOOP_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %n.ext = zext i32 %n to i64 + %n.scaled = shl nuw nsw i64 %n.ext, 1 + call void @llvm.assume(i1 true) [ "align"(ptr %p1, i64 4), "dereferenceable"(ptr %p1, i64 %n.scaled) ] + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 4), "dereferenceable"(ptr %p2, i64 %n.scaled) ] + %c = icmp ne i64 %n.ext, 0 + br i1 %c, label %loop, label %loop.end + +loop: + %index = phi i64 [ 0, %entry ], [ %index.next, %loop.inc ] + %gep.p1 = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %gep.p1, align 1 + %gep.p2 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %gep.p2, align 1 + %c.0 = icmp eq i8 %ld1, %ld2 + br i1 %c.0, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, %n.ext + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %entry ], [ %index, %loop ], [ -1, %loop.inc ] + ret i64 %retval +} + +define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_ptr_iv(ptr %A, i32 noundef %n) nofree nosync { +; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_ptr_iv( +; CHECK-SAME: ptr [[A:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 2) ] +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[N_EXT]], 1 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 [[MUL]]) ] +; CHECK-NEXT: [[A_END:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL]] +; CHECK-NEXT: [[PRE:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_HEADER_PREHEADER:%.*]] +; CHECK: loop.header.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[MUL]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IV_NEXT1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER1]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       middle.split:
+; CHECK-NEXT:    br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH1:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       vector.early.exit:
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[IV1:%.*]] = phi ptr [ [[IV_NEXT1]], [[LOOP_LATCH1]] ], [ [[A]], [[LOOP_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ [[IV1]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[IV]], align 2
+; CHECK-NEXT:    [[C_0:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT:    br i1 [[C_0]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[A_END]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[P_PH:%.*]] = phi ptr [ [[A_END]], [[LOOP_LATCH]] ], [ [[IV]], [[LOOP_HEADER]] ], [ [[A_END]], [[LOOP_LATCH1]] ], [ [[TMP13]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[P_PH]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    [[RES:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %A, i64 2) ]
+  %n.ext = zext i32 %n to i64
+  %mul = shl nuw nsw i64 %n.ext, 1
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 %mul) ]
+  %A.end = getelementptr i8, ptr %A, i64 %mul
+  %pre = icmp eq i32 %n, 0
+  br i1 %pre, label %exit, label %loop.header
+
+loop.header:
+  %iv = phi ptr [ %iv.next, %loop.latch ], [ %A, %entry ]
+  %l = load i16, ptr %iv, align 2
+  %c.0 = icmp eq i16 %l, 0
+  br i1 %c.0, label %exit, label %loop.latch
+
+loop.latch:
+  %iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 2
+  %ec = icmp eq ptr %iv.next, %A.end
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  %p = phi ptr [ %A, %entry ], [ %iv, %loop.header ], [ %A.end, %loop.latch ]
+  %res = ptrtoint ptr %p to i64
+  ret i64 %res
+}
+
+; The existing assumptions are strong enough to vectorize this.
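+; Illustrative sketch (not part of the test input; shown here as a single
+; assume with combined bundles, whereas the test uses separate calls): the
+; key facts are carried by operand bundles with a *runtime* size, e.g.
+;
+;   %distance = sub i64 %last.i64, %first.i64
+;   call void @llvm.assume(i1 true) [ "align"(ptr %first, i64 2),
+;                                     "dereferenceable"(ptr %first, i64 %distance) ]
+;
+; which guarantees every aligned i16 load in [%first, %last) is safe to
+; speculate, so the early-exit loop below needs no runtime pointer checks.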
+define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %first, ptr align 2 %last) nofree nosync { +; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_attribute_argument( +; CHECK-SAME: ptr align 2 [[FIRST:%.*]], ptr align 2 [[LAST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[FIRST_I64:%.*]] = ptrtoint ptr [[FIRST]] to i64 +; CHECK-NEXT: [[DISTANCE:%.*]] = sub i64 [[LAST_I64]], [[FIRST_I64]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[DISTANCE]]) ] +; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] +; CHECK-NEXT: br i1 [[C_0]], label [[EXIT:%.*]], label [[LOOP_HEADER_PREHEADER:%.*]] +; CHECK: loop.header.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP15]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[FIRST]], [[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: 
[[L:%.*]] = load i16, ptr [[IV]], align 2
+; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i16 [[L]], 1
+; CHECK-NEXT:    br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
+; CHECK-NEXT:    [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]]
+; CHECK-NEXT:    br i1 [[C_2]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ [[IV]], [[LOOP_HEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[TMP14]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], [[ENTRY:%.*]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    ret ptr [[FIRST_ADDR_0_LCSSA_I]]
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %first, i64 2) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %last, i64 2) ]
+  %last.i64 = ptrtoint ptr %last to i64
+  %first.i64 = ptrtoint ptr %first to i64
+  %distance = sub i64 %last.i64, %first.i64
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %first, i64 %distance) ]
+  %c.0 = icmp eq ptr %first, %last
+  br i1 %c.0, label %exit, label %loop.header
+
+loop.header:
+  %iv = phi ptr [ %first, %entry ], [ %iv.next, %loop.latch ]
+  %l = load i16, ptr %iv, align 2
+  %c.1 = icmp eq i16 %l, 1
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 2
+  %c.2 = icmp eq ptr %iv.next, %last
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %first.addr.0.lcssa.i = phi ptr [ %first, %entry ], [ %iv, %loop.header ], [ %iv.next, %loop.latch ]
+  ret ptr %first.addr.0.lcssa.i
+}
+
+; The existing assumptions are strong enough to vectorize this.
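+; A sketch of the difference from the previous test (hypothetical @find, not
+; part of the test input): there the pointers carry `align 2` parameter
+; attributes, while here alignment is known only from "align" assume bundles
+; on plain ptr arguments:
+;
+;   define ptr @find(ptr %first, ptr %last) {   ; no align attributes
+;     call void @llvm.assume(i1 true) [ "align"(ptr %first, i64 2) ]
+;     ...
+;
+; Both forms must be equally usable when proving the i16 loads are aligned.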
+define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last) nofree nosync { +; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_assumption( +; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[FIRST_I64:%.*]] = ptrtoint ptr [[FIRST]] to i64 +; CHECK-NEXT: [[DISTANCE:%.*]] = sub i64 [[LAST_I64]], [[FIRST_I64]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[DISTANCE]]) ] +; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] +; CHECK-NEXT: br i1 [[C_0]], label [[EXIT:%.*]], label [[LOOP_HEADER_PREHEADER:%.*]] +; CHECK: loop.header.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP15]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[FIRST]], [[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2 +; 
CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1 +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]] +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ [[IV]], [[LOOP_HEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[TMP14]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], [[ENTRY:%.*]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret ptr [[FIRST_ADDR_0_LCSSA_I]] +; +entry: + %last.i64 = ptrtoint ptr %last to i64 + %first.i64 = ptrtoint ptr %first to i64 + %distance = sub i64 %last.i64, %first.i64 + call void @llvm.assume(i1 true) [ "align"(ptr %first, i64 2) ] + call void @llvm.assume(i1 true) [ "align"(ptr %last, i64 2) ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %first, i64 %distance) ] + %c.0 = icmp eq ptr %first, %last + br i1 %c.0, label %exit, label %loop.header + +loop.header: + %iv = phi ptr [ %first, %entry ], [ %iv.next, %loop.latch ] + %l = load i16, ptr %iv, align 2 + %c.1 = icmp eq i16 %l, 1 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 2 + %c.2 = icmp eq ptr %iv.next, %last + br i1 %c.2, label %exit, label %loop.header + +exit: + %first.addr.0.lcssa.i = phi ptr [ %first, %entry ], [ %iv, %loop.header ], [ %iv.next, %loop.latch ] + ret ptr %first.addr.0.lcssa.i +} + +define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(ptr noalias %p1, ptr noalias %p2) nosync { +; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context( +; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label 
[[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] +; CHECK-NEXT: br label [[LOOP_END]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP8]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %p1, i64 4), "dereferenceable"(ptr %p1, i64 1024) ] + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 4), "dereferenceable"(ptr %p2, i64 1024) ] + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 1024 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ] + ret i64 %retval +} diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll index de8a3c5a8eaf2..9bd26841146fb 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll @@ -23,7 +23,8 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; VF4IC4-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]] +; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; VF4IC4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; VF4IC4-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] ; VF4IC4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_SPLIT:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll new file mode 100644 index 0000000000000..aeb2ad5df46fa --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s + + +; FIXME: currently the live-outs are not handled correctly. +; Test for https://github.com/llvm/llvm-project/issues/154967. +define i8 @iv_used_in_exit_with_math(i8 noundef %g) { +; CHECK-LABEL: define i8 @iv_used_in_exit_with_math( +; CHECK-SAME: i8 noundef [[G:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[S:%.*]] = shl nuw i8 1, [[IV]] +; CHECK-NEXT: [[A:%.*]] = and i8 [[S]], [[G]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], 0 +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[RETURN:.*]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], 4 +; CHECK-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RES_IV1:%.*]] = phi i8 [ 32, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[RES_IV2:%.*]] = phi i8 [ 0, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[RES:%.*]] = add i8 [[RES_IV1]], [[RES_IV2]] +; CHECK-NEXT: ret i8 [[RES]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop.latch ] + %s = shl nuw i8 1, %iv + %a = and i8 %s, %g + %c = icmp eq i8 %a, 0 + br i1 %c, label %loop.latch, label %return + +loop.latch: + %iv.next = add nuw nsw i8 %iv, 1 + %ec = icmp eq i8 %iv.next, 4 + br i1 %ec, label %return, label %loop.header + +return: + %res.iv1 = phi i8 [ 32, %loop.latch ], [ %iv, %loop.header ] + %res.iv2 = phi i8 [ 0, %loop.latch ], [ %iv, %loop.header ] + %res = add i8 %res.iv1, %res.iv2 + ret i8 %res +} + +define i32 @iv_used_in_exit_with_loads(ptr align 4 dereferenceable(128) %src) { +; CHECK-LABEL: define i32 @iv_used_in_exit_with_loads( +; CHECK-SAME: ptr align 4 dereferenceable(128) [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[RETURN:.*]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 32 +; CHECK-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RES_IV1:%.*]] = phi i32 [ 32, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[RES_IV2:%.*]] = phi i32 [ 0, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES_IV1]], [[RES_IV2]] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 
0, %entry ], [ %iv.next, %loop.latch ] + %gep = getelementptr inbounds i32, ptr %src, i32 %iv + %l = load i32, ptr %gep + %c = icmp eq i32 %l, 0 + br i1 %c, label %loop.latch, label %return + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 32 + br i1 %ec, label %return, label %loop.header + +return: + %res.iv1 = phi i32 [ 32, %loop.latch ], [ %iv, %loop.header ] + %res.iv2 = phi i32 [ 0, %loop.latch ], [ %iv, %loop.header ] + %res = add i32 %res.iv1, %res.iv2 + ret i32 %res +} diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index 0f99ed576f1fe..01f65546e3534 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -18,7 +18,8 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD3]], splat (i32 10) ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; VF4IC4-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP8]] +; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; VF4IC4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; VF4IC4-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] ; VF4IC4-NEXT: br i1 [[TMP5]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -95,7 +96,8 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; VF4IC4-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP13]] +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -170,7 +172,8 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], splat (i8 72) ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]]) +; VF4IC4-NEXT: [[TMP9:%.*]] = freeze <4 x i1> [[TMP17]] +; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]]) ; VF4IC4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; VF4IC4-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] ; VF4IC4-NEXT: br i1 [[TMP5]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -242,7 +245,8 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; VF4IC4-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP13]] +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -321,7 +325,8 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; VF4IC4-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP13]] +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -407,7 +412,8 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; VF4IC4-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP13]] +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -497,7 +503,8 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD14]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; VF4IC4-NEXT: [[TMP21:%.*]] = icmp ne <4 x i8> [[REVERSE14]], [[REVERSE15]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; VF4IC4-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP21]] +; VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 ; VF4IC4-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; VF4IC4-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -576,7 +583,8 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; VF4IC4-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP13]] +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -659,7 +667,8 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; VF4IC4-NEXT: [[TMP21:%.*]] = icmp ne <4 x i8> [[REVERSE6]], [[REVERSE14]] ; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], 4 -; VF4IC4-NEXT: [[TMP25:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; VF4IC4-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP21]] +; VF4IC4-NEXT: [[TMP25:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) ; VF4IC4-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 ; VF4IC4-NEXT: [[TMP27:%.*]] = or i1 [[TMP25]], [[TMP26]] ; VF4IC4-NEXT: br i1 [[TMP27]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 2069570d4a10f..aa7136b71fae8 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -25,7 +25,8 @@ define i64 @same_exit_block_phi_of_consts() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -102,7 +103,8 @@ define i64 @diff_exit_block_phi_of_consts() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -200,7 +202,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) +; CHECK-NEXT: [[TMP20:%.*]] = freeze <4 x i1> [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]]) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -283,7 +286,8 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -334,6 +338,345 @@ early.exit: for.end: ret i32 0 } + +define void @inner_loop_trip_count_depends_on_outer_iv(ptr align 8 dereferenceable(1792) %this, ptr %dst) { +; CHECK-LABEL: define void @inner_loop_trip_count_depends_on_outer_iv( +; CHECK-SAME: ptr align 8 dereferenceable(1792) [[THIS:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[THIS]], i64 1000 +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[OUTER_IV]], 0 +; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[INNER_HEADER_PREHEADER:%.*]] +; CHECK: inner.header.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[OUTER_IV]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[OUTER_IV]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[OUTER_IV]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[GEP_SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x ptr>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x ptr> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP3]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[OUTER_IV]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[OUTER_LATCH_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: br label [[THEN_LOOPEXIT:%.*]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[INNER_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr ptr, ptr [[GEP_SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[GEP_IV]], align 8 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[L]], null +; CHECK-NEXT: br i1 [[C_2]], label [[THEN_LOOPEXIT]], label [[INNER_LATCH]] +; CHECK: inner.latch: +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[OUTER_IV]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[OUTER_LATCH_LOOPEXIT]], label [[INNER_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: then.loopexit: +; CHECK-NEXT: br label [[THEN]] +; CHECK: then: +; CHECK-NEXT: store i32 0, ptr [[DST]], align 4 +; 
CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: outer.latch.loopexit: +; CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[OUTER_EC:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[OUTER_EC]], label [[EXIT:%.*]], label [[OUTER_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %gep.src = getelementptr i8, ptr %this, i64 1000 + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %c.1 = icmp eq i64 %outer.iv, 0 + br i1 %c.1, label %then, label %inner.header + +inner.header: + %iv = phi i64 [ %iv.next, %inner.latch ], [ 0, %outer.header ] + %gep.iv = getelementptr ptr, ptr %gep.src, i64 %iv + %l = load ptr, ptr %gep.iv, align 8 + %c.2 = icmp eq ptr %l, null + br i1 %c.2, label %then, label %inner.latch + +inner.latch: + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %outer.iv + br i1 %ec, label %outer.latch, label %inner.header + +then: + store i32 0, ptr %dst, align 4 + br label %outer.latch + +outer.latch: + %outer.iv.next = add i64 %outer.iv, 1 + %outer.ec = icmp eq i64 %outer.iv.next, 100 + br i1 %outer.ec, label %exit, label %outer.header + +exit: + ret void +} + +define i64 @loop_guard_needed_to_prove_dereferenceable(i32 %x, i1 %cmp2) { +; CHECK-LABEL: define i64 @loop_guard_needed_to_prove_dereferenceable( +; CHECK-SAME: i32 [[X:%.*]], i1 [[CMP2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i32], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[A]], i64 128) +; CHECK-NEXT: [[C_X:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: br i1 [[C_X]], label [[PH:%.*]], label [[EXIT:%.*]] +; CHECK: ph: +; CHECK-NEXT: [[N:%.*]] = tail call i32 @llvm.smin.i32(i32 [[X]], i32 31) +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [32 x i32], ptr [[A]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: 
[[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[PH]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr [32 x i32], ptr [[A]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX42]], align 4 +; CHECK-NEXT: [[CMP43:%.*]] = icmp eq i32 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[CMP43]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ -1, [[LOOP_LATCH]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP8]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + %A = alloca [32 x i32], align 4 + call void @init_mem(ptr %A, i64 128) + %c.x = icmp sgt i32 %x, 0 + br i1 %c.x, label %ph, label %exit + +ph: + %n = tail call i32 @llvm.smin.i32(i32 %x, i32 31) + %n.ext = zext i32 %n to i64 + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop.latch ] + %arrayidx42 = getelementptr [32 x i32], ptr %A, i64 0, i64 %iv + %0 = load i32, ptr %arrayidx42, align 4 + %cmp43 = icmp eq i32 %0, 0 + br i1 %cmp43, label %exit, label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n.ext + br i1 %ec, label %exit, label %loop.header + +exit: + %res = phi i64 [ 0, %entry ], [ -1, %loop.latch ], [ %iv, %loop.header ] + ret i64 %res +} + +declare i32 @llvm.smin.i32(i32, i32) + +@A = external global [100 x {i32, i8} ] + +define ptr @btc_and_max_btc_require_predicates(ptr noalias %start, i64 %offset) { +; CHECK-LABEL: define ptr @btc_and_max_btc_require_predicates( +; CHECK-SAME: ptr noalias [[START:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[END:%.*]] = getelementptr i32, ptr [[START]], i64 [[OFFSET]] +; CHECK-NEXT: [[PRE_1:%.*]] = icmp ult i64 [[OFFSET]], 100 +; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_1]]) +; CHECK-NEXT: [[PRE_2:%.*]] = icmp ugt i64 [[OFFSET]], 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]]) +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV_1:%.*]] = phi ptr [ @A, [[ENTRY:%.*]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi ptr [ [[START]], [[ENTRY]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[IV_1]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[EXIT:%.*]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_2_NEXT]] = getelementptr i8, ptr [[IV_2]], i64 40 +; CHECK-NEXT: [[IV_1_NEXT]] = getelementptr i8, ptr [[IV_1]], i64 40 +; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_2]], [[END]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi ptr [ [[IV_1]], [[LOOP_HEADER]] ], [ [[IV_2]], [[LOOP_LATCH]] ] +; CHECK-NEXT: ret ptr [[RES]] +; +entry: + %end = getelementptr i32, ptr %start, i64 
%offset + %pre.1 = icmp ult i64 %offset, 100 + call void @llvm.assume(i1 %pre.1) + %pre.2 = icmp ugt i64 %offset, 1 + call void @llvm.assume(i1 %pre.2) + br label %loop.header + +loop.header: + %iv.1 = phi ptr [ @A, %entry ], [ %iv.1.next, %loop.latch ] + %iv.2 = phi ptr [ %start, %entry ], [ %iv.2.next, %loop.latch ] + %l = load i32, ptr %iv.1, align 4 + %c = icmp eq i32 %l, 0 + br i1 %c, label %loop.latch, label %exit + +loop.latch: + %iv.2.next = getelementptr i8, ptr %iv.2, i64 40 + %iv.1.next = getelementptr i8, ptr %iv.1, i64 40 + %ec = icmp eq ptr %iv.2, %end + br i1 %ec, label %exit, label %loop.header + +exit: + %res = phi ptr [ %iv.1, %loop.header ], [ %iv.2, %loop.latch ] + ret ptr %res +} + +define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr dereferenceable(1024) %src) { +; CHECK-LABEL: define i64 @loop_guards_needed_to_prove_deref_multiple( +; CHECK-SAME: i32 [[X:%.*]], i1 [[C:%.*]], ptr dereferenceable(1024) [[SRC:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_AND:%.*]] = and i32 [[X]], -2 +; CHECK-NEXT: [[PRE_0:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[PRE_0]], label [[THEN:%.*]], label [[EXIT:%.*]] +; CHECK: then: +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[X_AND]], i32 0 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp ugt i32 [[SEL]], 1024 +; CHECK-NEXT: br i1 [[PRE_1]], label [[EXIT]], label [[PH:%.*]] +; CHECK: ph: +; CHECK-NEXT: [[PRE_2:%.*]] = icmp ne i32 [[SEL]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]]) +; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1 +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], 
[[LOOP_LATCH]] ], [ 0, [[PH]] ] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0 +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + %x.and = and i32 %x, -2 + %pre.0 = icmp eq i32 %x, 0 + br i1 %pre.0, label %then, label %exit + +then: + %sel = select i1 %c, i32 %x.and, i32 0 + %pre.1 = icmp ugt i32 %sel, 1024 + br i1 %pre.1, label %exit, label %ph + +ph: + %pre.2 = icmp ne i32 %sel, 0 + call void @llvm.assume(i1 %pre.2) + %n = add i32 %sel, -1 + %n.ext = zext i32 %n to i64 + br label %loop.header + +loop.header: + %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %ph ] + %gep.src.i = getelementptr i8, ptr %src, i64 %iv + %l = load i8, ptr %gep.src.i, align 1 + %c.1 = icmp eq i8 %l, 0 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n.ext + br i1 %ec, label %exit, label %loop.header + +exit: + %res = phi i64 [ -1, %entry ], [ -2, %then ], [ 0, %loop.latch ], [ %iv, %loop.header ] + ret i64 %res +} ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -345,4 +688,10 @@ for.end: ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index a21666a31b6a2..6050119b67ff4 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -24,7 +24,8 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -104,7 +105,8 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -187,7 +189,8 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], splat (i8 3) ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i128 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i128 [[INDEX_NEXT2]], 64 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -272,7 +275,8 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -360,7 +364,8 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() { ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 ; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD6]] ; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX1]], 4 -; 
CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 64 ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -442,7 +447,8 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], splat (i8 72) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) +; CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x i1> [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -518,7 +524,8 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[COND]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) +; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -600,7 +607,8 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -680,7 +688,8 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -760,7 +769,8 @@ define i64 @same_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], 
align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -837,7 +847,8 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -916,7 +927,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <4 x i64> [[VEC_IND]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) @@ -993,7 +1005,8 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] @@ -1069,7 +1082,8 @@ define ptr @same_exit_block_post_inc_use1_ivptr() { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], splat (i8 72) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x i1> [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1143,7 +1157,8 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK-NEXT: 
[[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1224,7 +1239,8 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -1311,7 +1327,8 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] @@ -1395,7 +1412,8 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX2]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] @@ -1480,7 +1498,8 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP38:![0-9]+]] @@ -1567,7 +1586,8 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] @@ -1656,7 +1676,8 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) { ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64 ; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP20]], [[TMP21]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] @@ -1748,7 +1769,8 @@ define i64 @loop_contains_safe_call() { ; CHECK-NEXT: [[TMP3:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast oge <4 x float> [[TMP3]], splat (float 3.000000e+00) ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] @@ -1824,7 +1846,8 @@ define i64 @loop_contains_safe_div() { ; CHECK-NEXT: [[TMP3:%.*]] = udiv <4 x i32> [[WIDE_LOAD]], splat (i32 20000) ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] @@ -1901,7 +1924,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 
[[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] @@ -1984,7 +2008,8 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]] ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020 ; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] @@ -2111,7 +2136,8 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index fdd5e0e7958ec..69d0076497c66 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -21,7 +21,8 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) +; VF8UF1-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP3]] +; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP7]]) ; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] ; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -55,18 +56,19 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 -; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer -; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VF8UF2-NEXT: 
[[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP6]]) -; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF8UF2-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; VF8UF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P_SRC1]], i32 0 +; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 +; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer +; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 8 +; VF8UF2-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]] +; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) +; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF8UF2: [[MIDDLE_SPLIT]]: -; VF8UF2-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: @@ -75,13 +77,13 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] ; VF8UF2: [[LOOP_HEADER]]: -; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] -; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 +; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] +; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 ; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 ; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] ; VF8UF2: [[LOOP_LATCH]]: -; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 +; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 ; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; VF8UF2: [[EXIT]]: @@ -98,7 +100,8 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) +; VF16UF1-NEXT: [[TMP1:%.*]] = freeze <16 x i1> [[TMP3]] +; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]]) ; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF16UF1: [[MIDDLE_SPLIT]]: ; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] @@ -157,7 +160,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF1-NEXT: 
[[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) +; VF8UF1-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP3]] +; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP7]]) ; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] ; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -196,15 +200,16 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] ; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 -; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer +; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 +; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VF8UF2-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP6]]) -; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF8UF2-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; VF8UF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF8UF2-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP6]] +; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) +; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF8UF2-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP5]] +; VF8UF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF8UF2: [[MIDDLE_SPLIT]]: -; VF8UF2-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: @@ -238,7 +243,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) +; VF16UF1-NEXT: [[TMP1:%.*]] = freeze <16 x i1> [[TMP3]] +; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]]) ; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF16UF1: [[MIDDLE_SPLIT]]: ; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] @@ -284,3 +290,147 @@ exit: %res = phi i64 [ %iv, %loop.header ], [ 1, %loop.latch ] ret i64 %res } + +define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosync nofree { +; VF8UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16( +; VF8UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] { +; VF8UF1-NEXT: [[ENTRY:.*]]: +; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] +; VF8UF1: [[VECTOR_PH]]: +; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF8UF1: [[VECTOR_BODY]]: +; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF8UF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 1 +; VF8UF1-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer +; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VF8UF1-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP1]] +; VF8UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]]) +; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF8UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] +; VF8UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF8UF1: [[MIDDLE_SPLIT]]: +; VF8UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF8UF1: [[MIDDLE_BLOCK]]: +; VF8UF1-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF8UF1: [[VECTOR_EARLY_EXIT]]: +; VF8UF1-NEXT: br label %[[EXIT]] +; VF8UF1: [[SCALAR_PH]]: +; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]] +; VF8UF1: [[LOOP_HEADER]]: +; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] +; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 +; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 +; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] +; VF8UF1: [[LOOP_LATCH]]: +; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; VF8UF1: [[EXIT]]: +; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF8UF1-NEXT: ret i8 [[RES]] +; +; VF8UF2-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16( +; VF8UF2-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] { +; VF8UF2-NEXT: [[ENTRY:.*]]: +; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF8UF2: [[VECTOR_PH]]: +; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF8UF2: [[VECTOR_BODY]]: +; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 +; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer +; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP3]] +; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP6]]) +; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF8UF2-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP5]] +; VF8UF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF8UF2: [[MIDDLE_SPLIT]]: +; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label 
%[[MIDDLE_BLOCK:.*]] +; VF8UF2: [[MIDDLE_BLOCK]]: +; VF8UF2-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF8UF2: [[VECTOR_EARLY_EXIT]]: +; VF8UF2-NEXT: br label %[[EXIT]] +; VF8UF2: [[SCALAR_PH]]: +; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] +; VF8UF2: [[LOOP_HEADER]]: +; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] +; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 +; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 +; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] +; VF8UF2: [[LOOP_LATCH]]: +; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; VF8UF2: [[EXIT]]: +; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF8UF2-NEXT: ret i8 [[RES]] +; +; VF16UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16( +; VF16UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] { +; VF16UF1-NEXT: [[ENTRY:.*]]: +; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF16UF1: [[VECTOR_PH]]: +; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF16UF1: [[VECTOR_BODY]]: +; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF16UF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 +; VF16UF1-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer +; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; VF16UF1-NEXT: [[TMP3:%.*]] = freeze <16 x i1> [[TMP1]] +; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) +; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[TMP5]] +; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF16UF1: [[MIDDLE_SPLIT]]: +; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF16UF1: [[MIDDLE_BLOCK]]: +; VF16UF1-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF16UF1: [[VECTOR_EARLY_EXIT]]: +; VF16UF1-NEXT: br label %[[EXIT]] +; VF16UF1: [[SCALAR_PH]]: +; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]] +; VF16UF1: [[LOOP_HEADER]]: +; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] +; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 +; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 +; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] +; VF16UF1: [[LOOP_LATCH]]: +; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; VF16UF1: [[EXIT]]: +; VF16UF1-NEXT: 
[[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF16UF1-NEXT: ret i8 [[RES]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %p.src = getelementptr inbounds i8, ptr %A, i64 %iv + %l = load i8, ptr %p.src, align 1 + %c = icmp eq i8 %l, 0 + br i1 %c, label %exit, label %loop.latch + +loop.latch: + %iv.next = add nsw i64 %iv, 1 + %cmp = icmp eq i64 %iv.next, 17 + br i1 %cmp, label %exit, label %loop.header + +exit: + %res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i8 %res +} diff --git a/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll b/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll index 8075314a65b49..4148c3541ae7a 100644 --- a/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll +++ b/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll @@ -56,19 +56,14 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_1]], i64 1 ; CHECK-NEXT: br label [[INNER_2:%.*]] ; CHECK: inner.2: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[INNER_2]] ], [ 0, [[INNER_1_EXIT]] ] ; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[GEP_5]], [[INNER_1_EXIT]] ], [ [[PTR_IV_2_NEXT:%.*]], [[INNER_2]] ] ; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_2]], i64 1 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: br i1 false, label [[INNER_3_LVER_CHECK:%.*]], label [[INNER_2]] ; CHECK: inner.3.lver.check: -; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], [[INNER_2]] ] ; CHECK-NEXT: [[LCSSA_PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2]], [[INNER_2]] ] ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 1 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_2]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 24 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_2]], i64 16 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -90,10 +85,10 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK: inner.3: ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ 0, [[INNER_3_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER_3]] ] ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds double, ptr [[GEP_6]], i64 [[IV_2]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8, !alias.scope !0, !noalias !3 -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_8]], align 8, !alias.scope !3 +; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_8]], align 8, !alias.scope [[META3]] ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[IV_2]] -; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8, !alias.scope [[META3]] ; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV_2]], 1 ; CHECK-NEXT: br i1 [[C_2]], label 
[[OUTER_LATCH_LOOPEXIT3:%.*]], label [[INNER_3]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll new file mode 100644 index 0000000000000..471b1aa5dce0f --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -O3 -S %s | FileCheck %s + +target triple = "arm64-apple-macosx15.0.0" + +define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16 noundef signext %s) nofree nosync { +; CHECK-LABEL: define i64 @std_find_i16_constant_offset_with_assumptions( +; CHECK-SAME: ptr [[FIRST_COERCE:%.*]], i16 noundef signext [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST_COERCE]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST_COERCE]], i64 256) ] +; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze <8 x i1> [[TMP9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[TMP0]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_SPLIT]]: +; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]] +; CHECK: [[VECTOR_EARLY_EXIT]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[TMP7]] +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[__FIRST_ADDR_0_LCSSA_I_I_PH:%.*]] = phi ptr [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ], [ [[COERCE_VAL_IP]], %[[MIDDLE_SPLIT]] ] +; CHECK-NEXT: [[DOTPRE:%.*]] = ptrtoint ptr [[__FIRST_ADDR_0_LCSSA_I_I_PH]] to i64 +; CHECK-NEXT: ret i64 [[DOTPRE]] +; +entry: + %first = alloca { ptr }, align 8 + %s.addr = alloca i16, align 2 + store ptr %first.coerce, ptr %first, align 8 + store i16 %s, ptr %s.addr, align 2 + %0 = load ptr, ptr %first, align 8 + call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 2) ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 256) ] + %start.ptr = load ptr, ptr %first, align 8 + %1 = load i64, ptr %first, align 8 + %coerce.val.pi.i = add i64 %1, 256 + %coerce.val.ip = inttoptr i64 %coerce.val.pi.i to ptr + 
%cmp.not6.i.i = icmp eq ptr %start.ptr, %coerce.val.ip + br i1 %cmp.not6.i.i, label %return, label %loop.ph + +loop.ph: + %2 = load i16, ptr %s.addr, align 2 + br label %loop.header + +loop.header: + %ptr.iv = phi ptr [ %start.ptr, %loop.ph ], [ %ptr.iv.next, %loop.latch ] + %3 = load i16, ptr %ptr.iv, align 2 + %cmp2.i.i = icmp eq i16 %3, %2 + br i1 %cmp2.i.i, label %return, label %loop.latch + +loop.latch: + %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 2 + %cmp.not.i.i = icmp eq ptr %ptr.iv.next, %coerce.val.ip + br i1 %cmp.not.i.i, label %return, label %loop.header + +return: + %merge = phi ptr [ %start.ptr, %entry ], [ %coerce.val.ip, %loop.latch ], [ %ptr.iv, %loop.header ] + %res = ptrtoint ptr %merge to i64 + ret i64 %res +} + +define i64 @std_find_i16_constant_offset_no_assumptions(ptr %first.coerce, i16 noundef signext %s) nofree nosync { +; CHECK-LABEL: define i64 @std_find_i16_constant_offset_no_assumptions( +; CHECK-SAME: ptr [[FIRST_COERCE:%.*]], i16 noundef signext [[S:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256 +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[FIRST_COERCE]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[PTR_IV]], align 2 +; CHECK-NEXT: [[CMP2_I_I:%.*]] = icmp eq i16 [[TMP1]], [[S]] +; CHECK-NEXT: br i1 [[CMP2_I_I]], label %[[RETURN:.*]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 2 +; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[COERCE_VAL_IP]] +; CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[RETURN]], label %[[LOOP_HEADER]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[MERGE_PH:%.*]] = phi ptr [ [[COERCE_VAL_IP]], %[[LOOP_LATCH]] ], [ [[PTR_IV]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[DOTPRE:%.*]] = ptrtoint ptr [[MERGE_PH]] to i64 +; CHECK-NEXT: ret i64 [[DOTPRE]] +; +entry: + %first = alloca { ptr }, align 8 + %s.addr = alloca i16, align 2 + store ptr %first.coerce, ptr %first, align 8 + store i16 %s, ptr %s.addr, align 2 + %0 = load ptr, ptr %first, align 8 + %start.ptr = load ptr, ptr %first, align 8 + %1 = load i64, ptr %first, align 8 + %coerce.val.pi.i = add i64 %1, 256 + %coerce.val.ip = inttoptr i64 %coerce.val.pi.i to ptr + %cmp.not6.i.i = icmp eq ptr %start.ptr, %coerce.val.ip + br i1 %cmp.not6.i.i, label %return, label %loop.ph + +loop.ph: + %2 = load i16, ptr %s.addr, align 2 + br label %loop.header + +loop.header: + %ptr.iv = phi ptr [ %start.ptr, %loop.ph ], [ %ptr.iv.next, %loop.latch ] + %3 = load i16, ptr %ptr.iv, align 2 + %cmp2.i.i = icmp eq i16 %3, %2 + br i1 %cmp2.i.i, label %return, label %loop.latch + +loop.latch: + %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 2 + %cmp.not.i.i = icmp eq ptr %ptr.iv.next, %coerce.val.ip + br i1 %cmp.not.i.i, label %return, label %loop.header + +return: + %merge = phi ptr [ %start.ptr, %entry ], [ %coerce.val.ip, %loop.latch ], [ %ptr.iv, %loop.header ] + %res = ptrtoint ptr %merge to i64 + ret i64 %res +} + +declare void @llvm.assume(i1 noundef) +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +;. 
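The new std-find.ll PhaseOrdering test above corresponds to the IR clang produces for a std::find-style search once alignment and dereferenceability assumptions are attached to the input pointer. A minimal C++ sketch of that source pattern follows; the function name and the fixed 256-byte extent are illustrative assumptions, not code taken from the test.

  // Hedged sketch of the source pattern behind
  // std_find_i16_constant_offset_with_assumptions: the caller promises that
  // 'first' is 2-byte aligned and that at least 256 bytes starting at 'first'
  // are dereferenceable, so the search loop can be vectorized with an early
  // exit and without runtime pointer checks.
  const short *find_i16(const short *first, const short *last, short s) {
    first = static_cast<const short *>(__builtin_assume_aligned(first, 2));
    __builtin_assume_dereferenceable(first, 256);
    for (; first != last; ++first)
      if (*first == s)
        break;
    return first;
  }

As the _no_assumptions variant checks, dropping the two assumptions leaves the loop scalar, since the wide load could otherwise read past the last element known to be in bounds.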
diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index 678960418d7d7..d2b506af8d398 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" +#include "llvm/Analysis/ScalarEvolutionPatternMatch.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Constants.h" @@ -26,6 +27,8 @@ namespace llvm { +using namespace SCEVPatternMatch; + // We use this fixture to ensure that we clean up ScalarEvolution before // deleting the PassManager. class ScalarEvolutionsTest : public testing::Test { @@ -64,11 +67,6 @@ static std::optional computeConstantDifference(ScalarEvolution &SE, return SE.computeConstantDifference(LHS, RHS); } - static bool matchURem(ScalarEvolution &SE, const SCEV *Expr, const SCEV *&LHS, - const SCEV *&RHS) { - return SE.matchURem(Expr, LHS, RHS); - } - static bool isImpliedCond( ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, @@ -1524,7 +1522,7 @@ TEST_F(ScalarEvolutionsTest, MatchURem) { auto *URemI = getInstructionByName(F, N); auto *S = SE.getSCEV(URemI); const SCEV *LHS, *RHS; - EXPECT_TRUE(matchURem(SE, S, LHS, RHS)); + EXPECT_TRUE(match(S, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), SE))); EXPECT_EQ(LHS, SE.getSCEV(URemI->getOperand(0))); EXPECT_EQ(RHS, SE.getSCEV(URemI->getOperand(1))); EXPECT_EQ(LHS->getType(), S->getType()); @@ -1537,7 +1535,7 @@ TEST_F(ScalarEvolutionsTest, MatchURem) { auto *URem1 = getInstructionByName(F, "rem4"); auto *S = SE.getSCEV(Ext); const SCEV *LHS, *RHS; - EXPECT_TRUE(matchURem(SE, S, LHS, RHS)); + EXPECT_TRUE(match(S, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), SE))); EXPECT_NE(LHS, SE.getSCEV(URem1->getOperand(0))); // RHS and URem1->getOperand(1) have different widths, so compare the // integer values. diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp index 118bf67320a3b..7471355603640 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp @@ -41,7 +41,8 @@ class VPlanSlpTest : public VPlanTestIRBase { AARes.reset(new AAResults(*TLI)); AARes->addAAResult(*BasicAA); PSE.reset(new PredicatedScalarEvolution(*SE, *L)); - LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI)); + LAI.reset( + new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI, &*AC)); IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI)); IAI->analyzeInterleaving(false); return {Plan, *IAI};
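The ScalarEvolutionTest.cpp change above also illustrates the replacement for the removed matchURem test hook: destructuring a urem through ScalarEvolutionPatternMatch. A small sketch, assuming an existing ScalarEvolution &SE and a SCEV expression obtained from it; the helper name is hypothetical:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

  using namespace llvm;
  using namespace llvm::SCEVPatternMatch;

  // Returns true and binds LHS/RHS when Expr is a SCEV expression that
  // ScalarEvolution canonicalized from a urem.
  static bool getURemOperands(ScalarEvolution &SE, const SCEV *Expr,
                              const SCEV *&LHS, const SCEV *&RHS) {
    return match(Expr, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), SE));
  }

Likewise, code that constructs LoopAccessInfo directly now passes an AssumptionCache, as the VPlanSlpTest fixture update above shows.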