diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 84b4ad7c1d5a9..8649bd48b9b70 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -413,29 +413,30 @@ class MemoryDepChecker { uint64_t MaxStride; std::optional CommonStride; - /// TypeByteSize is either the common store size of both accesses, or 0 when - /// store sizes mismatch. - uint64_t TypeByteSize; + /// TypeByteSize is a pair of alloc sizes of the source and sink. + std::pair TypeByteSize; + + // HasSameSize is a boolean indicating whether the store sizes of the source + // and sink are equal. + // TODO: Remove this. + bool HasSameSize; bool AIsWrite; bool BIsWrite; DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, std::optional CommonStride, - uint64_t TypeByteSize, bool AIsWrite, - bool BIsWrite) + std::pair TypeByteSize, + bool HasSameSize, bool AIsWrite, bool BIsWrite) : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), - TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} + TypeByteSize(TypeByteSize), HasSameSize(HasSameSize), + AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; /// Get the dependence distance, strides, type size and whether it is a write - /// for the dependence between A and B. Returns a DepType, if we can prove - /// there's no dependence or the analysis fails. Outlined to lambda to limit - /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and - /// others. Returns either the dependence result, if it could already be - /// determined, or a DepDistanceStrideAndSizeInfo struct, noting that - /// TypeByteSize could be 0 when store sizes mismatch, and this should be - /// checked in the caller. + /// for the dependence between A and B. Returns either a DepType, the + /// dependence result, if it could already be determined, or a + /// DepDistanceStrideAndSizeInfo struct. 
std::variant getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 47dccde45337b..67472ddcf1b66 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2095,14 +2095,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( return MemoryDepChecker::Dependence::Unknown; } - TypeSize AStoreSz = DL.getTypeStoreSize(ATy); - TypeSize BStoreSz = DL.getTypeStoreSize(BTy); - - // If store sizes are not the same, set TypeByteSize to zero, so we can check - // it in the caller isDependent. uint64_t ASz = DL.getTypeAllocSize(ATy); uint64_t BSz = DL.getTypeAllocSize(BTy); - uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0; + + // Both the source and sink sizes are needed in dependence checks, depending + // on the use. + std::pair TypeByteSize(ASz, BSz); uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz; uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz; @@ -2113,19 +2111,41 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( if (StrideAScaled == StrideBScaled) CommonStride = StrideAScaled; - // TODO: Historically, we didn't retry with runtime checks when (unscaled) - // strides were different but there is no inherent reason to. - if (!isa(Dist)) - ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt; - // If distance is a SCEVCouldNotCompute, return Unknown immediately. if (isa(Dist)) { LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n"); return Dependence::Unknown; } + if (!isa(Dist)) { + if (!LoopGuards) + LoopGuards.emplace( + ScalarEvolution::LoopGuards::collect(InnermostLoop, SE)); + Dist = SE.applyLoopGuards(Dist, *LoopGuards); + + // TODO: Historically, we didn't retry with runtime checks when (unscaled) + // strides were different but there is no inherent reason to. 
+ ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt; + } + + // When the distance is known to be zero, we're reading/writing the same + // memory location: if the store sizes are not equal, fail with an unknown + // dependence. + TypeSize AStoreSz = DL.getTypeStoreSize(ATy); + TypeSize BStoreSz = DL.getTypeStoreSize(BTy); + if (AStoreSz != BStoreSz && SE.isKnownNonPositive(Dist) && + SE.isKnownNonNegative(Dist)) { + LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence distance with " + "different type sizes\n"); + return Dependence::Unknown; + } + + // TODO: Remove this. + bool HasSameSize = AStoreSz == BStoreSz; + return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, - TypeByteSize, AIsWrite, BIsWrite); + TypeByteSize, HasSameSize, AIsWrite, + BIsWrite); } MemoryDepChecker::Dependence::DepType @@ -2157,9 +2177,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return std::get(Res); } - auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] = - std::get(Res); - bool HasSameSize = TypeByteSize > 0; + auto &[Dist, MaxStride, CommonStride, TypeByteSize, HasSameSize, AIsWrite, + BIsWrite] = std::get(Res); ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getDataLayout(); @@ -2180,32 +2199,22 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, uint64_t ConstDist = match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0; - // Attempt to prove strided accesses independent. - if (APDist) { - // If the distance between accesses and their strides are known constants, - // check whether the accesses interlace each other. 
- if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize && - areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) { - LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); - return Dependence::NoDep; - } - } else { - if (!LoopGuards) - LoopGuards.emplace( - ScalarEvolution::LoopGuards::collect(InnermostLoop, SE)); - Dist = SE.applyLoopGuards(Dist, *LoopGuards); + // Attempt to prove strided accesses independent. If the distance between + // accesses and their strides are known constants, check whether the accesses + // interlace each other. + if (ConstDist && CommonStride && CommonStride > 1 && HasSameSize && + areStridedAccessesIndependent(ConstDist, *CommonStride, + TypeByteSize.first)) { + LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); + return Dependence::NoDep; } // Negative distances are not plausible dependencies. if (SE.isKnownNonPositive(Dist)) { if (SE.isKnownNonNegative(Dist)) { - if (HasSameSize) { - // Write to the same location with the same size. - return Dependence::Forward; - } - LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " - "different type sizes\n"); - return Dependence::Unknown; + // Write to the same location with the same size. + assert(HasSameSize && "Accesses must have the same size"); + return Dependence::Forward; } bool IsTrueDataDependence = (AIsWrite && !BIsWrite); @@ -2223,7 +2232,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, : Dependence::Unknown; } if (!HasSameSize || - couldPreventStoreLoadForward(ConstDist, TypeByteSize)) { + couldPreventStoreLoadForward(ConstDist, TypeByteSize.first)) { LLVM_DEBUG( dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); return Dependence::ForwardButPreventsForwarding; @@ -2289,7 +2298,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // We know that Dist is positive, but it may not be constant. 
Use the signed // minimum for computations below, as this ensures we compute the closest // possible dependence distance. - uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize; + uint64_t MinDistanceNeeded = + MaxStride * (MinNumIter - 1) + TypeByteSize.first; if (MinDistanceNeeded > static_cast(MinDistance)) { if (!ConstDist) { // For non-constant distances, we checked the lower bound of the @@ -2317,14 +2327,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist && - couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride)) + couldPreventStoreLoadForward(MinDistance, TypeByteSize.first, + *CommonStride)) return Dependence::BackwardVectorizableButPreventsForwarding; uint64_t MaxVF = MinDepDistBytes / MaxStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); - uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + uint64_t MaxVFInBits = MaxVF * TypeByteSize.first * 8; if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) { // For non-constant distances, we checked the lower bound of the dependence // distance and the distance may be larger at runtime (and safe for diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index 27a85c7a46084..b503ede0845c3 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -187,6 +187,45 @@ exit: ret void } +; In the following test, dependence distance is possibly zero, +; but this is not equivalent to the condition known-non-positive +; and known-non-negative. 
+ +define void @possibly_zero_dist_diff_typesz(ptr %p) { +; CHECK-LABEL: 'possibly_zero_dist_diff_typesz' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Forward: +; CHECK-NEXT: %ld.p = load i32, ptr %gep.p.iv.i32, align 1 -> +; CHECK-NEXT: store i16 %trunc, ptr %gep.p.iv.i16, align 1 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %gep.p.iv.i32 = getelementptr inbounds nuw i32, ptr %p, i16 %iv + %ld.p = load i32, ptr %gep.p.iv.i32, align 1 + %trunc = trunc i32 %ld.p to i16 + %gep.p.iv.i16 = getelementptr inbounds nuw i16, ptr %p, i16 %iv + store i16 %trunc, ptr %gep.p.iv.i16, align 1 + %iv.next = add nuw nsw i16 %iv, 1 + %exit.cond = icmp eq i16 %iv.next, 32 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + ; In the following test, the sink is loop-invariant. 
define void @type_size_equivalence_sink_loopinv(ptr nocapture %vec, i64 %n) { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll b/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll index 9cc0a976c900e..d51e1d93eb833 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll @@ -38,13 +38,55 @@ exit: ret void } -define void @safe_deps_2_due_to_dependence_distance(i16 %n, ptr %p3, i16 noundef %q, ptr %p1, ptr %p2) { -; CHECK-LABEL: 'safe_deps_2_due_to_dependence_distance' +define void @safe_with_rtchecks_loopguards(i16 %n, ptr %p3, i16 noundef %q, ptr %p1, ptr %p2) { +; CHECK-LABEL: 'safe_with_rtchecks_loopguards' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4 +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv +; CHECK-NEXT: Against group GRP3: +; CHECK-NEXT: %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4 +; CHECK-NEXT: Check 3: +; CHECK-NEXT: Comparing group GRP1: +; CHECK-NEXT: %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4 +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: %arrayidx42 = 
getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv +; CHECK-NEXT: Check 4: +; CHECK-NEXT: Comparing group GRP1: +; CHECK-NEXT: %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4 +; CHECK-NEXT: Against group GRP3: +; CHECK-NEXT: %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4 +; CHECK-NEXT: Check 5: +; CHECK-NEXT: Comparing group GRP2: +; CHECK-NEXT: %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv +; CHECK-NEXT: Against group GRP3: +; CHECK-NEXT: %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4 ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %alloca High: (-4 + (8 * %n) + %alloca)) +; CHECK-NEXT: Member: {%alloca,+,8}<%loop> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: (4 + %alloca) High: ((8 * %n) + %alloca)) +; CHECK-NEXT: Member: {(4 + %alloca),+,8}<%loop> +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: ((8 * %n) + %alloca) High: (-4 + (16 * %n) + %alloca)) +; CHECK-NEXT: Member: {((8 * %n) + %alloca),+,8}<%loop> +; CHECK-NEXT: Group GRP3: +; CHECK-NEXT: (Low: (4 + (8 * %n) + %alloca) High: ((16 * %n) + %alloca)) +; CHECK-NEXT: Member: {(4 + (8 * %n) + %alloca),+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll b/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll new file mode 100644 index 0000000000000..55d973482a5af --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +define void @unsafe_dep_loopguards(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: 'unsafe_dep_loopguards' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.a = load [4 x i32], ptr %gep.a.offset.2, align 4 -> +; CHECK-NEXT: store i32 0, ptr %gep.a.offset.4, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: ptr %c +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.a.offset.2 = getelementptr i32, ptr %gep.a.offset, i32 4 +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: ptr %c +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: %gep.a.offset.4 = getelementptr i32, ptr %a, i32 %offset.4 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %c High: (16 + %c)) +; CHECK-NEXT: Member: %c +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: (16 + (4 * (sext i32 %ld.b to i64)) + %a) High: (64 + (4 * (sext i32 %ld.b to i64)) + %a)) +; CHECK-NEXT: Member: {(16 + (4 * (sext i32 %ld.b to i64)) + %a),+,32}<%loop> +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: ((4 * (sext i32 (4 + %ld.b) to i64)) + %a) High: (36 + (4 * (sext i32 (4 
+ %ld.b) to i64)) + %a)) +; CHECK-NEXT: Member: {((4 * (sext i32 (4 + %ld.b) to i64)) + %a),+,32}<%loop> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: {(4 + %ld.b),+,8}<%loop> Added Flags: +; CHECK-NEXT: {((4 * (sext i32 (4 + %ld.b) to i64)) + %a),+,32}<%loop> Added Flags: +; CHECK-NEXT: {%ld.b,+,8}<%loop> Added Flags: +; CHECK-NEXT: {(16 + (4 * (sext i32 %ld.b to i64)) + %a),+,32}<%loop> Added Flags: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.a.offset.2 = getelementptr i32, ptr %gep.a.offset, i32 4: +; CHECK-NEXT: (16 + (4 * (sext i32 {%ld.b,+,8}<%loop> to i64)) + %a) +; CHECK-NEXT: --> {(16 + (4 * (sext i32 %ld.b to i64)) + %a),+,32}<%loop> +; CHECK-NEXT: [PSE] %gep.a.offset.4 = getelementptr i32, ptr %a, i32 %offset.4: +; CHECK-NEXT: ((4 * (sext i32 {(4 + %ld.b),+,8}<%loop> to i64)) + %a) +; CHECK-NEXT: --> {((4 * (sext i32 (4 + %ld.b) to i64)) + %a),+,32}<%loop> +; +entry: + %ld.b = load i32, ptr %b + %guard.cond = icmp slt i32 0, %ld.b + br i1 %guard.cond, label %exit, label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %offset = add i32 %ld.b, %iv + %gep.a.offset = getelementptr i32, ptr %a, i32 %offset + %gep.a.offset.2 = getelementptr i32, ptr %gep.a.offset, i32 4 + %ld.a = load [4 x i32], ptr %gep.a.offset.2 + store [4 x i32] %ld.a, ptr %c + %offset.4 = add i32 %offset, 4 + %gep.a.offset.4 = getelementptr i32, ptr %a, i32 %offset.4 + store i32 0, ptr %gep.a.offset.4 + %iv.next = add i32 %iv, 8 + %exit.cond = icmp eq i32 %iv.next, 16 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll index d5239d5a4e33d..4417486ac8212 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll +++ 
b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll @@ -1,6 +1,5 @@ ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize -debug-only=loop-accesses -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s -check-prefix=LOOP-ACCESS -; RUN: opt -passes=loop-vectorize -debug-only=vectorutils -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-redhat-linux-gnu" @@ -25,29 +24,6 @@ target triple = "x86_64-redhat-linux-gnu" ; LOOP-ACCESS: Too many dependences, stopped recording -; If no dependences are recorded because there are too many, LoopAccessAnalysis -; just conservatively returns true for any pair of instructions compared (even -; those belonging to the same store group). This tests make sure that we do not -; incorrectly release a store group which had no dependences between its -; members, even if we have no dependences recorded because there are too many. 
- -; CHECK: LV: Creating an interleave group with: store ptr null, ptr %phi5, align 8 -; CHECK: LV: Inserted: store ptr %load12, ptr %getelementptr11, align 8 -; CHECK: into the interleave group with store ptr null, ptr %phi5 -; CHECK: LV: Inserted: store ptr %load7, ptr %getelementptr, align 8 -; CHECK: into the interleave group with store ptr null, ptr %phi5 - -; CHECK: LV: Creating an interleave group with: store ptr null, ptr %getelementptr13, align 8 -; CHECK: LV: Inserted: store ptr null, ptr %phi6, align 8 -; CHECK: into the interleave group with store ptr null, ptr %getelementptr13 -; CHECK: LV: Invalidated store group due to dependence between store ptr %load7, ptr %getelementptr, align 8 and store ptr null, ptr %getelementptr13, align 8 -; CHECK-NOT: LV: Invalidated store group due to dependence between - -; Note: The (only) invalidated store group is the one containing A (store ptr %load7, ptr %getelementptr, align 8) which is: -; Group with instructions: -; store ptr null, ptr %phi5, align 8 -; store ptr %load7, ptr %getelementptr, align 8 -; store ptr %load12, ptr %getelementptr11, align 8 define void @test(ptr %arg, ptr %arg1) local_unnamed_addr #0 { bb: br label %bb2 diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index 5894c3af1d637..3dfe14b005405 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -109,43 +109,17 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) { ; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[DST_1]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1]] ] ; CHECK-NEXT: [[CALL:%.*]] = call i32 @val() ; CHECK-NEXT: [[SEL_DST:%.*]] = select i1 [[C]], ptr [[DST_1]], ptr [[DST_2]] -; CHECK-NEXT: [[SEL_DST_LCSSA12:%.*]] = ptrtoint ptr [[SEL_DST]] to i64 ; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, 
ptr [[PTR_IV_1]], i64 1 ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK-NEXT: br i1 [[EC_1]], label %[[LOOP_2_HEADER_PREHEADER:.*]], label %[[LOOP_1]] ; CHECK: [[LOOP_2_HEADER_PREHEADER]]: ; CHECK-NEXT: [[PTR_IV_1_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1]], %[[LOOP_1]] ] ; CHECK-NEXT: [[SEL_DST_LCSSA:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ] -; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] -; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[PTR_IV_1_LCSSA]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 1022 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SEL_DST_LCSSA]], i64 1022 -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[SEL_DST_LCSSA]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP4]], align 1 -; CHECK-NEXT: store <2 x i8> [[WIDE_LOAD]], ptr [[NEXT_GEP]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022 -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1023, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = 
phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]] ; CHECK: [[LOOP_2_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ 1, %[[LOOP_2_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ] ; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i32 [[IV]], 1024 ; CHECK-NEXT: br i1 [[EC_2]], label %[[EXIT:.*]], label %[[LOOP_2_LATCH]] ; CHECK: [[LOOP_2_LATCH]]: @@ -154,7 +128,7 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) { ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV_3]], align 1 ; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1 ; CHECK-NEXT: store i8 [[L]], ptr [[PTR_IV_2]], align 1 -; CHECK-NEXT: br label %[[LOOP_2_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label %[[LOOP_2_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -240,7 +214,7 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP15]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -253,7 +227,7 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV_2]] ; CHECK-NEXT: store i32 0, ptr [[GEP_DST]], align 4 ; CHECK-NEXT: [[EC_2:%.*]] = icmp slt i32 [[IV_2_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -326,7 +300,7 @@ define void @expand_diff_neg_ptrtoint_expr(ptr %src, ptr %start) { ; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], -2 -; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -343,7 +317,7 @@ define void @expand_diff_neg_ptrtoint_expr(ptr %src, ptr %start) { ; CHECK-NEXT: store i64 [[L]], ptr [[PTR_IV_3]], align 8 ; CHECK-NEXT: [[IV_NEXT_2]] = add i64 [[IV_2]], 1 ; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_2]], 0 -; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP_3]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP_3]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -420,7 +394,7 @@ define void @scev_exp_reuse_const_add(ptr %dst, ptr %src) { ; CHECK-NEXT: store <2 x i16> [[WIDE_LOAD]], ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 40 -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -436,7 +410,7 @@ define void @scev_exp_reuse_const_add(ptr %dst, ptr %src) { ; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 2 ; CHECK-NEXT: store i16 [[L]], ptr [[PTR_IV_2]], align 2 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_1]], 40 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_2]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_2]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ;