From bbb7ddc1a8d1269c4e529019e7e134ea9e9442fb Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Wed, 10 Sep 2025 13:16:44 +0000 Subject: [PATCH] [DA] Add option to dump delinearization result in printing analysis --- .../llvm/Analysis/DependenceAnalysis.h | 48 +++-- llvm/lib/Analysis/DependenceAnalysis.cpp | 202 ++++++++++++------ .../Analysis/DependenceAnalysis/DADelin.ll | 152 ++++++++++++- 3 files changed, 319 insertions(+), 83 deletions(-) diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index f66c79d915665..eec10d62f9b8a 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -284,6 +284,27 @@ class LLVM_ABI FullDependence final : public Dependence { /// DependenceInfo - This class is the main dependence-analysis driver. class DependenceInfo { public: + /// Represents the result of delinearization computed by a depends query. Since + /// a depends query analyzes the relationship between two memory accesses, + /// the delinearization result has two lists of subscript expressions (one + /// for each memory access). The sizes (dimensions) must be shared between + /// the two accesses. + /// TODO: It might be better to define this (or a variant of it) within + /// Delinearize, rather than inside DependenceAnalysis. + struct DelinearizedAccessesInfo { + /// Subscripts for the source memory access. + SmallVector SrcSubscripts; + + /// Subscripts for the destination memory access. + SmallVector DstSubscripts; + + /// Sizes (dimensions) shared between the two accesses. + SmallVector Sizes; + + /// Print the delinearization result. + void print(raw_ostream &OS, unsigned Depth) const; + }; + DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI) : AA(AA), SE(SE), LI(LI), F(F) {} @@ -298,9 +319,13 @@ class DependenceInfo { /// solved at compilation time. 
By default UnderRuntimeAssumptions is false /// for a safe approximation of the dependence relation that does not /// require runtime checks. + /// If \p RecordDelinearization is provided and the delinearization process + /// is successful, the result is stored in \p RecordDelinearization. It's + /// primarily for testing purposes. LLVM_ABI std::unique_ptr depends(Instruction *Src, Instruction *Dst, - bool UnderRuntimeAssumptions = false); + bool UnderRuntimeAssumptions = false, + DelinearizedAccessesInfo *RecordDelinearization = nullptr); /// getSplitIteration - Give a dependence that's splittable at some /// particular level, return the iteration that should be used to split @@ -897,25 +922,24 @@ class DependenceInfo { /// Given a linear access function, tries to recover subscripts /// for each dimension of the array element access. - bool tryDelinearize(Instruction *Src, Instruction *Dst, - SmallVectorImpl &Pair); + std::optional + tryDelinearize(Instruction *Src, Instruction *Dst, + SmallVectorImpl &Pair); /// Tries to delinearize \p Src and \p Dst access functions for a fixed size /// multi-dimensional array. Calls tryDelinearizeFixedSizeImpl() to /// delinearize \p Src and \p Dst separately. - bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst, - const SCEV *SrcAccessFn, const SCEV *DstAccessFn, - SmallVectorImpl &SrcSubscripts, - SmallVectorImpl &DstSubscripts); + std::optional + tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst, + const SCEV *SrcAccessFn, const SCEV *DstAccessFn); /// Tries to delinearize access function for a multi-dimensional array with /// symbolic runtime sizes. - /// Returns true upon success and false otherwise. - bool + /// Returns delinearized result upon success and nullopt otherwise. 
+ std::optional tryDelinearizeParametricSize(Instruction *Src, Instruction *Dst, - const SCEV *SrcAccessFn, const SCEV *DstAccessFn, - SmallVectorImpl &SrcSubscripts, - SmallVectorImpl &DstSubscripts); + const SCEV *SrcAccessFn, + const SCEV *DstAccessFn); /// checkSubscript - Helper function for checkSrcSubscript and /// checkDstSubscript to avoid duplicate code diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index da86a8d2cc9c0..48885617c0849 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -121,6 +121,11 @@ static cl::opt MIVMaxLevelThreshold( cl::desc("Maximum depth allowed for the recursive algorithm used to " "explore MIV direction vectors.")); +static cl::opt DumpDelinearizationResult( + "da-dump-delinearization-result", cl::init(false), cl::Hidden, + cl::desc("When printing analysis, dump the result of delinearization along " + "with dependence.")); + //===----------------------------------------------------------------------===// // basics @@ -183,11 +188,19 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE; ++DstI) { if (DstI->mayReadOrWriteMemory()) { + DependenceInfo::DelinearizedAccessesInfo DAI; + std::unique_ptr D = DA->depends( + &*SrcI, &*DstI, /*UnderRuntimeAssumptions=*/true, &DAI); + OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; - OS << " da analyze - "; - if (auto D = DA->depends(&*SrcI, &*DstI, - /*UnderRuntimeAssumptions=*/true)) { + // Dump the delinearization result if available. + if (DumpDelinearizationResult && !DAI.Sizes.empty()) + DAI.print(OS, 4); + + // Dump the dependence result. + OS << " da analyze - "; + if (D) { #ifndef NDEBUG // Verify that the distance being zero is equivalent to the // direction being EQ. 
@@ -200,7 +213,6 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, "Inconsistent distance and direction."); } #endif - // Normalize negative direction vectors if required by clients. if (NormalizeResults && D->normalize(&SE)) OS << "normalized - "; @@ -489,6 +501,46 @@ LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { } #endif +void DependenceInfo::DelinearizedAccessesInfo::print(raw_ostream &OS, + unsigned Depth) const { + assert(SrcSubscripts.size() == DstSubscripts.size() && + "Mismatched number of subscripts"); + + // Currently tryDelinearizeParametricSize appends the element size to Sizes, + // while tryDelinearizeFixedSize does not. Thus the length of Sizes differs + // depending on which function processed the delinearization. + // TODO: This inconsistency will be fixed after removing GEP-type driven + // delinearization. + assert((SrcSubscripts.size() == Sizes.size() || + SrcSubscripts.size() == Sizes.size() + 1) && + "Mismatched number of subscripts and sizes"); + + if (SrcSubscripts.empty()) { + OS.indent(Depth) << "Delinearization failed\n"; + return; + } + + OS.indent(Depth) << "Delinearized accesses:\n"; + + OS.indent(Depth + 2) << "Sizes[*]"; + // If the length of Sizes is the same as that of SrcSubscripts, the last element + // of Sizes is the element size. It's not particularly useful to print it out + // here. + for (unsigned I = 0; I < SrcSubscripts.size() - 1; I++) + OS << "[" << *Sizes[I] << "]"; + OS << "\n"; + + OS.indent(Depth + 2) << "Src"; + for (const SCEV *S : SrcSubscripts) + OS << "[" << *S << "]"; + OS << "\n"; + + OS.indent(Depth + 2) << "Dst"; + for (const SCEV *S : DstSubscripts) + OS << "[" << *S << "]"; + OS << "\n"; +} + // Updates X with the intersection // of the Constraints X and Y. Returns true if X has changed. 
// Corresponds to Figure 4 from the paper @@ -3312,8 +3364,9 @@ void DependenceInfo::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this function flattens the nested recurrences into separate recurrences /// for each loop level. -bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, - SmallVectorImpl &Pair) { +std::optional +DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, + SmallVectorImpl &Pair) { assert(isLoadOrStore(Src) && "instruction is not load or store"); assert(isLoadOrStore(Dst) && "instruction is not load or store"); Value *SrcPtr = getLoadStorePointerOperand(Src); @@ -3328,28 +3381,27 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, dyn_cast(SE->getPointerBase(DstAccessFn)); if (!SrcBase || !DstBase || SrcBase != DstBase) - return false; + return std::nullopt; - SmallVector SrcSubscripts, DstSubscripts; - - if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, - SrcSubscripts, DstSubscripts) && - !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, - SrcSubscripts, DstSubscripts)) - return false; + std::optional DAI = + tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn); + if (!DAI) + DAI = tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn); + if (!DAI) + return std::nullopt; assert(isLoopInvariant(SrcBase, SrcLoop) && isLoopInvariant(DstBase, DstLoop) && "Expected SrcBase and DstBase to be loop invariant"); - int Size = SrcSubscripts.size(); + int Size = DAI->SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; for (int I = 0; I < Size; I++) - dbgs() << *SrcSubscripts[I]; + dbgs() << *DAI->SrcSubscripts[I]; dbgs() << "\nDstSubscripts: "; for (int I = 0; I < Size; I++) - dbgs() << *DstSubscripts[I]; + dbgs() << *DAI->DstSubscripts[I]; }); // The delinearization transforms a single-subscript MIV dependence test into @@ -3358,21 +3410,21 @@ bool 
DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // has found, and then initialize the pairs following the delinearization. Pair.resize(Size); for (int I = 0; I < Size; ++I) { - Pair[I].Src = SrcSubscripts[I]; - Pair[I].Dst = DstSubscripts[I]; + Pair[I].Src = DAI->SrcSubscripts[I]; + Pair[I].Dst = DAI->DstSubscripts[I]; unifySubscriptType(&Pair[I]); } - return true; + return DAI; } /// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying -/// arrays accessed are fixed-size arrays. Return true if delinearization was +/// arrays accessed are fixed-size arrays. Return delinearized result if /// successful. -bool DependenceInfo::tryDelinearizeFixedSize( - Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, - const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, - SmallVectorImpl &DstSubscripts) { +std::optional +DependenceInfo::tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst, + const SCEV *SrcAccessFn, + const SCEV *DstAccessFn) { LLVM_DEBUG({ const SCEVUnknown *SrcBase = dyn_cast(SE->getPointerBase(SrcAccessFn)); @@ -3382,24 +3434,23 @@ bool DependenceInfo::tryDelinearizeFixedSize( "expected src and dst scev unknowns to be equal"); }); + DelinearizedAccessesInfo DAI; SmallVector SrcSizes; SmallVector DstSizes; - if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts, + if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, DAI.SrcSubscripts, SrcSizes) || - !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts, - DstSizes)) - return false; + !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DAI.DstSubscripts, + DstSizes)) { + return std::nullopt; + } // Check that the two size arrays are non-empty and equal in length and // value. 
if (SrcSizes.size() != DstSizes.size() || - !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { - SrcSubscripts.clear(); - DstSubscripts.clear(); - return false; - } + !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) + return std::nullopt; - assert(SrcSubscripts.size() == DstSubscripts.size() && + assert(DAI.SrcSubscripts.size() == DAI.DstSubscripts.size() && "Expected equal number of entries in the list of SrcSubscripts and " "DstSubscripts."); @@ -3412,6 +3463,9 @@ bool DependenceInfo::tryDelinearizeFixedSize( // impossible to verify this at compile-time. As such we can only delinearize // iff the subscripts are positive and are less than the range of the // dimension. + // TODO: It might be better to consolidate these checks with those in + // tryDelinearizeParametricSize and move them into a member function of + // DelinearizedAccessesInfo. if (!DisableDelinearizationChecks) { auto AllIndicesInRange = [&](SmallVector &DimensionSizes, SmallVectorImpl &Subscripts, @@ -3442,26 +3496,30 @@ bool DependenceInfo::tryDelinearizeFixedSize( return true; }; - if (!AllIndicesInRange(SrcSizes, SrcSubscripts, SrcPtr) || - !AllIndicesInRange(DstSizes, DstSubscripts, DstPtr)) { + if (!AllIndicesInRange(SrcSizes, DAI.SrcSubscripts, SrcPtr) || + !AllIndicesInRange(DstSizes, DAI.DstSubscripts, DstPtr)) { LLVM_DEBUG(dbgs() << "Check failed: AllIndicesInRange.\n"); - SrcSubscripts.clear(); - DstSubscripts.clear(); - return false; + return std::nullopt; } } + + for (int Size : SrcSizes) { + assert(Size > 0 && "Expected array dimension size to be positive."); + DAI.Sizes.push_back(SE->getConstant(SrcAccessFn->getType(), Size)); + } + LLVM_DEBUG({ dbgs() << "Delinearized subscripts of fixed-size array\n" << "SrcGEP:" << *SrcPtr << "\n" << "DstGEP:" << *DstPtr << "\n"; }); - return true; + return DAI; } -bool DependenceInfo::tryDelinearizeParametricSize( - Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, - const SCEV *DstAccessFn, 
SmallVectorImpl &SrcSubscripts, - SmallVectorImpl &DstSubscripts) { +std::optional +DependenceInfo::tryDelinearizeParametricSize(Instruction *Src, Instruction *Dst, + const SCEV *SrcAccessFn, + const SCEV *DstAccessFn) { Value *SrcPtr = getLoadStorePointerOperand(Src); Value *DstPtr = getLoadStorePointerOperand(Dst); @@ -3474,7 +3532,7 @@ bool DependenceInfo::tryDelinearizeParametricSize( const SCEV *ElementSize = SE->getElementSize(Src); if (ElementSize != SE->getElementSize(Dst)) - return false; + return std::nullopt; const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); @@ -3482,7 +3540,9 @@ bool DependenceInfo::tryDelinearizeParametricSize( const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) - return false; + return std::nullopt; + + DelinearizedAccessesInfo DAI; // First step: collect parametric terms in both array references. SmallVector Terms; @@ -3490,19 +3550,18 @@ bool DependenceInfo::tryDelinearizeParametricSize( collectParametricTerms(*SE, DstAR, Terms); // Second step: find subscript sizes. - SmallVector Sizes; - findArrayDimensions(*SE, Terms, Sizes, ElementSize); + findArrayDimensions(*SE, Terms, DAI.Sizes, ElementSize); // Third step: compute the access functions for each subscript. - computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); - computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); + computeAccessFunctions(*SE, SrcAR, DAI.SrcSubscripts, DAI.Sizes); + computeAccessFunctions(*SE, DstAR, DAI.DstSubscripts, DAI.Sizes); // Fail when there is only a subscript: that's a linearized access function. 
- if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || - SrcSubscripts.size() != DstSubscripts.size()) - return false; + if (DAI.SrcSubscripts.size() < 2 || DAI.DstSubscripts.size() < 2 || + DAI.SrcSubscripts.size() != DAI.DstSubscripts.size()) + return std::nullopt; - size_t Size = SrcSubscripts.size(); + size_t Size = DAI.SrcSubscripts.size(); // Statically check that the array bounds are in-range. The first subscript we // don't have a size for and it cannot overflow into another subscript, so is @@ -3512,30 +3571,30 @@ bool DependenceInfo::tryDelinearizeParametricSize( // to the dependency checks. if (!DisableDelinearizationChecks) for (size_t I = 1; I < Size; ++I) { - bool SNN = isKnownNonNegative(SrcSubscripts[I], SrcPtr); - bool DNN = isKnownNonNegative(DstSubscripts[I], DstPtr); - bool SLT = isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]); - bool DLT = isKnownLessThan(DstSubscripts[I], Sizes[I - 1]); + bool SNN = isKnownNonNegative(DAI.SrcSubscripts[I], SrcPtr); + bool DNN = isKnownNonNegative(DAI.DstSubscripts[I], DstPtr); + bool SLT = isKnownLessThan(DAI.SrcSubscripts[I], DAI.Sizes[I - 1]); + bool DLT = isKnownLessThan(DAI.DstSubscripts[I], DAI.Sizes[I - 1]); if (SNN && DNN && SLT && DLT) continue; LLVM_DEBUG({ dbgs() << "Delinearization checks failed: can't prove the following\n"; if (!SNN) - dbgs() << " isKnownNonNegative(" << *SrcSubscripts[I] << ")\n"; + dbgs() << " isKnownNonNegative(" << *DAI.SrcSubscripts[I] << ")\n"; if (!DNN) - dbgs() << " isKnownNonNegative(" << *DstSubscripts[I] << ")\n"; + dbgs() << " isKnownNonNegative(" << *DAI.DstSubscripts[I] << ")\n"; if (!SLT) - dbgs() << " isKnownLessThan(" << *SrcSubscripts[I] << ", " - << *Sizes[I - 1] << ")\n"; + dbgs() << " isKnownLessThan(" << *DAI.SrcSubscripts[I] << ", " + << *DAI.Sizes[I - 1] << ")\n"; if (!DLT) - dbgs() << " isKnownLessThan(" << *DstSubscripts[I] << ", " - << *Sizes[I - 1] << ")\n"; + dbgs() << " isKnownLessThan(" << *DAI.DstSubscripts[I] << ", " + << *DAI.Sizes[I - 1] 
<< ")\n"; }); - return false; + return std::nullopt; } - return true; + return DAI; } //===----------------------------------------------------------------------===// @@ -3583,7 +3642,8 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const { // up to date with respect to this routine. std::unique_ptr DependenceInfo::depends(Instruction *Src, Instruction *Dst, - bool UnderRuntimeAssumptions) { + bool UnderRuntimeAssumptions, + DelinearizedAccessesInfo *RecordDelinearization) { SmallVector Assume; bool PossiblyLoopIndependent = true; if (Src == Dst) @@ -3702,9 +3762,13 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, Pair[0].Dst = DstSCEV; if (Delinearize) { - if (tryDelinearize(Src, Dst, Pair)) { + std::optional DAI = + tryDelinearize(Src, Dst, Pair); + if (DAI) { LLVM_DEBUG(dbgs() << " delinearized\n"); Pairs = Pair.size(); + if (RecordDelinearization) + *RecordDelinearization = *DAI; } } diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll index 8f94a455d3724..e53977dd0b9e8 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll @@ -1,10 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ -; RUN: | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa \ +; RUN: -da-dump-delinearization-result 2>&1 | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8m.main-arm-none-eabi" +; FIXME: It seems that we cannot prove that %m * %o doesn't signed wrap in +; almost all cases. If it wraps, the delinearization result should be +; discarded, otherwise the result can be incorrect. 
+ ;; for (int i = 0; i < n; i++) ;; for (int j = 0; j < m; j++) ;; for (int k = 0; k < o; k++) @@ -13,10 +17,22 @@ target triple = "thumbv8m.main-arm-none-eabi" define void @t1(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't1' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - consistent anti [0 0 0|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! 
; entry: @@ -78,6 +94,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t2(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't2' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! @@ -145,6 +165,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t3(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't3' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! @@ -212,6 +236,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t4(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't4' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! 
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! @@ -279,6 +307,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t5(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't5' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! @@ -346,10 +378,22 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t6(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't6' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{1,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - consistent anti [-1 0 0]! 
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{1,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{1,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; entry: @@ -414,10 +458,22 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't7' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{-1,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - consistent anti [1 0 0]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{-1,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{-1,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! 
; entry: @@ -482,10 +538,22 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't8' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{1,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{1,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{1,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - consistent anti [0 0 1]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%m][%o] +; CHECK-NEXT: Src[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] +; CHECK-NEXT: Dst[{0,+,1}<%for.cond1.preheader>][{0,+,1}<%for.cond5.preheader>][{0,+,1}<%for.body8>] ; CHECK-NEXT: da analyze - none! ; entry: @@ -709,3 +777,83 @@ loop: exit: ret void } + +; for (int i = 0; i < 5; i++) +; for (int j = 0; j < n; j++) +; for (int k = 0; k < m; k++) +; if (j < 5 && k < 5) +; a[i*n*m + j*m + k] = 0; +; +; FIXME: The memory access would be delinearized as follows: +; +; - Sizes: [*][%n][%m] +; - Subscripts: [{0,+,1}<%loop.i>][{0,+,1}<%loop.j>][{0,+,1}<%loop.k>] +; +; The subsequent analysis assumes that different subscript values map to +; different memory locations (i.e., bijective). It is not true in this case, +; which lead incorrect analysis results. 
For example, there are cases like the +; following: +; +; (n, m) | i*n*m + j*m + k +; -----------------|--------------------------------------------- +; (2^16, 2^16) | i*n*m is always 0 +; (2^16 - 1, 2^16) | n*m = -m, hence i*n*m + j*m is 0 when i = j +; +define void @prod_of_sizes_can_wrap(ptr nocapture %a, i32 %n, i32 %m) { +; CHECK-LABEL: 'prod_of_sizes_can_wrap' +; CHECK-NEXT: Src: store i8 0, ptr %gep, align 1 --> Dst: store i8 0, ptr %gep, align 1 +; CHECK-NEXT: Delinearized accesses: +; CHECK-NEXT: Sizes[*][%n][%m] +; CHECK-NEXT: Src[{0,+,1}<%loop.i.header>][{0,+,1}<%loop.j.header>][{0,+,1}<%loop.k.header>] +; CHECK-NEXT: Dst[{0,+,1}<%loop.i.header>][{0,+,1}<%loop.j.header>][{0,+,1}<%loop.k.header>] +; CHECK-NEXT: da analyze - none! +; +entry: + %cmp.n = icmp sgt i32 %n, 0 + %cmp.m = icmp sgt i32 %m, 0 + %guard = and i1 %cmp.n, %cmp.m + %nm = mul i32 %n, %m + br i1 %guard, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop.i.latch ] + br label %loop.j.header + +loop.j.header: + %j = phi i32 [ 0, %loop.i.header ], [ %j.next, %loop.j.latch ] + br label %loop.k.header + +loop.k.header: + %k = phi i32 [ 0, %loop.j.header ], [ %k.next, %loop.k.latch ] + %small.j = icmp slt i32 %j, 5 + %small.k = icmp slt i32 %k, 5 + %small = and i1 %small.j, %small.k + br i1 %small, label %if.then, label %loop.k.latch + +if.then: + %offset.i = mul nsw i32 %i, %nm + %offset.j = mul nsw i32 %j, %m + %offset.0 = add nsw i32 %offset.i, %offset.j + %offset = add nsw i32 %offset.0, %k + %gep = getelementptr inbounds i8, ptr %a, i32 %offset + store i8 0, ptr %gep + br label %loop.k.latch + +loop.k.latch: + %k.next = add nsw i32 %k, 1 + %ec.k = icmp eq i32 %k.next, %m + br i1 %ec.k, label %loop.j.latch, label %loop.k.header + +loop.j.latch: + %j.next = add nsw i32 %j, 1 + %ec.j = icmp eq i32 %j.next, %n + br i1 %ec.j, label %loop.i.latch, label %loop.j.header + +loop.i.latch: + %i.next = add nsw i32 %i, 1 + %ec.i = icmp eq i32 %i.next, 5 + 
br i1 %ec.i, label %exit, label %loop.i.header + +exit: + ret void +}