From f56dd8217c1be10ba615433f03bae5cb8ab25da2 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 21 Oct 2025 01:45:27 -0700 Subject: [PATCH] [DA] Cache delinearization results. NFCI. An instruction can appear in multiple source-destination dependency pairs. When that happens, delinearization is requested and recomputed for the same instruction over and over. Instead, cache the delinearization result and query the cache before recomputing it. I noticed this while going through the DA debug logs, and wanted to find out whether you like the idea before I measure whether it has a compile-time benefit, which is of course the reason for doing this. --- .../llvm/Analysis/DependenceAnalysis.h | 6 ++++ llvm/lib/Analysis/DependenceAnalysis.cpp | 36 ++++++++++++++++--- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 18a8f8aabb44a..04fa9ad0774bd 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -420,6 +420,12 @@ class DependenceInfo { Function *F; SmallVector Assumptions; + /// Cache for delinearized subscripts to avoid recomputation. + /// Maps (Instruction, Loop, AccessFn) -> Subscripts + DenseMap, + SmallVector> + DelinearizationCache; + /// Subscript - This private struct represents a pair of subscripts from /// a pair of potentially multi-dimensional array references. We use a /// vector of them to guide subscript partitioning. 
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 805b6820e1e1c..7e413c65a71a6 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3463,11 +3463,37 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SmallVector SrcSubscripts, DstSubscripts; - if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, - SrcSubscripts, DstSubscripts) && - !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, - SrcSubscripts, DstSubscripts)) - return false; + // Check cache for both Src and Dst subscripts + auto SrcCacheKey = std::make_tuple(Src, SrcLoop, SrcAccessFn); + auto DstCacheKey = std::make_tuple(Dst, DstLoop, DstAccessFn); + auto SrcCacheIt = DelinearizationCache.find(SrcCacheKey); + auto DstCacheIt = DelinearizationCache.find(DstCacheKey); + bool SrcCached = (SrcCacheIt != DelinearizationCache.end()); + bool DstCached = (DstCacheIt != DelinearizationCache.end()); + + if (SrcCached && DstCached) { + // Both are cached - use cached values and skip delinearization + SrcSubscripts = SrcCacheIt->second; + DstSubscripts = DstCacheIt->second; + LLVM_DEBUG(dbgs() << " Delinearization cache hit for both Src and Dst\n"); + } else { + // At least one is not cached - need to compute both + if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts) && + !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts)) + return false; + + // Cache the results + if (!SrcCached) { + DelinearizationCache[SrcCacheKey] = SrcSubscripts; + LLVM_DEBUG(dbgs() << " Cached Src subscripts\n"); + } + if (!DstCached) { + DelinearizationCache[DstCacheKey] = DstSubscripts; + LLVM_DEBUG(dbgs() << " Cached Dst subscripts\n"); + } + } assert(isLoopInvariant(SrcBase, SrcLoop) && isLoopInvariant(DstBase, DstLoop) &&