From f56dd8217c1be10ba615433f03bae5cb8ab25da2 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer@nvidia.com>
Date: Tue, 21 Oct 2025 01:45:27 -0700
Subject: [PATCH] [DA] Cache delinearization results. NFCI.

An instruction can appear in multiple source-destination dependency
pairs. When it does, delinearization is recomputed for the same
instruction every time it is requested. Instead, cache the delinearized
subscripts and query the cache before computing them. I noticed this
while going through debug logs for DA, and wanted to check whether you
like the idea before I measure whether it has a compile-time benefit,
which is of course the reason to do this.
---
 .../llvm/Analysis/DependenceAnalysis.h        |  6 ++++
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 36 ++++++++++++++++---
 2 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 18a8f8aabb44a..04fa9ad0774bd 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -420,6 +420,12 @@ class DependenceInfo {
   Function *F;
   SmallVector<const SCEVPredicate *, 4> Assumptions;
 
+  /// Cache of delinearized subscripts, used to avoid recomputing them.
+  /// Maps (Instruction, Loop, AccessFn) -> Subscripts.
+  DenseMap<std::tuple<Instruction *, Loop *, const SCEV *>,
+           SmallVector<const SCEV *, 4>>
+      DelinearizationCache;
+
   /// Subscript - This private struct represents a pair of subscripts from
   /// a pair of potentially multi-dimensional array references. We use a
   /// vector of them to guide subscript partitioning.
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 805b6820e1e1c..7e413c65a71a6 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3463,11 +3463,37 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
 
   SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
 
-  if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
-                               SrcSubscripts, DstSubscripts) &&
-      !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
-                                    SrcSubscripts, DstSubscripts))
-    return false;
+  // Check cache for both Src and Dst subscripts
+  auto SrcCacheKey = std::make_tuple(Src, SrcLoop, SrcAccessFn);
+  auto DstCacheKey = std::make_tuple(Dst, DstLoop, DstAccessFn);
+  auto SrcCacheIt = DelinearizationCache.find(SrcCacheKey);
+  auto DstCacheIt = DelinearizationCache.find(DstCacheKey);
+  bool SrcCached = (SrcCacheIt != DelinearizationCache.end());
+  bool DstCached = (DstCacheIt != DelinearizationCache.end());
+
+  if (SrcCached && DstCached) {
+    // Both are cached - use cached values and skip delinearization
+    SrcSubscripts = SrcCacheIt->second;
+    DstSubscripts = DstCacheIt->second;
+    LLVM_DEBUG(dbgs() << "  Delinearization cache hit for both Src and Dst\n");
+  } else {
+    // At least one is not cached. One call delinearizes both, so redo both.
+    if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                                 SrcSubscripts, DstSubscripts) &&
+        !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                                      SrcSubscripts, DstSubscripts))
+      return false;
+
+    // Cache the results
+    if (!SrcCached) {
+      DelinearizationCache[SrcCacheKey] = SrcSubscripts;
+      LLVM_DEBUG(dbgs() << "  Cached Src subscripts\n");
+    }
+    if (!DstCached) {
+      DelinearizationCache[DstCacheKey] = DstSubscripts;
+      LLVM_DEBUG(dbgs() << "  Cached Dst subscripts\n");
+    }
+  }
 
   assert(isLoopInvariant(SrcBase, SrcLoop) &&
          isLoopInvariant(DstBase, DstLoop) &&