[DTLTO] [LLVM] Initial DTLTO cache implementation

romanova-ekaterina · romanova-ekaterina · commit 1ecbb72954b1 · 2025-09-03T23:13:12.000-07:00
This patch implements DTLTO cache. DTLTO cache is implemented the same
 way as ThinLTO cache. In fact the same class Cache is used for both of them.

 Because parameters for codegen are different for DTLTO and ThinLTO
 (DTLTO codegen is done by invoking clang and its codegen parameters are
 not fully synchronized with codegen parameters used by LTO backend).
 The object files generated by DTLTO and ThinLTO might be different and
 shouldn't be mixed. If ThinLTO and DTLTO share the same cache
 directory, the cache file won't interfere with each other.

 I added a couple of test files in cross-project-test/dtlto directory,
 but if more tests are required for initial implementation, I could add
 them.
diff --git a/cross-project-tests/dtlto/dtlto-cache.test b/cross-project-tests/dtlto/dtlto-cache.test
@@ -0,0 +1,89 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# Show that the ThinLTO cache works with DTLTO.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that the cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate1.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that there are two backend compilation jobs occurred.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker again and check that a fully populated cache is used correctly, 
+# i.e., no additional cache entries are created for cache hits.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate2.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that there are no backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 1
+RUN: ls cache.dir | count 3
+
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c main.c -o main.O0.o
+
+# Execute the linker again and check that the cache is populated correctly when there 
+# are no cache hits but there are existing cache entries.
+# As a side effect, this also verifies that the optimization level is considered when 
+# evaluating the cache entry key.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.O0.o foo.O0.o -o populate3.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that there are two new backend compilation jobs occurred.
+RUN: grep -wo args populate3.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir | count 5
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c main-partial.c 
+
+# Execute the linker and check that everything works correctly with the partially populated cache;
+# One more cache entry should be generated after this run.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main-partial.o foo.o -o main-partial.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that there is one new backend compilation jobs occurred.
+RUN: grep -wo args main-partial.*.dist-file.json | wc -l | grep -qx 2
+RUN: ls cache.dir | count 6
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- main.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc);
+}
+
+#--- main-partial.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc+1);
+}
diff --git a/cross-project-tests/dtlto/dtlto-thinlto-cache.test b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
@@ -0,0 +1,66 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# This test verifies that a cache populated by a ThinLTO link is not reused by a DTLTO link and vice versa.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that ThinLTO cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o main.elf \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, implying they do not share entries.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate1.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that there are two backend compilation jobs occurred.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir | count 5
+
+# Clean up cache directory.
+RUN: rm -rf cache.dir
+
+# Execute the linker and check that DTLTO cache is populated. 
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate2.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that there are two backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, 
+# implying they do not share entries.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o main.elf \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+RUN: ls cache.dir | count 5
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- main.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc);
+}
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
@@ -281,6 +281,7 @@ struct Config {
   LLVM_ABI Error addSaveTemps(std::string OutputFileName,
                               bool UseInputModulePath = false,
                               const DenseSet<StringRef> &SaveTempsArgs = {});
+  mutable uint8_t Dtlto = 0;
 };
 
 struct LTOLLVMDiagnosticHandler : public DiagnosticHandler {
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
@@ -168,6 +168,7 @@ std::string llvm::computeLTOCacheKey(
   AddString(Conf.OverrideTriple);
   AddString(Conf.DefaultTriple);
   AddString(Conf.DwoDir);
+  AddUint8(Conf.Dtlto);
 
   // Include the hash for the current module
   auto ModHash = Index.getModuleHash(ModuleID);
@@ -2244,14 +2245,17 @@ class OutOfProcessThinBackend : public CGThinBackend {
 
   SmallVector<StringRef, 0> CodegenOptions;
   DenseSet<StringRef> CommonInputs;
-
+  std::atomic<uint64_t> CachedJobs{0};
   // Information specific to individual backend compilation job.
   struct Job {
     unsigned Task;
     StringRef ModuleID;
     StringRef NativeObjectPath;
     StringRef SummaryIndexPath;
     ImportsFilesContainer ImportsFiles;
+    std::string CacheKey;
+    AddStreamFn CacheAddStream;
+    bool Cached = false;
   };
   // The set of backend compilations jobs.
   SmallVector<Job> Jobs;
@@ -2265,12 +2269,15 @@ class OutOfProcessThinBackend : public CGThinBackend {
   // The target triple to supply for backend compilations.
   llvm::Triple Triple;
 
+  // Cache
+  FileCache Cache;
+
 public:
   OutOfProcessThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
+      AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
       StringRef LinkerOutputFile, StringRef Distributor,
       ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
@@ -2280,14 +2287,16 @@ class OutOfProcessThinBackend : public CGThinBackend {
                       ShouldEmitImportsFiles, ThinLTOParallelism),
         LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
         DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
-        RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
+        RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
+        Cache(std::move(CacheFn)) {}
 
   virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
                      llvm::Triple Triple) override {
     UID = itostr(sys::Process::getProcessId());
     Jobs.resize((size_t)ThinLTONumTasks);
     this->ThinLTOTaskOffset = ThinLTOTaskOffset;
     this->Triple = Triple;
+    this->Conf.Dtlto = 1;
   }
 
   Error start(
@@ -2304,13 +2313,14 @@ class OutOfProcessThinBackend : public CGThinBackend {
                                        itostr(Task) + "." + UID + ".native.o");
 
     Job &J = Jobs[Task - ThinLTOTaskOffset];
-    J = {
-        Task,
-        ModulePath,
-        Saver.save(ObjFilePath.str()),
-        Saver.save(ObjFilePath.str() + ".thinlto.bc"),
-        {} // Filled in by emitFiles below.
-    };
+    J = {Task,
+         ModulePath,
+         Saver.save(ObjFilePath.str()),
+         Saver.save(ObjFilePath.str() + ".thinlto.bc"),
+         {}, // Filled in by emitFiles below.
+         "",
+         nullptr,
+         false};
 
     assert(ModuleToDefinedGVSummaries.count(ModulePath));
 
@@ -2326,6 +2336,35 @@ class OutOfProcessThinBackend : public CGThinBackend {
             else
               Err = std::move(E);
           }
+
+          if (Cache.isValid() &&
+              CombinedIndex.modulePaths().count(J.ModuleID) &&
+              all_of(CombinedIndex.getModuleHash(J.ModuleID),
+                     [](uint32_t V) { return V != 0; })) {
+
+            const GVSummaryMapTy &DefinedGlobals =
+                ModuleToDefinedGVSummaries.find(ModulePath)->second;
+
+            // Compute and store a bitcode module cache key.
+            J.CacheKey = computeLTOCacheKey(
+                Conf, CombinedIndex, ModulePath, ImportList, ExportList,
+                ResolvedODR, DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
+
+            // Check if we have something in the cache.
+            auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
+            if (Error E = CacheAddStreamExp.takeError()) {
+              Err = joinErrors(std::move(*Err), std::move(E));
+            } else {
+              AddStreamFn &CacheAddStream = *CacheAddStreamExp;
+              if (!CacheAddStream) {
+                J.Cached = true; // Cache hit, mark the job as cached.
+                CachedJobs.fetch_add(1);
+              } else {
+                // Cache miss, save cache 'add stream' function for a later use.
+                J.CacheAddStream = std::move(CacheAddStream);
+              }
+            }
+          }
         },
         std::ref(J), std::ref(ImportList));
 
@@ -2417,6 +2456,9 @@ class OutOfProcessThinBackend : public CGThinBackend {
         for (const auto &J : Jobs) {
           assert(J.Task != 0);
 
+          if (!Cache.getCacheDirectoryPath().empty() && J.Cached)
+            continue;
+
           SmallVector<StringRef, 2> Inputs;
           SmallVector<StringRef, 1> Outputs;
 
@@ -2488,20 +2530,26 @@ class OutOfProcessThinBackend : public CGThinBackend {
         removeFile(JsonFile);
     });
 
-    SmallVector<StringRef, 3> Args = {DistributorPath};
-    llvm::append_range(Args, DistributorArgs);
-    Args.push_back(JsonFile);
-    std::string ErrMsg;
-    if (sys::ExecuteAndWait(Args[0], Args,
-                            /*Env=*/std::nullopt, /*Redirects=*/{},
-                            /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
-      return make_error<StringError>(
-          BCError + "distributor execution failed" +
-              (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
-          inconvertibleErrorCode());
+    if (CachedJobs.load() < Jobs.size()) {
+      SmallVector<StringRef, 3> Args = {DistributorPath};
+      llvm::append_range(Args, DistributorArgs);
+      Args.push_back(JsonFile);
+      std::string ErrMsg;
+      if (sys::ExecuteAndWait(Args[0], Args,
+                              /*Env=*/std::nullopt, /*Redirects=*/{},
+                              /*SecondsToWait=*/0, /*MemoryLimit=*/0,
+                              &ErrMsg)) {
+        return make_error<StringError>(
+            BCError + "distributor execution failed" +
+                (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
+            inconvertibleErrorCode());
+      }
     }
 
     for (auto &Job : Jobs) {
+      if (Cache.isValid() && !Job.CacheKey.empty())
+        if (Job.Cached)
+          continue;
       // Load the native object from a file into a memory buffer
       // and store its contents in the output buffer.
       auto ObjFileMbOrErr =
@@ -2512,15 +2560,32 @@ class OutOfProcessThinBackend : public CGThinBackend {
             BCError + "cannot open native object file: " +
                 Job.NativeObjectPath + ": " + EC.message(),
             inconvertibleErrorCode());
-      auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
-      if (Error Err = StreamOrErr.takeError())
-        report_fatal_error(std::move(Err));
-      auto &Stream = *StreamOrErr->get();
-      *Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer();
-      if (Error Err = Stream.commit())
-        report_fatal_error(std::move(Err));
-    }
 
+      MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
+      if (Cache.isValid() && Job.CacheAddStream) {
+        // Obtain a file stream for a storing a cache entry.
+        auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID);
+        if (!CachedFileStreamOrErr)
+          return joinErrors(
+              CachedFileStreamOrErr.takeError(),
+              createStringError(inconvertibleErrorCode(),
+                                "Cannot get a cache file stream: %s",
+                                Job.NativeObjectPath.data()));
+        // Store a file buffer into the cache stream.
+        auto &CacheStream = *(CachedFileStreamOrErr->get());
+        *(CacheStream.OS) << ObjFileMbRef.getBuffer();
+        if (Error Err = CacheStream.commit())
+          return Err;
+      } else {
+        auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
+        if (Error Err = StreamOrErr.takeError())
+          report_fatal_error(std::move(Err));
+        auto &Stream = *StreamOrErr->get();
+        *Stream.OS << ObjFileMbRef.getBuffer();
+        if (Error Err = Stream.commit())
+          report_fatal_error(std::move(Err));
+      }
+    }
     return Error::success();
   }
 };
@@ -2535,12 +2600,12 @@ ThinBackend lto::createOutOfProcessThinBackend(
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache /*Cache*/) {
+          AddStreamFn AddStream, FileCache Cache) {
         return std::make_unique<OutOfProcessThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
-            AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
-            LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler,
-            RemoteCompilerArgs, SaveTemps);
+            AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
+            ShouldEmitImportsFiles, LinkerOutputFile, Distributor,
+            DistributorArgs, RemoteCompiler, RemoteCompilerArgs, SaveTemps);
       };
   return ThinBackend(Func, Parallelism);
 }