From 238322d9a69bc25c9f85ee9be97aed66ad62bf2d Mon Sep 17 00:00:00 2001
From: PGZXB <420254146@qq.com>
Date: Tue, 22 Mar 2022 18:42:35 +0800
Subject: [PATCH 1/5] Refactor offline-cache and support it on cuda

---
 taichi/backends/cpu/codegen_cpu.cpp   | 10 +++-
 taichi/backends/cpu/codegen_cpu.h     |  6 +-
 taichi/backends/cuda/codegen_cuda.cpp |  6 +-
 taichi/backends/wasm/codegen_wasm.cpp |  4 ++
 taichi/codegen/codegen.cpp            |  5 +-
 taichi/codegen/codegen.h              |  3 +-
 taichi/codegen/codegen_llvm.cpp       | 63 ++++++++++++++++-----
 taichi/codegen/codegen_llvm.h         | 17 +++---
 taichi/llvm/llvm_offline_cache.cpp    | 33 ++++++++---
 taichi/llvm/llvm_offline_cache.h      | 13 ++++-
 taichi/llvm/llvm_program.cpp          | 79 ++-------------------------
 taichi/llvm/llvm_program.h            | 11 +---
 taichi/program/kernel.h               |  8 ---
 13 files changed, 119 insertions(+), 139 deletions(-)

diff --git a/taichi/backends/cpu/codegen_cpu.cpp b/taichi/backends/cpu/codegen_cpu.cpp
index bedeaea907885..9be50d6bbcebb 100644
--- a/taichi/backends/cpu/codegen_cpu.cpp
+++ b/taichi/backends/cpu/codegen_cpu.cpp
@@ -16,11 +16,15 @@ class CodeGenLLVMCPU : public CodeGenLLVM {
  public:
   using IRVisitor::visit;
 
-  CodeGenLLVMCPU(Kernel *kernel, IRNode *ir, bool needs_cache)
-      : CodeGenLLVM(kernel, ir, nullptr, needs_cache) {
+  CodeGenLLVMCPU(Kernel *kernel, IRNode *ir)
+      : CodeGenLLVM(kernel, ir, nullptr) {
     TI_AUTO_PROF
   }
 
+  bool supports_offline_cache() const override {
+    return true;
+  }
+
   void create_offload_range_for(OffloadedStmt *stmt) override {
     int step = 1;
 
@@ -195,7 +199,7 @@ class CodeGenLLVMCPU : public CodeGenLLVM {
 
 FunctionType CodeGenCPU::codegen() {
   TI_AUTO_PROF
-  return CodeGenLLVMCPU(kernel, ir, needs_cache_).gen();
+  return CodeGenLLVMCPU(kernel, ir).gen();
 }
 
 TLANG_NAMESPACE_END
diff --git a/taichi/backends/cpu/codegen_cpu.h b/taichi/backends/cpu/codegen_cpu.h
index a00458a73165a..c3d723c75eff2 100644
--- a/taichi/backends/cpu/codegen_cpu.h
+++ b/taichi/backends/cpu/codegen_cpu.h
@@ -8,14 +8,10 @@ TLANG_NAMESPACE_BEGIN
 
 class CodeGenCPU : public KernelCodeGen {
  public:
-  CodeGenCPU(Kernel *kernel, IRNode *ir = nullptr, bool needs_cache = false)
-      : KernelCodeGen(kernel, ir), needs_cache_(needs_cache) {
+  CodeGenCPU(Kernel *kernel, IRNode *ir = nullptr) : KernelCodeGen(kernel, ir) {
   }
 
   FunctionType codegen() override;
-
- private:
-  bool needs_cache_{false};
 };
 
 TLANG_NAMESPACE_END
diff --git a/taichi/backends/cuda/codegen_cuda.cpp b/taichi/backends/cuda/codegen_cuda.cpp
index 45e1e6a9affef..c1c3dcf1807b6 100644
--- a/taichi/backends/cuda/codegen_cuda.cpp
+++ b/taichi/backends/cuda/codegen_cuda.cpp
@@ -31,10 +31,12 @@ class CodeGenLLVMCUDA : public CodeGenLLVM {
       : CodeGenLLVM(kernel, ir) {
   }
 
+  bool supports_offline_cache() const override {
+    return true;
+  }
+
   FunctionType compile_module_to_executable() override {
 #ifdef TI_WITH_CUDA
-    eliminate_unused_functions();
-
     auto offloaded_local = offloaded_tasks;
     for (auto &task : offloaded_local) {
       llvm::Function *func = module->getFunction(task.name);
diff --git a/taichi/backends/wasm/codegen_wasm.cpp b/taichi/backends/wasm/codegen_wasm.cpp
index a49d70b2f3886..bba625393ecf8 100644
--- a/taichi/backends/wasm/codegen_wasm.cpp
+++ b/taichi/backends/wasm/codegen_wasm.cpp
@@ -212,6 +212,10 @@ class CodeGenLLVMWASM : public CodeGenLLVM {
 
   FunctionType gen() override {
     TI_AUTO_PROF
+    // lower kernel
+    if (!kernel->lowered()) {
+      kernel->lower();
+    }
     // emit_to_module
     stat.add("codegen_taichi_kernel_function");
     auto offloaded_task_name = init_taichi_kernel_function();
diff --git a/taichi/codegen/codegen.cpp b/taichi/codegen/codegen.cpp
index b7bb02dd835d8..263f15d039d4d 100644
--- a/taichi/codegen/codegen.cpp
+++ b/taichi/codegen/codegen.cpp
@@ -32,11 +32,10 @@ KernelCodeGen::KernelCodeGen(Kernel *kernel, IRNode *ir)
 
 std::unique_ptr<KernelCodeGen> KernelCodeGen::create(Arch arch,
                                                      Kernel *kernel,
-                                                     Stmt *stmt,
-                                                     bool needs_cache) {
+                                                     Stmt *stmt) {
 #ifdef TI_WITH_LLVM
   if (arch_is_cpu(arch) && arch != Arch::wasm) {
-    return std::make_unique<CodeGenCPU>(kernel, stmt, needs_cache);
+    return std::make_unique<CodeGenCPU>(kernel, stmt);
   } else if (arch == Arch::wasm) {
     return std::make_unique<CodeGenWASM>(kernel, stmt);
   } else if (arch == Arch::cuda) {
diff --git a/taichi/codegen/codegen.h b/taichi/codegen/codegen.h
index 42ab840a0b081..e700fe0f349e2 100644
--- a/taichi/codegen/codegen.h
+++ b/taichi/codegen/codegen.h
@@ -19,8 +19,7 @@ class KernelCodeGen {
 
   static std::unique_ptr<KernelCodeGen> create(Arch arch,
                                                Kernel *kernel,
-                                               Stmt *stmt = nullptr,
-                                               bool needs_cache = false);
+                                               Stmt *stmt = nullptr);
 
   virtual FunctionType codegen() = 0;
 };
diff --git a/taichi/codegen/codegen_llvm.cpp b/taichi/codegen/codegen_llvm.cpp
index ca5454048bbe6..b7277451007b3 100644
--- a/taichi/codegen/codegen_llvm.cpp
+++ b/taichi/codegen/codegen_llvm.cpp
@@ -1,3 +1,4 @@
+#include "taichi/llvm/llvm_offline_cache.h"
 #ifdef TI_WITH_LLVM
 #include "taichi/codegen/codegen_llvm.h"
 
@@ -304,8 +305,7 @@ void CodeGenLLVM::emit_struct_meta_base(const std::string &name,
 
 CodeGenLLVM::CodeGenLLVM(Kernel *kernel,
                          IRNode *ir,
-                         std::unique_ptr<llvm::Module> &&module,
-                         bool needs_cache)
+                         std::unique_ptr<llvm::Module> &&module)
     // TODO: simplify LLVMModuleBuilder ctor input
     : LLVMModuleBuilder(
           module == nullptr ? kernel->program->get_llvm_program_impl()
@@ -314,7 +314,6 @@ CodeGenLLVM::CodeGenLLVM(Kernel *kernel,
                 : std::move(module),
           kernel->program->get_llvm_program_impl()->get_llvm_context(
               kernel->arch)),
-      needs_cache_(needs_cache),
       kernel(kernel),
       ir(ir),
       prog(kernel->program) {
@@ -2268,17 +2267,6 @@ void CodeGenLLVM::eliminate_unused_functions() {
 
 FunctionType CodeGenLLVM::compile_module_to_executable() {
   TI_AUTO_PROF
-  eliminate_unused_functions();
-
-  auto *llvm_prog = prog->get_llvm_program_impl();
-  if (needs_cache_) {
-    std::vector<std::string> offloaded_task_name_list;
-    for (auto &task : offloaded_tasks) {
-      offloaded_task_name_list.push_back(task.name);
-    }
-    llvm_prog->cache_kernel(this->kernel->get_key(), this->module.get(),
-                            std::move(offloaded_task_name_list));
-  }
 
   tlctx->add_module(std::move(module));
 
@@ -2384,7 +2372,42 @@ void CodeGenLLVM::emit_to_module() {
 }
 
 FunctionType CodeGenLLVM::gen() {
+  bool needs_cache = false;
+  const auto &config = prog->config;
+  std::string kernel_key;
+  if (config.offline_cache && this->supports_offline_cache() &&
+      !kernel->is_evaluator) {
+    kernel_key = get_offline_cache_key_of_kernel(kernel);
+
+    LlvmOfflineCacheFileReader reader(config.offline_cache_file_path);
+    LlvmOfflineCache::KernelCacheData cache_data;
+    auto *tlctx =
+        this->prog->get_llvm_program_impl()->get_llvm_context(config.arch);
+    auto &llvm_ctx = *tlctx->get_this_thread_context();
+
+    if (reader.get_kernel_cache(cache_data, kernel_key, llvm_ctx)) {
+      this->module = std::move(cache_data.owned_module);
+      for (auto &task : cache_data.offloaded_task_list) {
+        auto &t = this->offloaded_tasks.emplace_back(this);
+        t.name = std::move(task.name);
+        t.block_dim = task.block_dim;
+        t.grid_dim = task.grid_dim;
+      }
+      kernel->set_from_offline_cache();
+      return compile_module_to_executable();
+    } else {
+      needs_cache = true;
+    }
+  }
+
+  if (!kernel->lowered()) {
+    kernel->lower();
+  }
   emit_to_module();
+  eliminate_unused_functions();
+  if (needs_cache) {
+    cache_module(kernel_key);
+  }
   return compile_module_to_executable();
 }
 
@@ -2451,6 +2474,18 @@ void CodeGenLLVM::visit(FuncCallStmt *stmt) {
   }
 }
 
+void CodeGenLLVM::cache_module(const std::string &kernel_key) {
+  using OffloadedTaskCache = LlvmOfflineCache::OffloadedTaskCacheData;
+  std::vector<OffloadedTaskCache> offloaded_task_list;
+  for (auto &task : offloaded_tasks) {
+    auto &task_cache = offloaded_task_list.emplace_back();
+    task_cache.name = task.name;
+    task_cache.block_dim = task.block_dim;
+    task_cache.grid_dim = task.grid_dim;
+  }
+  prog->get_llvm_program_impl()->cache_kernel(kernel_key, this->module.get(),
+                                              std::move(offloaded_task_list));
+}
 TLANG_NAMESPACE_END
 
 #endif  // #ifdef TI_WITH_LLVM
diff --git a/taichi/codegen/codegen_llvm.h b/taichi/codegen/codegen_llvm.h
index b23cf052dd7a0..b99cc287a3771 100644
--- a/taichi/codegen/codegen_llvm.h
+++ b/taichi/codegen/codegen_llvm.h
@@ -20,8 +20,8 @@ class OffloadedTask {
   using task_fp_type = int32 (*)(void *);
   task_fp_type func;
 
-  int block_dim;
-  int grid_dim;
+  int block_dim{0};
+  int grid_dim{0};
 
   OffloadedTask(CodeGenLLVM *codegen);
@@ -48,9 +48,6 @@ class FunctionCreationGuard {
 };
 
 class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
- private:
-  bool needs_cache_{false};
-
  public:
   Kernel *kernel;
   IRNode *ir;
@@ -86,8 +83,7 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
   CodeGenLLVM(Kernel *kernel,
               IRNode *ir = nullptr,
-              std::unique_ptr<llvm::Module> &&module = nullptr,
-              bool needs_cache = false);
+              std::unique_ptr<llvm::Module> &&module = nullptr);
 
   Arch current_arch() {
     return kernel->arch;
   }
@@ -131,6 +127,10 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
 
   virtual FunctionType gen();
 
+  virtual bool supports_offline_cache() const {
+    return false;
+  }
+
   // For debugging only
   virtual llvm::Value *create_print(std::string tag,
                                     DataType dt,
@@ -391,6 +391,9 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
   llvm::Value *bitcast_to_u64(llvm::Value *val, DataType type);
 
   ~CodeGenLLVM() override = default;
+
+ private:
+  void cache_module(const std::string &kernel_key);
 };
 
 TLANG_NAMESPACE_END
diff --git a/taichi/llvm/llvm_offline_cache.cpp b/taichi/llvm/llvm_offline_cache.cpp
index 595544a599f57..2bdcc1ee86931 100644
--- a/taichi/llvm/llvm_offline_cache.cpp
+++ b/taichi/llvm/llvm_offline_cache.cpp
@@ -4,10 +4,22 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/IR/Module.h"
+#include "taichi/ir/transforms.h"
+
+#include "picosha2.h"
 
 namespace taichi {
 namespace lang {
 
+std::string get_offline_cache_key_of_kernel(Kernel *kernel) {
+  std::string res, kernel_ast_string;
+  irpass::re_id(kernel->ir.get());
+  irpass::print(kernel->ir.get(), &kernel_ast_string);
+  picosha2::hash256_hex_string(kernel_ast_string, res);
+  res.push_back(kernel->grad ? 'g' : 'n');
+  return res;
+}
+
 bool LlvmOfflineCacheFileReader::get_kernel_cache(
     LlvmOfflineCache::KernelCacheData &res,
     const std::string &key,
@@ -32,7 +44,9 @@ bool LlvmOfflineCacheFileReader::get_kernel_cache(
       std::getline(in, line, '\n');
       if (line.empty())
        break;
-      res.offloaded_task_name_list.push_back(std::move(line));
+      std::istringstream iss(line);
+      auto &task = res.offloaded_task_list.emplace_back();
+      iss >> task.name >> task.block_dim >> task.grid_dim;
     }
   }
   return true;
@@ -49,11 +63,11 @@ void LlvmOfflineCacheFileWriter::dump() {
       llvm::LLVMContext ctx;
       llvm::raw_os_ostream llvm_os(os);
       if (v.module) {
-        mangle_offloaded_task_name(k, v.module, v.offloaded_task_name_list);
+        mangle_offloaded_task_name(k, v.module, v.offloaded_task_list);
         v.module->print(llvm_os, nullptr);
       } else if (v.owned_module) {
         mangle_offloaded_task_name(k, v.owned_module.get(),
-                                   v.offloaded_task_name_list);
+                                   v.offloaded_task_list);
         v.owned_module->print(llvm_os, nullptr);
       } else
         TI_ASSERT(false);
@@ -62,8 +76,8 @@ void LlvmOfflineCacheFileWriter::dump() {
       std::string filename = filename_prefix + "_otnl.txt";
       std::ofstream os(filename, std::ios::out | std::ios::binary);
       TI_ERROR_IF(!os.is_open(), "File {} open failed", filename);
-      for (const auto &name : v.offloaded_task_name_list) {
-        os << name << '\n';
+      for (const auto &task : v.offloaded_task_list) {
+        os << task.name << ' ' << task.block_dim << ' ' << task.grid_dim << '\n';
       }
     }
   }
@@ -72,15 +86,16 @@ void LlvmOfflineCacheFileWriter::dump() {
 void LlvmOfflineCacheFileWriter::mangle_offloaded_task_name(
     const std::string &kernel_key,
     llvm::Module *module,
-    std::vector<std::string> &offloaded_task_name_list) {
+    std::vector<LlvmOfflineCache::OffloadedTaskCacheData>
+        &offloaded_task_list) {
   if (!mangled_) {
     std::size_t cnt = 0;
-    for (auto &e : offloaded_task_name_list) {
+    for (auto &e : offloaded_task_list) {
       std::string mangled_name = kernel_key + std::to_string(cnt++);
-      auto func = module->getFunction(e);
+      auto func = module->getFunction(e.name);
       TI_ASSERT(func != nullptr);
       func->setName(mangled_name);
-      e = mangled_name;
+      e.name = mangled_name;
     }
   }
 }
diff --git a/taichi/llvm/llvm_offline_cache.h b/taichi/llvm/llvm_offline_cache.h
index fafd4d08bae5e..54da7bd7866a4 100644
--- a/taichi/llvm/llvm_offline_cache.h
+++ b/taichi/llvm/llvm_offline_cache.h
@@ -1,18 +1,26 @@
 #pragma once
 
 #include "taichi/common/core.h"
+#include "taichi/program/kernel.h"
 #include "taichi/llvm/llvm_fwd.h"
 #include "taichi/util/io.h"
 
 namespace taichi {
 namespace lang {
 
+std::string get_offline_cache_key_of_kernel(Kernel *kernel);
+
 struct LlvmOfflineCache {
+  struct OffloadedTaskCacheData {
+    std::string name;
+    int block_dim{0};
+    int grid_dim{0};
+  };
+
   struct KernelCacheData {
     std::string kernel_key;
     std::unique_ptr<llvm::Module> owned_module{nullptr};
     llvm::Module *module{nullptr};
-    std::vector<std::string> offloaded_task_name_list;
+    std::vector<OffloadedTaskCacheData> offloaded_task_list;
 
     KernelCacheData() = default;
     KernelCacheData(KernelCacheData &&) = default;
@@ -58,7 +66,8 @@ class LlvmOfflineCacheFileWriter {
   void mangle_offloaded_task_name(
       const std::string &kernel_key,
       llvm::Module *module,
-      std::vector<std::string> &offloaded_task_name_list);
+      std::vector<LlvmOfflineCache::OffloadedTaskCacheData>
+          &offloaded_task_list);
 
   std::string path_;
   LlvmOfflineCache data_;
diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp
index c79cf1914a83d..274e8b76ec2ea 100644
--- a/taichi/llvm/llvm_program.cpp
+++ b/taichi/llvm/llvm_program.cpp
@@ -1,8 +1,6 @@
 #include "llvm_program.h"
 #include "llvm/IR/Module.h"
 
-#include "picosha2.h"
-
 #include "taichi/backends/cuda/cuda_driver.h"
 #include "taichi/backends/arch.h"
 #include "taichi/llvm/llvm_offline_cache.h"
@@ -133,32 +131,7 @@ void LlvmProgramImpl::maybe_initialize_cuda_llvm_context() {
 
 FunctionType LlvmProgramImpl::compile(Kernel *kernel,
                                       OffloadedStmt *offloaded) {
-  bool needs_cache = false;
-  if (config->offline_cache && this->supports_offline_cache() &&
-      !kernel->is_evaluator) {
-    std::string kernel_key, hashed_kernel_key;
-    irpass::re_id(kernel->ir.get());
-    irpass::print(kernel->ir.get(), &kernel_key);
-    picosha2::hash256_hex_string(kernel_key, hashed_kernel_key);
-    if (kernel->grad)
-      hashed_kernel_key.push_back('g');
-    else
-      hashed_kernel_key.push_back('n');
-    auto func = this->create_kernel_function_from_offline_cache(
-        hashed_kernel_key, kernel);
-    if (func) {
-      kernel->set_from_offline_cache();
-      return func;
-    } else {
-      kernel->set_key(hashed_kernel_key);
-      needs_cache = true;
-    }
-  }
-  if (!kernel->lowered()) {
-    kernel->lower();
-  }
-  auto codegen =
-      KernelCodeGen::create(kernel->arch, kernel, offloaded, needs_cache);
+  auto codegen = KernelCodeGen::create(kernel->arch, kernel, offloaded);
   return codegen->codegen();
 }
 
@@ -667,64 +640,22 @@ void LlvmProgramImpl::fill_ndarray(const DeviceAllocation &alloc,
   }
 }
 
-FunctionType LlvmProgramImpl::create_kernel_function_from_offline_cache(
-    const std::string &kernel_key,
-    Kernel *kernel) {
-  TI_ASSERT(config->offline_cache);
-  using task_fp_type = int32 (*)(void *);
-
-  LlvmOfflineCacheFileReader reader(config->offline_cache_file_path);
-  LlvmOfflineCache::KernelCacheData cache_data;
-  auto *tlctx = this->get_llvm_context(config->arch);
-  auto &llvm_ctx = *tlctx->get_this_thread_context();
-
-  if (!reader.get_kernel_cache(cache_data, kernel_key, llvm_ctx))
-    return nullptr;
-
-  std::vector<task_fp_type> func_list;
-  tlctx->add_module(std::move(cache_data.owned_module));
-  for (const auto &func_name : cache_data.offloaded_task_name_list) {
-    void *kernel_symbol = tlctx->lookup_function_pointer(func_name);
-    TI_ASSERT(kernel_symbol);
-    func_list.push_back((task_fp_type)kernel_symbol);
-  }
-
-  return [kernel, flist = std::move(func_list)](RuntimeContext &ctx) -> void {
-    auto args = kernel->args;
-    // For taichi ndarrays, context.args saves pointer to its
-    // |DeviceAllocation|, CPU backend actually want to use the raw ptr
-    // here.
-    for (std::size_t i = 0; i < args.size(); ++i) {
-      if (args[i].is_array && ctx.is_device_allocation[i] && args[i].size > 0) {
-        DeviceAllocation *ptr =
-            static_cast<DeviceAllocation *>(ctx.get_arg<void *>(i));
-        uint64 host_ptr = (uint64)kernel->program->get_llvm_program_impl()
-                              ->get_ndarray_alloc_info_ptr(*ptr);
-        ctx.set_arg(i, host_ptr);
-        ctx.set_device_allocation(i, false);
-      }
-    }
-    for (auto func : flist) {
-      func(&ctx);
-    }
-  };
-}
-
 void LlvmProgramImpl::cache_kernel(
     const std::string &kernel_key,
     llvm::Module *module,
-    std::vector<std::string> &&offloaded_task_name_list) {
+    std::vector<LlvmOfflineCache::OffloadedTaskCacheData>
+        &&offloaded_task_list) {
   if (cache_data_.kernels.find(kernel_key) != cache_data_.kernels.end()) {
     return;
   }
   auto &kernel_cache = cache_data_.kernels[kernel_key];
   kernel_cache.kernel_key = kernel_key;
   kernel_cache.owned_module = llvm::CloneModule(*module);
-  kernel_cache.offloaded_task_name_list = offloaded_task_name_list;
+  kernel_cache.offloaded_task_list = offloaded_task_list;
 }
 
 void LlvmProgramImpl::dump_cache_data_to_disk() {
-  if (config->offline_cache && this->supports_offline_cache()) {
+  if (config->offline_cache && !cache_data_.kernels.empty()) {
     LlvmOfflineCacheFileWriter writer(config->offline_cache_file_path);
     writer.set_data(std::move(cache_data_));
     writer.dump();
diff --git a/taichi/llvm/llvm_program.h b/taichi/llvm/llvm_program.h
index 2134b070956fc..7ae9ea30daa73 100644
--- a/taichi/llvm/llvm_program.h
+++ b/taichi/llvm/llvm_program.h
@@ -115,18 +115,9 @@ class LlvmProgramImpl : public ProgramImpl {
                     std::size_t size,
                     uint32_t data);
 
-  bool supports_offline_cache() const {
-    auto arch = config->arch;
-    return arch_is_cpu(arch) && arch != Arch::wasm && !config->async_mode;
-  }
-
-  FunctionType create_kernel_function_from_offline_cache(
-      const std::string &kernel_key,
-      Kernel *kernel);
-
   void cache_kernel(const std::string &kernel_key,
                     llvm::Module *module,
-                    std::vector<std::string> &&offloaded_task_name_list);
+                    std::vector<LlvmOfflineCache::OffloadedTaskCacheData> &&offloaded_task_list);
 
  private:
  std::unique_ptr<llvm::Module> clone_struct_compiler_initial_context(
diff --git a/taichi/program/kernel.h b/taichi/program/kernel.h
index 19399e37078cc..d29aac4399f4a 100644
--- a/taichi/program/kernel.h
+++ b/taichi/program/kernel.h
@@ -117,14 +117,6 @@ class TI_DLL_EXPORT Kernel : public Callable {
     return task_counter_++;
   }
 
-  void set_key(const std::string &key) {
-    this->kernel_key_ = key;
-  }
-
-  const std::string &get_key() const {
-    return this->kernel_key_;
-  }
-
   void set_from_offline_cache() {
     this->from_offline_cache_ = true;
   }

From ec330b4b64b471b23f1db0ed3691714ad2b56be1 Mon Sep 17 00:00:00 2001
From: PGZXB <420254146@qq.com>
Date: Tue, 22 Mar 2022 18:51:31 +0800
Subject: [PATCH 2/5] Add ti.cuda into supported_archs_offline_cache for
 test_offline_cache

---
 tests/python/test_offline_cache.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/python/test_offline_cache.py b/tests/python/test_offline_cache.py
index 611bc59606c92..af020103a985b 100644
--- a/tests/python/test_offline_cache.py
+++ b/tests/python/test_offline_cache.py
@@ -8,7 +8,7 @@
 import taichi as ti
 from tests import test_utils
 
-supported_archs_offline_cache = [ti.cpu]
+supported_archs_offline_cache = [ti.cpu, ti.cuda]
 supported_archs_offline_cache = [
     v for v in supported_archs_offline_cache
     if v in test_utils.expected_archs()
@@ -17,7 +17,6 @@
 
 ext_init_options = {
     'offline_cache': True,
     'offline_cache_file_path': tmp_offline_cache_file_path,
-    'print_preprocessed_ir': True
 }
 cache_files_num_per_kernel = 2

From 85c88533464afe62703298013c49dfdd441a05e3 Mon Sep 17 00:00:00 2001
From: Taichi Gardener
Date: Tue, 22 Mar 2022 10:56:34 +0000
Subject: [PATCH 3/5] Auto Format

---
 taichi/llvm/llvm_offline_cache.cpp | 3 ++-
 taichi/llvm/llvm_program.h         | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/taichi/llvm/llvm_offline_cache.cpp b/taichi/llvm/llvm_offline_cache.cpp
index 2bdcc1ee86931..8ce77018db17b 100644
--- a/taichi/llvm/llvm_offline_cache.cpp
+++ b/taichi/llvm/llvm_offline_cache.cpp
@@ -77,7 +77,8 @@ void LlvmOfflineCacheFileWriter::dump() {
       std::ofstream os(filename, std::ios::out | std::ios::binary);
       TI_ERROR_IF(!os.is_open(), "File {} open failed", filename);
       for (const auto &task : v.offloaded_task_list) {
-        os << task.name << ' ' << task.block_dim << ' ' << task.grid_dim << '\n';
+        os << task.name << ' ' << task.block_dim << ' ' << task.grid_dim
+           << '\n';
       }
     }
   }
diff --git a/taichi/llvm/llvm_program.h b/taichi/llvm/llvm_program.h
index 7ae9ea30daa73..ee028e33b89d3 100644
--- a/taichi/llvm/llvm_program.h
+++ b/taichi/llvm/llvm_program.h
@@ -117,7 +117,8 @@ class LlvmProgramImpl : public ProgramImpl {
 
   void cache_kernel(const std::string &kernel_key,
                     llvm::Module *module,
-                    std::vector<LlvmOfflineCache::OffloadedTaskCacheData> &&offloaded_task_list);
+                    std::vector<LlvmOfflineCache::OffloadedTaskCacheData>
+                        &&offloaded_task_list);
 
  private:
  std::unique_ptr<llvm::Module> clone_struct_compiler_initial_context(

From 5b9f865839d156356c3b9c1e7523d2ef065ef33c Mon Sep 17 00:00:00 2001
From: PGZXB <420254146@qq.com>
Date: Tue, 22 Mar 2022 20:29:30 +0800
Subject: [PATCH 4/5] Fix cache-key

---
 taichi/llvm/llvm_offline_cache.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taichi/llvm/llvm_offline_cache.cpp b/taichi/llvm/llvm_offline_cache.cpp
index 8ce77018db17b..071b0e9b737b5 100644
--- a/taichi/llvm/llvm_offline_cache.cpp
+++ b/taichi/llvm/llvm_offline_cache.cpp
@@ -16,7 +16,7 @@ std::string get_offline_cache_key_of_kernel(Kernel *kernel) {
   irpass::re_id(kernel->ir.get());
   irpass::print(kernel->ir.get(), &kernel_ast_string);
   picosha2::hash256_hex_string(kernel_ast_string, res);
-  res.push_back(kernel->grad ? 'g' : 'n');
+  res.insert(res.begin(), kernel->grad ? 'g' : 'n');
   return res;
 }
 
From 04154484e770e9db622c7a4cb7e5321a35e636fb Mon Sep 17 00:00:00 2001
From: PGZXB <420254146@qq.com>
Date: Wed, 23 Mar 2022 10:48:42 +0800
Subject: [PATCH 5/5] Move #include into #ifdef TI_WITH_LLVM

---
 taichi/codegen/codegen_llvm.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/taichi/codegen/codegen_llvm.cpp b/taichi/codegen/codegen_llvm.cpp
index b7277451007b3..312b301794a9c 100644
--- a/taichi/codegen/codegen_llvm.cpp
+++ b/taichi/codegen/codegen_llvm.cpp
@@ -1,7 +1,6 @@
-#include "taichi/llvm/llvm_offline_cache.h"
 #ifdef TI_WITH_LLVM
 #include "taichi/codegen/codegen_llvm.h"
-
+#include "taichi/llvm/llvm_offline_cache.h"
 #include "taichi/ir/statements.h"
 #include "taichi/struct/struct_llvm.h"
 #include "taichi/util/file_sequence_writer.h"
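
For reference, the offline cache exercised by these patches is driven entirely from Python init
options. A minimal usage sketch, assuming only the options that appear in
tests/python/test_offline_cache.py (the kernel body and the cache directory below are
illustrative, not part of the patches):

    import taichi as ti

    # With PATCH 1/5 and 2/5 applied, the LLVM offline cache is expected to
    # work on both ti.cpu and ti.cuda.
    ti.init(arch=ti.cuda,
            offline_cache=True,
            offline_cache_file_path='/tmp/taichi_offline_cache')

    x = ti.field(ti.i32, shape=8)

    @ti.kernel
    def fill():
        for i in x:
            x[i] = i

    fill()  # First run: the kernel's LLVM module and offloaded-task metadata are
            # dumped under the cache path (LlvmProgramImpl::dump_cache_data_to_disk);
            # later runs with the same options reuse the cached module in
            # CodeGenLLVM::gen() instead of lowering and re-compiling the kernel.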