[refactor] Remove dependencies on Program::this_thread_config() in llvm backends codegen (taichi-dev#7153)

Issue: taichi-dev#7002

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2 people authored and quadpixels committed May 13, 2023
1 parent bc83b8f commit a0d9360
Showing 19 changed files with 102 additions and 119 deletions.
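
The change is mechanical across the backends: instead of reaching back through Program for thread-local configuration, each codegen object receives the active CompileConfig through its constructor and reads it from a member. A minimal sketch of the before/after shape (simplified stand-in types, not the literal Taichi sources):

```cpp
// Simplified sketch of the refactor; CompileConfig and Program here are
// stand-ins, not the real Taichi definitions.
struct CompileConfig {
  int saturating_grid_dim = 0;
};

struct Program {
  CompileConfig config;
  // Before: a thread-dependent lookup that every codegen use site went through.
  CompileConfig &this_thread_config() {
    return config;
  }
};

// Before: codegen held a Program* and queried it at each use site.
class TaskCodeGenBefore {
 public:
  explicit TaskCodeGenBefore(Program *prog) : prog(prog) {
  }
  int grid_dim() {
    return prog->this_thread_config().saturating_grid_dim;
  }

 private:
  Program *prog;
};

// After: the config is injected once and read directly, so codegen no longer
// depends on which thread is compiling the kernel.
class TaskCodeGenAfter {
 public:
  explicit TaskCodeGenAfter(const CompileConfig *config)
      : compile_config(config) {
  }
  int grid_dim() const {
    return compile_config->saturating_grid_dim;
  }

 private:
  const CompileConfig *compile_config;
};
```

The diffs below repeat this shape for the AMDGPU, CPU, CUDA, and DX12 backends, and the compile_to_function() bodies switch from prog->this_thread_config() to the stored config via *get_compile_config().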
24 changes: 9 additions & 15 deletions taichi/codegen/amdgpu/codegen_amdgpu.cpp
@@ -27,8 +27,10 @@ using namespace llvm;
class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
public:
using IRVisitor::visit;
- TaskCodeGenAMDGPU(Kernel *kernel, IRNode *ir = nullptr)
-     : TaskCodeGenLLVM(kernel, ir) {
+ TaskCodeGenAMDGPU(const CompileConfig *config,
+                   Kernel *kernel,
+                   IRNode *ir = nullptr)
+     : TaskCodeGenLLVM(config, kernel, ir) {
}

llvm::Value *create_print(std::string tag,
@@ -232,7 +234,7 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
init_offloaded_task_function(stmt, "gather_list");
call("gc_parallel_0", get_context(), snode_id);
finalize_offloaded_task_function();
- current_task->grid_dim = prog->this_thread_config().saturating_grid_dim;
+ current_task->grid_dim = compile_config->saturating_grid_dim;
current_task->block_dim = 64;
offloaded_tasks.push_back(*current_task);
current_task = nullptr;
@@ -250,7 +252,7 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
init_offloaded_task_function(stmt, "zero_fill");
call("gc_parallel_2", get_context(), snode_id);
finalize_offloaded_task_function();
- current_task->grid_dim = prog->this_thread_config().saturating_grid_dim;
+ current_task->grid_dim = compile_config->saturating_grid_dim;
current_task->block_dim = 64;
offloaded_tasks.push_back(*current_task);
current_task = nullptr;
@@ -395,25 +397,17 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
}
};

- #ifdef TI_WITH_LLVM
- // static
- std::unique_ptr<TaskCodeGenLLVM> KernelCodeGenAMDGPU::make_codegen_llvm(
-     Kernel *kernel,
-     IRNode *ir) {
-   return std::make_unique<TaskCodeGenAMDGPU>(kernel, ir);
- }
- #endif // TI_WITH_LLVM
-
LLVMCompiledTask KernelCodeGenAMDGPU::compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module,
OffloadedStmt *stmt) {
- TaskCodeGenAMDGPU gen(kernel, stmt);
+ TaskCodeGenAMDGPU gen(config, kernel, stmt);
return gen.run_compilation();
}

FunctionType KernelCodeGenAMDGPU::compile_to_function() {
auto *llvm_prog = get_llvm_program(prog);
- const auto &config = prog->this_thread_config();
+ const auto &config = *get_compile_config();
auto *tlctx = llvm_prog->get_llvm_context(config.arch);

AMDGPUModuleToFunctionConverter converter{tlctx,
3 changes: 1 addition & 2 deletions taichi/codegen/amdgpu/codegen_amdgpu.h
@@ -15,9 +15,8 @@ class KernelCodeGenAMDGPU : public KernelCodeGen {

// TODO: Stop defining this macro guards in the headers
#ifdef TI_WITH_LLVM
- static std::unique_ptr<TaskCodeGenLLVM> make_codegen_llvm(Kernel *kernel,
-     IRNode *ir);
LLVMCompiledTask compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module = nullptr,
OffloadedStmt *stmt = nullptr) override;
#endif // TI_WITH_LLVM
3 changes: 2 additions & 1 deletion taichi/codegen/codegen.cpp
@@ -116,7 +116,8 @@ LLVMCompiledKernel KernelCodeGen::compile_kernel_to_module() {
tlctx->fetch_this_thread_struct_module();
auto offload = irpass::analysis::clone(offloads[i].get());
irpass::re_id(offload.get());
- auto new_data = this->compile_task(nullptr, offload->as<OffloadedStmt>());
+ auto new_data =
+     this->compile_task(&config, nullptr, offload->as<OffloadedStmt>());
data[i] = std::make_unique<LLVMCompiledTask>(std::move(new_data));
};
if (kernel->is_evaluator) {
6 changes: 6 additions & 0 deletions taichi/codegen/codegen.h
@@ -59,6 +59,7 @@ class KernelCodeGen {
virtual LLVMCompiledKernel compile_kernel_to_module();

virtual LLVMCompiledTask compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module = nullptr,
OffloadedStmt *stmt = nullptr){TI_NOT_IMPLEMENTED}

@@ -67,6 +68,11 @@
void cache_kernel(const std::string &kernel_key,
const LLVMCompiledKernel &data);
#endif
+ protected:
+ const CompileConfig *get_compile_config() const {
+ return compile_config_;
+ }
+
private:
const CompileConfig *compile_config_{nullptr};
};
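
The base class now owns the pointer: compile_config_ is private, and subclasses reach it only through the protected get_compile_config() accessor. The constructor that initializes compile_config_ is outside this excerpt; a minimal sketch of the assumed pattern (class names here are illustrative stand-ins, not the real Taichi declarations):

```cpp
// Sketch of the accessor pattern introduced in codegen.h; the real
// KernelCodeGen constructor is not shown in this diff.
struct CompileConfig {
  bool kernel_profiler = false;
};

class KernelCodeGenSketch {
 public:
  explicit KernelCodeGenSketch(const CompileConfig *config)
      : compile_config_(config) {
  }

 protected:
  // Read-only access for subclasses; the pointer itself stays private.
  const CompileConfig *get_compile_config() const {
    return compile_config_;
  }

 private:
  const CompileConfig *compile_config_{nullptr};
};

class BackendSketch : public KernelCodeGenSketch {
 public:
  using KernelCodeGenSketch::KernelCodeGenSketch;
  bool profiling_enabled() const {
    // Mirrors `const auto &config = *get_compile_config();` in the backends.
    return get_compile_config()->kernel_profiler;
  }
};
```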
2 changes: 1 addition & 1 deletion taichi/codegen/codegen_utils.h
@@ -3,7 +3,7 @@

namespace taichi::lang {

- inline bool codegen_vector_type(CompileConfig *config) {
+ inline bool codegen_vector_type(const CompileConfig *config) {
return !config->real_matrix_scalarize;
}

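The only change here is const-qualifying the parameter, which is what lets call sites hand over the stored const CompileConfig * directly (e.g. codegen_vector_type(compile_config) in the CUDA diff below); a pointer-to-const cannot bind to a CompileConfig * parameter. A contrived, self-contained illustration:

```cpp
// Self-contained illustration; this CompileConfig is a stand-in.
struct CompileConfig {
  bool real_matrix_scalarize = true;
};

inline bool codegen_vector_type(const CompileConfig *config) {
  return !config->real_matrix_scalarize;
}

int main() {
  CompileConfig cfg;
  const CompileConfig *compile_config = &cfg;
  // OK with the const-qualified parameter; with the old signature
  // `codegen_vector_type(CompileConfig *)` this call would be ill-formed,
  // because a pointer-to-const cannot convert to pointer-to-non-const.
  return codegen_vector_type(compile_config) ? 0 : 1;
}
```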
24 changes: 8 additions & 16 deletions taichi/codegen/cpu/codegen_cpu.cpp
@@ -18,8 +18,8 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
public:
using IRVisitor::visit;

- TaskCodeGenCPU(Kernel *kernel, IRNode *ir)
-     : TaskCodeGenLLVM(kernel, ir, nullptr) {
+ TaskCodeGenCPU(const CompileConfig *config, Kernel *kernel, IRNode *ir)
+     : TaskCodeGenLLVM(config, kernel, ir, nullptr) {
TI_AUTO_PROF
}

@@ -56,7 +56,7 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
auto [begin, end] = get_range_for_bounds(stmt);

// adaptive block_dim
- if (prog->this_thread_config().cpu_block_dim_adaptive) {
+ if (compile_config->cpu_block_dim_adaptive) {
int num_items = (stmt->end_value - stmt->begin_value) / std::abs(step);
int num_threads = stmt->num_cpu_threads;
int items_per_thread = std::max(1, num_items / (num_threads * 32));
@@ -166,8 +166,7 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
create_bls_buffer(stmt);
using Type = OffloadedStmt::TaskType;
auto offloaded_task_name = init_offloaded_task_function(stmt);
- if (prog->this_thread_config().kernel_profiler &&
-     arch_is_cpu(prog->this_thread_config().arch)) {
+ if (compile_config->kernel_profiler && arch_is_cpu(compile_config->arch)) {
call("LLVMRuntime_profiler_start", get_runtime(),
builder->CreateGlobalStringPtr(offloaded_task_name));
}
@@ -190,8 +189,7 @@
} else {
TI_NOT_IMPLEMENTED
}
- if (prog->this_thread_config().kernel_profiler &&
-     arch_is_cpu(prog->this_thread_config().arch)) {
+ if (compile_config->kernel_profiler && arch_is_cpu(compile_config->arch)) {
llvm::IRBuilderBase::InsertPointGuard guard(*builder);
builder->SetInsertPoint(final_block);
call("LLVMRuntime_profiler_stop", get_runtime());
@@ -216,13 +214,6 @@
} // namespace

#ifdef TI_WITH_LLVM
- // static
- std::unique_ptr<TaskCodeGenLLVM> KernelCodeGenCPU::make_codegen_llvm(
-     Kernel *kernel,
-     IRNode *ir) {
-   return std::make_unique<TaskCodeGenCPU>(kernel, ir);
- }
-
FunctionType CPUModuleToFunctionConverter::convert(
const std::string &kernel_name,
const std::vector<LlvmLaunchArgInfo> &args,
@@ -272,17 +263,18 @@ FunctionType CPUModuleToFunctionConverter::convert(
}

LLVMCompiledTask KernelCodeGenCPU::compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module,
OffloadedStmt *stmt) {
- TaskCodeGenCPU gen(kernel, stmt);
+ TaskCodeGenCPU gen(config, kernel, stmt);
return gen.run_compilation();
}
#endif // TI_WITH_LLVM

FunctionType KernelCodeGenCPU::compile_to_function() {
TI_AUTO_PROF;
auto *llvm_prog = get_llvm_program(prog);
- const auto &config = prog->this_thread_config();
+ const auto &config = *get_compile_config();
auto *tlctx = llvm_prog->get_llvm_context(config.arch);

CPUModuleToFunctionConverter converter(
4 changes: 1 addition & 3 deletions taichi/codegen/cpu/codegen_cpu.h
@@ -17,13 +17,11 @@ class KernelCodeGenCPU : public KernelCodeGen {

// TODO: Stop defining this macro guards in the headers
#ifdef TI_WITH_LLVM
- static std::unique_ptr<TaskCodeGenLLVM> make_codegen_llvm(Kernel *kernel,
-     IRNode *ir);
-
bool supports_offline_cache() const override {
return true;
}
LLVMCompiledTask compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module = nullptr,
OffloadedStmt *stmt = nullptr) override;

30 changes: 12 additions & 18 deletions taichi/codegen/cuda/codegen_cuda.cpp
@@ -30,8 +30,10 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
public:
using IRVisitor::visit;

- explicit TaskCodeGenCUDA(Kernel *kernel, IRNode *ir = nullptr)
-     : TaskCodeGenLLVM(kernel, ir) {
+ explicit TaskCodeGenCUDA(const CompileConfig *config,
+                          Kernel *kernel,
+                          IRNode *ir = nullptr)
+     : TaskCodeGenLLVM(config, kernel, ir) {
}

llvm::Value *create_print(std::string tag,
@@ -106,7 +108,7 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
auto elem_type = dtype->get_element_type();
for (int i = 0; i < dtype->get_num_elements(); ++i) {
llvm::Value *elem_value;
- if (codegen_vector_type(&prog->this_thread_config())) {
+ if (codegen_vector_type(compile_config)) {
TI_ASSERT(llvm::dyn_cast<llvm::VectorType>(value_type));
elem_value = builder->CreateExtractElement(value, i);
} else {
@@ -364,7 +366,7 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
init_offloaded_task_function(stmt, "gather_list");
call("gc_parallel_0", get_context(), snode_id);
finalize_offloaded_task_function();
- current_task->grid_dim = prog->this_thread_config().saturating_grid_dim;
+ current_task->grid_dim = compile_config->saturating_grid_dim;
current_task->block_dim = 64;
offloaded_tasks.push_back(*current_task);
current_task = nullptr;
@@ -382,7 +384,7 @@
init_offloaded_task_function(stmt, "zero_fill");
call("gc_parallel_2", get_context(), snode_id);
finalize_offloaded_task_function();
- current_task->grid_dim = prog->this_thread_config().saturating_grid_dim;
+ current_task->grid_dim = compile_config->saturating_grid_dim;
current_task->block_dim = 64;
offloaded_tasks.push_back(*current_task);
current_task = nullptr;
@@ -394,7 +396,7 @@
init_offloaded_task_function(stmt, "gather_list");
call("gc_rc_parallel_0", get_context());
finalize_offloaded_task_function();
- current_task->grid_dim = prog->this_thread_config().saturating_grid_dim;
+ current_task->grid_dim = compile_config->saturating_grid_dim;
current_task->block_dim = 64;
offloaded_tasks.push_back(*current_task);
current_task = nullptr;
@@ -412,7 +414,7 @@
init_offloaded_task_function(stmt, "zero_fill");
call("gc_rc_parallel_2", get_context());
finalize_offloaded_task_function();
- current_task->grid_dim = prog->this_thread_config().saturating_grid_dim;
+ current_task->grid_dim = compile_config->saturating_grid_dim;
current_task->block_dim = 64;
offloaded_tasks.push_back(*current_task);
current_task = nullptr;
@@ -584,26 +586,18 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
}
};

- #ifdef TI_WITH_LLVM
- // static
- std::unique_ptr<TaskCodeGenLLVM> KernelCodeGenCUDA::make_codegen_llvm(
-     Kernel *kernel,
-     IRNode *ir) {
-   return std::make_unique<TaskCodeGenCUDA>(kernel, ir);
- }
- #endif // TI_WITH_LLVM
-
LLVMCompiledTask KernelCodeGenCUDA::compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module,
OffloadedStmt *stmt) {
- TaskCodeGenCUDA gen(kernel, stmt);
+ TaskCodeGenCUDA gen(config, kernel, stmt);
return gen.run_compilation();
}

FunctionType KernelCodeGenCUDA::compile_to_function() {
TI_AUTO_PROF
auto *llvm_prog = get_llvm_program(prog);
- const auto &config = prog->this_thread_config();
+ const auto &config = *get_compile_config();
auto *tlctx = llvm_prog->get_llvm_context(config.arch);

CUDAModuleToFunctionConverter converter{tlctx,
3 changes: 1 addition & 2 deletions taichi/codegen/cuda/codegen_cuda.h
@@ -16,9 +16,8 @@ class KernelCodeGenCUDA : public KernelCodeGen {

// TODO: Stop defining this macro guards in the headers
#ifdef TI_WITH_LLVM
- static std::unique_ptr<TaskCodeGenLLVM> make_codegen_llvm(Kernel *kernel,
-     IRNode *ir);
LLVMCompiledTask compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module = nullptr,
OffloadedStmt *stmt = nullptr) override;
#endif // TI_WITH_LLVM
27 changes: 12 additions & 15 deletions taichi/codegen/dx12/codegen_dx12.cpp
@@ -21,8 +21,8 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {
public:
using IRVisitor::visit;

- TaskCodeGenLLVMDX12(Kernel *kernel, IRNode *ir)
-     : TaskCodeGenLLVM(kernel, ir, nullptr) {
+ TaskCodeGenLLVMDX12(const CompileConfig *config, Kernel *kernel, IRNode *ir)
+     : TaskCodeGenLLVM(config, kernel, ir, nullptr) {
TI_AUTO_PROF
}

@@ -149,8 +149,7 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {
create_bls_buffer(stmt);
using Type = OffloadedStmt::TaskType;
auto offloaded_task_name = init_offloaded_task_function(stmt);
- if (prog->this_thread_config().kernel_profiler &&
-     arch_is_cpu(prog->this_thread_config().arch)) {
+ if (compile_config->kernel_profiler && arch_is_cpu(compile_config->arch)) {
call(
builder.get(), "LLVMRuntime_profiler_start",
{get_runtime(), builder->CreateGlobalStringPtr(offloaded_task_name)});
@@ -172,8 +171,7 @@
} else {
TI_NOT_IMPLEMENTED
}
- if (prog->this_thread_config().kernel_profiler &&
-     arch_is_cpu(prog->this_thread_config().arch)) {
+ if (compile_config->kernel_profiler && arch_is_cpu(compile_config->arch)) {
llvm::IRBuilderBase::InsertPointGuard guard(*builder);
builder->SetInsertPoint(final_block);
call(builder.get(), "LLVMRuntime_profiler_stop", {get_runtime()});
@@ -201,6 +199,7 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {

static std::vector<uint8_t> generate_dxil_from_llvm(
LLVMCompiledTask &compiled_data,
+ const CompileConfig *config,
taichi::lang::Kernel *kernel) {
// generate dxil from llvm ir.
auto offloaded_local = compiled_data.tasks;
@@ -209,21 +208,18 @@ static std::vector<uint8_t> generate_dxil_from_llvm(
llvm::Function *func = module->getFunction(task.name);
TI_ASSERT(func);
directx12::mark_function_as_cs_entry(func);
- directx12::set_num_threads(
-     func, kernel->program->this_thread_config().default_gpu_block_dim, 1,
-     1);
+ directx12::set_num_threads(func, config->default_gpu_block_dim, 1, 1);
// FIXME: save task.block_dim like
// tlctx->mark_function_as_cuda_kernel(func, task.block_dim);
}
- auto dx_container = directx12::global_optimize_module(
-     module, kernel->program->this_thread_config());
+ auto dx_container = directx12::global_optimize_module(module, *config);
// validate and sign dx container.
return directx12::validate_and_sign(dx_container);
}

KernelCodeGenDX12::CompileResult KernelCodeGenDX12::compile() {
TI_AUTO_PROF;
- auto &config = prog->this_thread_config();
+ auto &config = *get_compile_config();
std::string kernel_key = get_hashed_offline_cache_key(&config, kernel);
kernel->set_kernel_key_for_cache(kernel_key);

@@ -239,10 +235,10 @@
auto offload = irpass::analysis::clone(offloads[i].get());
irpass::re_id(offload.get());
auto *offload_stmt = offload->as<OffloadedStmt>();
- auto new_data = compile_task(nullptr, offload_stmt);
+ auto new_data = compile_task(&config, nullptr, offload_stmt);

Result.task_dxil_source_codes.emplace_back(
- generate_dxil_from_llvm(new_data, kernel));
+ generate_dxil_from_llvm(new_data, &config, kernel));
aot::CompiledOffloadedTask task;
// FIXME: build all fields for task.
task.name = fmt::format("{}_{}_{}", kernel->get_name(),
@@ -256,9 +252,10 @@
}

LLVMCompiledTask KernelCodeGenDX12::compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module,
OffloadedStmt *stmt) {
- TaskCodeGenLLVMDX12 gen(kernel, stmt);
+ TaskCodeGenLLVMDX12 gen(config, kernel, stmt);
return gen.run_compilation();
}
#endif // TI_WITH_LLVM
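
Note how compile() resolves the config once and then threads it explicitly through every stage, including the free function generate_dxil_from_llvm, instead of each stage digging through kernel->program. A reduced sketch of that call chain (assumed, heavily simplified signatures; only the config flow is faithful):

```cpp
#include <cstdint>
#include <vector>

// Stand-ins for the real Taichi types.
struct CompileConfig {
  int default_gpu_block_dim = 128;
};
struct LLVMCompiledTask {};

LLVMCompiledTask compile_task_sketch(const CompileConfig *config) {
  (void)config;  // codegen reads grid/block settings from the injected config
  return {};
}

std::vector<uint8_t> generate_dxil_sketch(const LLVMCompiledTask &task,
                                          const CompileConfig *config) {
  (void)task;
  // e.g. set_num_threads(func, config->default_gpu_block_dim, 1, 1);
  return std::vector<uint8_t>(
      static_cast<size_t>(config->default_gpu_block_dim));
}

void compile_sketch(const CompileConfig &config) {
  LLVMCompiledTask task = compile_task_sketch(&config);
  // The same config flows into the DXIL stage; no thread-local lookups.
  generate_dxil_sketch(task, &config);
}
```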
1 change: 1 addition & 0 deletions taichi/codegen/dx12/codegen_dx12.h
@@ -24,6 +24,7 @@ class KernelCodeGenDX12 : public KernelCodeGen {
CompileResult compile();
#ifdef TI_WITH_LLVM
LLVMCompiledTask compile_task(
+ const CompileConfig *config,
std::unique_ptr<llvm::Module> &&module = nullptr,
OffloadedStmt *stmt = nullptr) override;
#endif
(8 more changed files not shown.)
