From 67105aaa6009604553f60c13d7ab3baabc1ae663 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Thu, 14 Jul 2022 22:39:59 +0800 Subject: [PATCH 01/11] Implement shared memory for CUDA --- python/taichi/lang/ast/ast_transformer.py | 1 - python/taichi/lang/expr.py | 1 + python/taichi/lang/impl.py | 8 ++++++++ python/taichi/lang/simt/block.py | 14 +++++++++++++- taichi/codegen/llvm/codegen_llvm.cpp | 22 +++++++++++++++++++++- taichi/ir/frontend_ir.cpp | 10 ++++++++++ taichi/ir/frontend_ir.h | 11 ++++++++--- taichi/ir/statements.h | 12 ++++++++---- taichi/python/export_lang.cpp | 1 + taichi/transforms/ir_printer.cpp | 8 +++++--- taichi/transforms/lower_ast.cpp | 2 +- 11 files changed, 76 insertions(+), 14 deletions(-) diff --git a/python/taichi/lang/ast/ast_transformer.py b/python/taichi/lang/ast/ast_transformer.py index e565045cb109c..76015ac9efd94 100644 --- a/python/taichi/lang/ast/ast_transformer.py +++ b/python/taichi/lang/ast/ast_transformer.py @@ -97,7 +97,6 @@ def build_Assign(ctx, node): # Ref https://github.com/taichi-dev/taichi/issues/2659. values = node.value.ptr if is_static_assign else impl.expr_init( node.value.ptr) - for node_target in node.targets: ASTTransformer.build_assign_unpack(ctx, node_target, values, is_static_assign) diff --git a/python/taichi/lang/expr.py b/python/taichi/lang/expr.py index 2f60e86aeb284..d7814b798b96c 100644 --- a/python/taichi/lang/expr.py +++ b/python/taichi/lang/expr.py @@ -7,6 +7,7 @@ from taichi.types.primitive_types import integer_types, real_types + # Scalar, basic data type class Expr(TaichiOperations): """A Python-side Expr wrapper, whose member variable `ptr` is an instance of C++ Expr class. 
A C++ Expr object contains member variable `expr` which holds an instance of C++ Expression class.""" diff --git a/python/taichi/lang/impl.py b/python/taichi/lang/impl.py index 43940fce55439..10a671c49359e 100644 --- a/python/taichi/lang/impl.py +++ b/python/taichi/lang/impl.py @@ -19,6 +19,7 @@ MeshRelationAccessProxy, MeshReorderedMatrixFieldProxy, MeshReorderedScalarFieldProxy, element_type_name) +from taichi.lang.simt.block import SharedArray from taichi.lang.snode import SNode from taichi.lang.struct import Struct, StructField, _IntermediateStruct from taichi.lang.util import (cook_dtype, get_traceback, is_taichi_class, @@ -26,11 +27,16 @@ from taichi.types.primitive_types import all_types, f16, f32, f64, i32, i64 + @taichi_scope def expr_init_local_tensor(shape, element_type, elements): return get_runtime().prog.current_ast_builder().expr_alloca_local_tensor( shape, element_type, elements) +@taichi_scope +def expr_init_scratch_pad(shape, element_type): + return get_runtime().prog.current_ast_builder().expr_alloca_scratch_pad( + shape, element_type) @taichi_scope def expr_init(rhs): @@ -40,6 +46,8 @@ def expr_init(rhs): return type(rhs)(*rhs.to_list()) if isinstance(rhs, Matrix): return Matrix(rhs.to_list()) + if isinstance(rhs, SharedArray): + return rhs if isinstance(rhs, Struct): return Struct(rhs.to_dict(include_methods=True)) if isinstance(rhs, list): diff --git a/python/taichi/lang/simt/block.py b/python/taichi/lang/simt/block.py index 427664473dd5d..390376c895d64 100644 --- a/python/taichi/lang/simt/block.py +++ b/python/taichi/lang/simt/block.py @@ -1,5 +1,17 @@ from taichi.lang import impl - +from taichi.lang.util import taichi_scope def sync(): return impl.call_internal("block_barrier", with_runtime_context=False) + + +class SharedArray: + _is_taichi_class = True + def __init__(self, shape, dtype): + self.shape = shape + self.dtype = dtype + self.shared_array_proxy = impl.expr_init_scratch_pad(shape, dtype) + + @taichi_scope + def _subscript(self, 
indices, get_ref=False): + return impl.make_index_expr(self.shared_array_proxy, (indices,)) \ No newline at end of file diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index 6966c81f44d84..d33e563987c3e 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ b/taichi/codegen/llvm/codegen_llvm.cpp @@ -128,7 +128,27 @@ void CodeGenLLVM::visit(AllocaStmt *stmt) { auto type = tlctx->get_data_type(tensor_type->get_element_type()); auto array_size = tlctx->get_constant(tensor_type->get_num_elements()); // Return type is [array_size x type]*. - llvm_val[stmt] = create_entry_block_alloca(type, 0, array_size); + if (stmt->is_shared) { + size_t data_element_size = tlctx->get_type_size(tlctx->get_data_type(tensor_type->get_element_type())); + auto type = llvm::ArrayType::get(llvm::Type::getInt8Ty(*llvm_context), + data_element_size * tensor_type->get_num_elements()); + auto base = new llvm::GlobalVariable( + *module, type, false, llvm::GlobalValue::ExternalLinkage, nullptr, + fmt::format("shared_array_{}", stmt->id), nullptr, llvm::GlobalVariable::NotThreadLocal, + 3 /*addrspace=shared*/); + base->setAlignment(llvm::MaybeAlign(8)); + + auto ptr = builder->CreateGEP( +#ifdef TI_LLVM_15 + base->getValueType(), +#endif + base, {tlctx->get_constant(0), tlctx->get_constant(0)}); + auto ptr_type = llvm::PointerType::get( + tlctx->get_data_type(tensor_type->get_element_type()), 0); + llvm_val[stmt] = builder->CreatePointerCast(ptr, ptr_type); + } else { + llvm_val[stmt] = create_entry_block_alloca(type, 0, array_size); + } } else { TI_ASSERT(stmt->width() == 1); llvm_val[stmt] = diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index f2c03ea18c99d..15d52fff10b30 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -985,6 +985,16 @@ Expr ASTBuilder::expr_alloca_local_tensor(const std::vector &shape, return var; } +Expr ASTBuilder::expr_alloca_scratch_pad(const std::vector &shape, + const DataType 
&element_type) { + auto var = Expr(std::make_shared(get_next_id())); + this->insert(std::make_unique( + std::static_pointer_cast(var.expr)->id, shape, + element_type, true)); + var->ret_type = this->get_last_stmt()->ret_type; + return var; +} + void ASTBuilder::expr_assign(const Expr &lhs, const Expr &rhs, std::string tb) { TI_ASSERT(lhs->is_lvalue()); auto stmt = std::make_unique(lhs, rhs); diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index fdbe86c95c620..bb4fbba305408 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -63,17 +63,20 @@ class FrontendAllocaStmt : public Stmt { public: Identifier ident; - FrontendAllocaStmt(const Identifier &lhs, DataType type) : ident(lhs) { + FrontendAllocaStmt(const Identifier &lhs, DataType type) : ident(lhs), is_shared(false) { ret_type = TypeFactory::create_vector_or_scalar_type(1, type); } FrontendAllocaStmt(const Identifier &lhs, std::vector shape, - DataType element) - : ident(lhs) { + DataType element, + bool is_shared=false) + : ident(lhs), is_shared(is_shared) { ret_type = DataType(TypeFactory::create_tensor_type(shape, element)); } + bool is_shared; + TI_DEFINE_ACCEPT }; @@ -874,6 +877,8 @@ class ASTBuilder { Expr expr_alloca_local_tensor(const std::vector &shape, const DataType &element_type, const ExprGroup &elements); + Expr expr_alloca_scratch_pad(const std::vector &shape, + const DataType &element_type); void expr_assign(const Expr &lhs, const Expr &rhs, std::string tb); void create_assert_stmt(const Expr &cond, const std::string &msg, diff --git a/taichi/ir/statements.h b/taichi/ir/statements.h index 0ffa885f1fb27..f0409c6f6700c 100644 --- a/taichi/ir/statements.h +++ b/taichi/ir/statements.h @@ -18,17 +18,20 @@ class Function; */ class AllocaStmt : public Stmt { public: - AllocaStmt(DataType type) { + AllocaStmt(DataType type) : is_shared(false) { ret_type = TypeFactory::create_vector_or_scalar_type(1, type); TI_STMT_REG_FIELDS; } - AllocaStmt(int width, DataType type) { + 
AllocaStmt(int width, DataType type) : is_shared(false) { ret_type = TypeFactory::create_vector_or_scalar_type(width, type); TI_STMT_REG_FIELDS; } - AllocaStmt(const std::vector &shape, DataType type) { + AllocaStmt(const std::vector &shape, + DataType type, + bool is_shared = false) + : is_shared(is_shared) { ret_type = TypeFactory::create_tensor_type(shape, type); TI_STMT_REG_FIELDS; } @@ -41,7 +44,8 @@ class AllocaStmt : public Stmt { return false; } - TI_STMT_DEF_FIELDS(ret_type); + bool is_shared; + TI_STMT_DEF_FIELDS(ret_type, is_shared); TI_DEFINE_ACCEPT_AND_CLONE }; diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index a2ea36d1ecfc2..609ebd36c931e 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -303,6 +303,7 @@ void export_lang(py::module &m) { .def("insert_external_func_call", &ASTBuilder::insert_external_func_call) .def("expr_alloca", &ASTBuilder::expr_alloca) .def("expr_alloca_local_tensor", &ASTBuilder::expr_alloca_local_tensor) + .def("expr_alloca_scratch_pad", &ASTBuilder::expr_alloca_scratch_pad) .def("create_assert_stmt", &ASTBuilder::create_assert_stmt) .def("expr_assign", &ASTBuilder::expr_assign) .def("begin_frontend_range_for", &ASTBuilder::begin_frontend_range_for) diff --git a/taichi/transforms/ir_printer.cpp b/taichi/transforms/ir_printer.cpp index eb94695e83125..76eb3f28153b9 100644 --- a/taichi/transforms/ir_printer.cpp +++ b/taichi/transforms/ir_printer.cpp @@ -119,8 +119,9 @@ class IRPrinter : public IRVisitor { } void visit(FrontendAllocaStmt *alloca) override { - print("{}${} = alloca {}", alloca->type_hint(), alloca->id, - alloca->ident.name()); + std::string shared_suffix = (alloca->is_shared) ? 
" shared" : ""; + print("{}${} = alloca {}{}", alloca->type_hint(), alloca->id, + alloca->ident.name(), shared_suffix); } void visit(FrontendAssertStmt *assert) override { @@ -187,7 +188,8 @@ class IRPrinter : public IRVisitor { } void visit(AllocaStmt *alloca) override { - print("{}${} = alloca", alloca->type_hint(), alloca->id); + std::string shared_suffix = (alloca->is_shared) ? " shared" : ""; + print("{}${} = alloca{}", alloca->type_hint(), alloca->id, shared_suffix); } void visit(RandStmt *stmt) override { diff --git a/taichi/transforms/lower_ast.cpp b/taichi/transforms/lower_ast.cpp index e7c390ce3c41e..fd339bae354e4 100644 --- a/taichi/transforms/lower_ast.cpp +++ b/taichi/transforms/lower_ast.cpp @@ -71,7 +71,7 @@ class LowerAST : public IRVisitor { if (stmt->ret_type->is()) { auto tensor_type = stmt->ret_type->cast(); auto lowered = std::make_unique( - tensor_type->get_shape(), tensor_type->get_element_type()); + tensor_type->get_shape(), tensor_type->get_element_type(), stmt->is_shared); block->local_var_to_stmt.insert(std::make_pair(ident, lowered.get())); stmt->parent->replace_with(stmt, std::move(lowered)); } else { From 88bcdaf7b635aedbd26e76b8325545d8f3324ae3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Jul 2022 16:18:29 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- python/taichi/lang/expr.py | 1 - python/taichi/lang/impl.py | 3 ++- python/taichi/lang/simt/block.py | 4 +++- taichi/codegen/llvm/codegen_llvm.cpp | 14 ++++++++------ taichi/ir/frontend_ir.cpp | 6 +++--- taichi/ir/frontend_ir.h | 7 ++++--- taichi/ir/statements.h | 2 +- taichi/transforms/lower_ast.cpp | 3 ++- 8 files changed, 23 insertions(+), 17 deletions(-) diff --git a/python/taichi/lang/expr.py b/python/taichi/lang/expr.py index d7814b798b96c..2f60e86aeb284 100644 --- a/python/taichi/lang/expr.py +++ 
b/python/taichi/lang/expr.py @@ -7,7 +7,6 @@ from taichi.types.primitive_types import integer_types, real_types - # Scalar, basic data type class Expr(TaichiOperations): """A Python-side Expr wrapper, whose member variable `ptr` is an instance of C++ Expr class. A C++ Expr object contains member variable `expr` which holds an instance of C++ Expression class.""" diff --git a/python/taichi/lang/impl.py b/python/taichi/lang/impl.py index 10a671c49359e..9349b346af93a 100644 --- a/python/taichi/lang/impl.py +++ b/python/taichi/lang/impl.py @@ -27,17 +27,18 @@ from taichi.types.primitive_types import all_types, f16, f32, f64, i32, i64 - @taichi_scope def expr_init_local_tensor(shape, element_type, elements): return get_runtime().prog.current_ast_builder().expr_alloca_local_tensor( shape, element_type, elements) + @taichi_scope def expr_init_scratch_pad(shape, element_type): return get_runtime().prog.current_ast_builder().expr_alloca_scratch_pad( shape, element_type) + @taichi_scope def expr_init(rhs): if rhs is None: diff --git a/python/taichi/lang/simt/block.py b/python/taichi/lang/simt/block.py index 390376c895d64..97b698b6bf164 100644 --- a/python/taichi/lang/simt/block.py +++ b/python/taichi/lang/simt/block.py @@ -1,12 +1,14 @@ from taichi.lang import impl from taichi.lang.util import taichi_scope + def sync(): return impl.call_internal("block_barrier", with_runtime_context=False) class SharedArray: _is_taichi_class = True + def __init__(self, shape, dtype): self.shape = shape self.dtype = dtype @@ -14,4 +16,4 @@ def __init__(self, shape, dtype): @taichi_scope def _subscript(self, indices, get_ref=False): - return impl.make_index_expr(self.shared_array_proxy, (indices,)) \ No newline at end of file + return impl.make_index_expr(self.shared_array_proxy, (indices, )) diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index d33e563987c3e..bb357a77de202 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ 
b/taichi/codegen/llvm/codegen_llvm.cpp @@ -129,13 +129,15 @@ void CodeGenLLVM::visit(AllocaStmt *stmt) { auto array_size = tlctx->get_constant(tensor_type->get_num_elements()); // Return type is [array_size x type]*. if (stmt->is_shared) { - size_t data_element_size = tlctx->get_type_size(tlctx->get_data_type(tensor_type->get_element_type())); - auto type = llvm::ArrayType::get(llvm::Type::getInt8Ty(*llvm_context), - data_element_size * tensor_type->get_num_elements()); + size_t data_element_size = tlctx->get_type_size( + tlctx->get_data_type(tensor_type->get_element_type())); + auto type = llvm::ArrayType::get( + llvm::Type::getInt8Ty(*llvm_context), + data_element_size * tensor_type->get_num_elements()); auto base = new llvm::GlobalVariable( *module, type, false, llvm::GlobalValue::ExternalLinkage, nullptr, - fmt::format("shared_array_{}", stmt->id), nullptr, llvm::GlobalVariable::NotThreadLocal, - 3 /*addrspace=shared*/); + fmt::format("shared_array_{}", stmt->id), nullptr, + llvm::GlobalVariable::NotThreadLocal, 3 /*addrspace=shared*/); base->setAlignment(llvm::MaybeAlign(8)); auto ptr = builder->CreateGEP( @@ -145,7 +147,7 @@ void CodeGenLLVM::visit(AllocaStmt *stmt) { base, {tlctx->get_constant(0), tlctx->get_constant(0)}); auto ptr_type = llvm::PointerType::get( tlctx->get_data_type(tensor_type->get_element_type()), 0); - llvm_val[stmt] = builder->CreatePointerCast(ptr, ptr_type); + llvm_val[stmt] = builder->CreatePointerCast(ptr, ptr_type); } else { llvm_val[stmt] = create_entry_block_alloca(type, 0, array_size); } diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index 15d52fff10b30..6b7927617cff9 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -986,11 +986,11 @@ Expr ASTBuilder::expr_alloca_local_tensor(const std::vector &shape, } Expr ASTBuilder::expr_alloca_scratch_pad(const std::vector &shape, - const DataType &element_type) { + const DataType &element_type) { auto var = Expr(std::make_shared(get_next_id())); 
this->insert(std::make_unique( - std::static_pointer_cast(var.expr)->id, shape, - element_type, true)); + std::static_pointer_cast(var.expr)->id, shape, element_type, + true)); var->ret_type = this->get_last_stmt()->ret_type; return var; } diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index bb4fbba305408..e2a7f8e84613d 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -63,14 +63,15 @@ class FrontendAllocaStmt : public Stmt { public: Identifier ident; - FrontendAllocaStmt(const Identifier &lhs, DataType type) : ident(lhs), is_shared(false) { + FrontendAllocaStmt(const Identifier &lhs, DataType type) + : ident(lhs), is_shared(false) { ret_type = TypeFactory::create_vector_or_scalar_type(1, type); } FrontendAllocaStmt(const Identifier &lhs, std::vector shape, - DataType element, - bool is_shared=false) + DataType element, + bool is_shared = false) : ident(lhs), is_shared(is_shared) { ret_type = DataType(TypeFactory::create_tensor_type(shape, element)); } diff --git a/taichi/ir/statements.h b/taichi/ir/statements.h index f0409c6f6700c..02937e9849d1f 100644 --- a/taichi/ir/statements.h +++ b/taichi/ir/statements.h @@ -23,7 +23,7 @@ class AllocaStmt : public Stmt { TI_STMT_REG_FIELDS; } - AllocaStmt(int width, DataType type) : is_shared(false) { + AllocaStmt(int width, DataType type) : is_shared(false) { ret_type = TypeFactory::create_vector_or_scalar_type(width, type); TI_STMT_REG_FIELDS; } diff --git a/taichi/transforms/lower_ast.cpp b/taichi/transforms/lower_ast.cpp index fd339bae354e4..90e33cba4e01b 100644 --- a/taichi/transforms/lower_ast.cpp +++ b/taichi/transforms/lower_ast.cpp @@ -71,7 +71,8 @@ class LowerAST : public IRVisitor { if (stmt->ret_type->is()) { auto tensor_type = stmt->ret_type->cast(); auto lowered = std::make_unique( - tensor_type->get_shape(), tensor_type->get_element_type(), stmt->is_shared); + tensor_type->get_shape(), tensor_type->get_element_type(), + stmt->is_shared); 
block->local_var_to_stmt.insert(std::make_pair(ident, lowered.get())); stmt->parent->replace_with(stmt, std::move(lowered)); } else { From a33f7848662e27989f2328e9376d74f9f5218b08 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Mon, 18 Jul 2022 11:08:39 +0800 Subject: [PATCH 03/11] Minor update for name consistency --- python/taichi/lang/ast/ast_transformer.py | 1 + python/taichi/lang/impl.py | 2 +- python/taichi/lang/simt/__init__.py | 1 + python/taichi/lang/simt/block.py | 3 ++- taichi/ir/frontend_ir.cpp | 4 ++-- taichi/ir/frontend_ir.h | 4 ++-- taichi/python/export_lang.cpp | 2 +- 7 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python/taichi/lang/ast/ast_transformer.py b/python/taichi/lang/ast/ast_transformer.py index 76015ac9efd94..e565045cb109c 100644 --- a/python/taichi/lang/ast/ast_transformer.py +++ b/python/taichi/lang/ast/ast_transformer.py @@ -97,6 +97,7 @@ def build_Assign(ctx, node): # Ref https://github.com/taichi-dev/taichi/issues/2659. values = node.value.ptr if is_static_assign else impl.expr_init( node.value.ptr) + for node_target in node.targets: ASTTransformer.build_assign_unpack(ctx, node_target, values, is_static_assign) diff --git a/python/taichi/lang/impl.py b/python/taichi/lang/impl.py index 9349b346af93a..e032e38eaeb5a 100644 --- a/python/taichi/lang/impl.py +++ b/python/taichi/lang/impl.py @@ -35,7 +35,7 @@ def expr_init_local_tensor(shape, element_type, elements): @taichi_scope def expr_init_scratch_pad(shape, element_type): - return get_runtime().prog.current_ast_builder().expr_alloca_scratch_pad( + return get_runtime().prog.current_ast_builder().expr_alloca_shared_array( shape, element_type) diff --git a/python/taichi/lang/simt/__init__.py b/python/taichi/lang/simt/__init__.py index b7a0be5dea542..aae40ddf381eb 100644 --- a/python/taichi/lang/simt/__init__.py +++ b/python/taichi/lang/simt/__init__.py @@ -1,3 +1,4 @@ from taichi.lang.simt import block, grid, subgroup, warp +#from taichi.lang.simt.block import SharedArray 
__all__ = ['warp', 'subgroup', 'block', 'grid'] diff --git a/python/taichi/lang/simt/block.py b/python/taichi/lang/simt/block.py index 97b698b6bf164..8316e3d35bbc4 100644 --- a/python/taichi/lang/simt/block.py +++ b/python/taichi/lang/simt/block.py @@ -16,4 +16,5 @@ def __init__(self, shape, dtype): @taichi_scope def _subscript(self, indices, get_ref=False): - return impl.make_index_expr(self.shared_array_proxy, (indices, )) + return impl.make_index_expr(self.shared_array_proxy, (indices,)) + \ No newline at end of file diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index 6b7927617cff9..56d5de025a9e8 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -985,8 +985,8 @@ Expr ASTBuilder::expr_alloca_local_tensor(const std::vector &shape, return var; } -Expr ASTBuilder::expr_alloca_scratch_pad(const std::vector &shape, - const DataType &element_type) { +Expr ASTBuilder::expr_alloca_shared_array(const std::vector &shape, + const DataType &element_type) { auto var = Expr(std::make_shared(get_next_id())); this->insert(std::make_unique( std::static_pointer_cast(var.expr)->id, shape, element_type, diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index e2a7f8e84613d..0f831a2fa158e 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -878,7 +878,7 @@ class ASTBuilder { Expr expr_alloca_local_tensor(const std::vector &shape, const DataType &element_type, const ExprGroup &elements); - Expr expr_alloca_scratch_pad(const std::vector &shape, + Expr expr_alloca_shared_array(const std::vector &shape, const DataType &element_type); void expr_assign(const Expr &lhs, const Expr &rhs, std::string tb); void create_assert_stmt(const Expr &cond, @@ -913,7 +913,7 @@ class ASTBuilder { } void block_dim(int v) { - TI_ASSERT(bit::is_power_of_two(v)); + //TI_ASSERT(bit::is_power_of_two(v)); for_loop_dec_.config.block_dim = v; } diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 
609ebd36c931e..edf26ec7078c5 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -303,7 +303,7 @@ void export_lang(py::module &m) { .def("insert_external_func_call", &ASTBuilder::insert_external_func_call) .def("expr_alloca", &ASTBuilder::expr_alloca) .def("expr_alloca_local_tensor", &ASTBuilder::expr_alloca_local_tensor) - .def("expr_alloca_scratch_pad", &ASTBuilder::expr_alloca_scratch_pad) + .def("expr_alloca_shared_array", &ASTBuilder::expr_alloca_shared_array) .def("create_assert_stmt", &ASTBuilder::create_assert_stmt) .def("expr_assign", &ASTBuilder::expr_assign) .def("begin_frontend_range_for", &ASTBuilder::begin_frontend_range_for) From f5117e229e1f511dfb18db14a23fae8b0cae8c26 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Mon, 18 Jul 2022 11:09:52 +0800 Subject: [PATCH 04/11] format --- python/taichi/lang/simt/__init__.py | 1 + python/taichi/lang/simt/block.py | 3 +-- taichi/ir/frontend_ir.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/taichi/lang/simt/__init__.py b/python/taichi/lang/simt/__init__.py index aae40ddf381eb..a8c121c1e3d90 100644 --- a/python/taichi/lang/simt/__init__.py +++ b/python/taichi/lang/simt/__init__.py @@ -1,4 +1,5 @@ from taichi.lang.simt import block, grid, subgroup, warp + #from taichi.lang.simt.block import SharedArray __all__ = ['warp', 'subgroup', 'block', 'grid'] diff --git a/python/taichi/lang/simt/block.py b/python/taichi/lang/simt/block.py index 8316e3d35bbc4..97b698b6bf164 100644 --- a/python/taichi/lang/simt/block.py +++ b/python/taichi/lang/simt/block.py @@ -16,5 +16,4 @@ def __init__(self, shape, dtype): @taichi_scope def _subscript(self, indices, get_ref=False): - return impl.make_index_expr(self.shared_array_proxy, (indices,)) - \ No newline at end of file + return impl.make_index_expr(self.shared_array_proxy, (indices, )) diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index 0f831a2fa158e..4bbdb26f5219f 100644 --- 
a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -879,7 +879,7 @@ class ASTBuilder { const DataType &element_type, const ExprGroup &elements); Expr expr_alloca_shared_array(const std::vector &shape, - const DataType &element_type); + const DataType &element_type); void expr_assign(const Expr &lhs, const Expr &rhs, std::string tb); void create_assert_stmt(const Expr &cond, const std::string &msg, @@ -913,7 +913,7 @@ class ASTBuilder { } void block_dim(int v) { - //TI_ASSERT(bit::is_power_of_two(v)); + // TI_ASSERT(bit::is_power_of_two(v)); for_loop_dec_.config.block_dim = v; } From 3ce855af84709505d9d6a7233d52a01c9971db77 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Mon, 18 Jul 2022 11:39:24 +0800 Subject: [PATCH 05/11] Minor patches --- python/taichi/lang/impl.py | 2 +- python/taichi/lang/simt/__init__.py | 2 -- python/taichi/lang/simt/block.py | 2 +- taichi/ir/frontend_ir.h | 6 +++++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/taichi/lang/impl.py b/python/taichi/lang/impl.py index e032e38eaeb5a..4168454898c43 100644 --- a/python/taichi/lang/impl.py +++ b/python/taichi/lang/impl.py @@ -34,7 +34,7 @@ def expr_init_local_tensor(shape, element_type, elements): @taichi_scope -def expr_init_scratch_pad(shape, element_type): +def expr_init_shared_array(shape, element_type): return get_runtime().prog.current_ast_builder().expr_alloca_shared_array( shape, element_type) diff --git a/python/taichi/lang/simt/__init__.py b/python/taichi/lang/simt/__init__.py index a8c121c1e3d90..b7a0be5dea542 100644 --- a/python/taichi/lang/simt/__init__.py +++ b/python/taichi/lang/simt/__init__.py @@ -1,5 +1,3 @@ from taichi.lang.simt import block, grid, subgroup, warp -#from taichi.lang.simt.block import SharedArray - __all__ = ['warp', 'subgroup', 'block', 'grid'] diff --git a/python/taichi/lang/simt/block.py b/python/taichi/lang/simt/block.py index 97b698b6bf164..1d51a1a784dd3 100644 --- a/python/taichi/lang/simt/block.py +++ 
b/python/taichi/lang/simt/block.py @@ -12,7 +12,7 @@ class SharedArray: def __init__(self, shape, dtype): self.shape = shape self.dtype = dtype - self.shared_array_proxy = impl.expr_init_scratch_pad(shape, dtype) + self.shared_array_proxy = impl.expr_init_shared_array(shape, dtype) @taichi_scope def _subscript(self, indices, get_ref=False): diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index 4bbdb26f5219f..1e70069dd7b9f 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -913,7 +913,11 @@ class ASTBuilder { } void block_dim(int v) { - // TI_ASSERT(bit::is_power_of_two(v)); + if (arch_ == Arch::cuda) { + TI_ASSERT(v % 32 == 0); + } else { + TI_ASSERT(bit::is_power_of_two(v)); + } for_loop_dec_.config.block_dim = v; } From b7382cf194923c73408bdddf6153a3d14ca2f427 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Mon, 18 Jul 2022 15:14:41 +0800 Subject: [PATCH 06/11] Add a simple test --- tests/python/test_shared_array.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 tests/python/test_shared_array.py diff --git a/tests/python/test_shared_array.py b/tests/python/test_shared_array.py new file mode 100644 index 0000000000000..3f87072e0444f --- /dev/null +++ b/tests/python/test_shared_array.py @@ -0,0 +1,26 @@ +import taichi as ti +from tests import test_utils + + +@test_utils.test(arch=ti.cuda) +def test_shared_array_save(): + block_dim=128 + pad_num = 16 + a = ti.field(dtype=ti.f32, shape=(block_dim * pad_num,)) + @ti.kernel + def func(): + ti.loop_config(block_dim=block_dim) + for i in range(block_dim * pad_num): + g_tid = ti.global_thread_idx() + tid = g_tid % block_dim + pad = ti.simt.block.SharedArray((block_dim,), ti.f32) + pad[tid] = tid * 2.0 + ti.simt.block.sync() + a[i] = pad[tid] + ti.simt.block.sync() + func() + print(a) + for i in range(pad_num): + assert a[i * block_dim + 7] == 14.0 + assert a[i * block_dim + 29] == 58.0 + assert a[i * block_dim + 127] == 254.0 From 
bb029026794b9384c392eb5a2f366143e2e159da Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Jul 2022 07:16:47 +0000 Subject: [PATCH 07/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/python/test_shared_array.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/python/test_shared_array.py b/tests/python/test_shared_array.py index 3f87072e0444f..47c8613b83e19 100644 --- a/tests/python/test_shared_array.py +++ b/tests/python/test_shared_array.py @@ -4,20 +4,22 @@ @test_utils.test(arch=ti.cuda) def test_shared_array_save(): - block_dim=128 + block_dim = 128 pad_num = 16 - a = ti.field(dtype=ti.f32, shape=(block_dim * pad_num,)) + a = ti.field(dtype=ti.f32, shape=(block_dim * pad_num, )) + @ti.kernel def func(): ti.loop_config(block_dim=block_dim) for i in range(block_dim * pad_num): g_tid = ti.global_thread_idx() tid = g_tid % block_dim - pad = ti.simt.block.SharedArray((block_dim,), ti.f32) + pad = ti.simt.block.SharedArray((block_dim, ), ti.f32) pad[tid] = tid * 2.0 ti.simt.block.sync() a[i] = pad[tid] ti.simt.block.sync() + func() print(a) for i in range(pad_num): From dfffafe77ed737b71eec57b1f72b471dbe0eba98 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Mon, 18 Jul 2022 15:42:02 +0800 Subject: [PATCH 08/11] minor fix --- tests/python/test_shared_array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/test_shared_array.py b/tests/python/test_shared_array.py index 3f87072e0444f..f511d92992d64 100644 --- a/tests/python/test_shared_array.py +++ b/tests/python/test_shared_array.py @@ -19,7 +19,6 @@ def func(): a[i] = pad[tid] ti.simt.block.sync() func() - print(a) for i in range(pad_num): assert a[i * block_dim + 7] == 14.0 assert a[i * block_dim + 29] == 58.0 From 95a41dd9b8c666e5481199cf5e66577f5cc108b5 Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Tue, 19 Jul 2022 16:22:45 +0800 
Subject: [PATCH 09/11] Re-enable CUDA launch parameter for small block dim --- taichi/ir/frontend_ir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index 1e70069dd7b9f..9859f6a9cb63d 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -914,7 +914,7 @@ class ASTBuilder { void block_dim(int v) { if (arch_ == Arch::cuda) { - TI_ASSERT(v % 32 == 0); + TI_ASSERT((v % 32 == 0) || bit::is_power_of_two(v)); } else { TI_ASSERT(bit::is_power_of_two(v)); } From c1bbaee138b33c85f04c9ddbb4f7baa8b6bb104e Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Wed, 20 Jul 2022 18:11:59 +0800 Subject: [PATCH 10/11] Minor fixes: shared memory and indices reference --- python/taichi/lang/simt/block.py | 2 +- taichi/transforms/ir_printer.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/taichi/lang/simt/block.py b/python/taichi/lang/simt/block.py index 1d51a1a784dd3..0baf3f352b5de 100644 --- a/python/taichi/lang/simt/block.py +++ b/python/taichi/lang/simt/block.py @@ -15,5 +15,5 @@ def __init__(self, shape, dtype): self.shared_array_proxy = impl.expr_init_shared_array(shape, dtype) @taichi_scope - def _subscript(self, indices, get_ref=False): + def _subscript(self, *indices, get_ref=False): return impl.make_index_expr(self.shared_array_proxy, (indices, )) diff --git a/taichi/transforms/ir_printer.cpp b/taichi/transforms/ir_printer.cpp index 76eb3f28153b9..b3041bb2acf34 100644 --- a/taichi/transforms/ir_printer.cpp +++ b/taichi/transforms/ir_printer.cpp @@ -119,9 +119,9 @@ class IRPrinter : public IRVisitor { } void visit(FrontendAllocaStmt *alloca) override { - std::string shared_suffix = (alloca->is_shared) ? " shared" : ""; - print("{}${} = alloca {}{}", alloca->type_hint(), alloca->id, - alloca->ident.name(), shared_suffix); + std::string shared_suffix = (alloca->is_shared) ? 
"(shared)" : ""; + print("{}${} = alloca{} {}", alloca->type_hint(), alloca->id, + shared_suffix, alloca->ident.name()); } void visit(FrontendAssertStmt *assert) override { @@ -188,7 +188,7 @@ class IRPrinter : public IRVisitor { } void visit(AllocaStmt *alloca) override { - std::string shared_suffix = (alloca->is_shared) ? " shared" : ""; + std::string shared_suffix = (alloca->is_shared) ? "(shared)" : ""; print("{}${} = alloca{}", alloca->type_hint(), alloca->id, shared_suffix); } From eb862e3c83edf069d757046d5865f053ede9839b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Jul 2022 10:13:32 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/transforms/ir_printer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taichi/transforms/ir_printer.cpp b/taichi/transforms/ir_printer.cpp index b3041bb2acf34..ca462e42773e8 100644 --- a/taichi/transforms/ir_printer.cpp +++ b/taichi/transforms/ir_printer.cpp @@ -120,8 +120,8 @@ class IRPrinter : public IRVisitor { void visit(FrontendAllocaStmt *alloca) override { std::string shared_suffix = (alloca->is_shared) ? "(shared)" : ""; - print("{}${} = alloca{} {}", alloca->type_hint(), alloca->id, - shared_suffix, alloca->ident.name()); + print("{}${} = alloca{} {}", alloca->type_hint(), alloca->id, shared_suffix, + alloca->ident.name()); } void visit(FrontendAssertStmt *assert) override {