Skip to content

Commit

Permalink
add PEs(matmul, all relus) and Ops(elementwise_add, relu6, mul) and C++/python tests (PaddlePaddle#198)
Browse files Browse the repository at this point in the history

* add PEs(matmul, all relus) and Ops(elementwise_add, relu6, mul) and C++/python tests

* modify codes

* modify code styles

Co-authored-by: Yan Chunwei <yanchunwei@outlook.com>
  • Loading branch information
wenming2014 and Superjomn committed Sep 2, 2020
1 parent 00b99d4 commit 9b6e04c
Show file tree
Hide file tree
Showing 33 changed files with 998 additions and 233 deletions.
8 changes: 8 additions & 0 deletions cinn/common/ir_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,13 @@ Expr make_const(Type t, T v) {
return Expr();
}

/**
 * \brief Left-fold \p values into \p init_value with the binary functor \p funcOp.
 *
 * Computes funcOp(...funcOp(funcOp(init_value, values[0]), values[1])..., values[n-1]).
 * An empty \p values returns \p init_value unchanged.
 *
 * Generalized over the element type so it works for any foldable value
 * (Expr callers deduce ValueT = Expr and are unaffected).
 *
 * @param funcOp Binary functor taking (accumulator, element) and returning the new accumulator.
 * @param init_value Initial accumulator value.
 * @param values Elements folded in order, left to right.
 * @return The final accumulator.
 */
template <typename FuncOp, typename ValueT>
ValueT FoldExpr(FuncOp funcOp, ValueT init_value, const std::vector<ValueT> &values) {
  for (const ValueT &val : values) {
    init_value = funcOp(init_value, val);
  }
  return init_value;
}

} // namespace common
} // namespace cinn
2 changes: 1 addition & 1 deletion cinn/frontend/syntax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Placeholder::operator Variable() {
}

Variable Program::add(const Variable& a, const Variable& b) {
Instruction instr("add");
Instruction instr("elementwise_add");
instr.SetInputs({a, b});
AddInstruction(instr);
return instr.GetOutputs()[0];
Expand Down
9 changes: 8 additions & 1 deletion cinn/frontend/syntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,14 @@ struct Variable : public common::Shared<_Variable_> {
* Data of a Instruction.
*/
struct _Instruction_ : public common::Object {
using attr_t = std::variant<int, float, std::string, std::vector<int>, std::vector<float>, std::vector<std::string>>;
using attr_t = std::variant<int,
float,
bool,
std::string,
std::vector<int>,
std::vector<float>,
std::vector<bool>,
std::vector<std::string>>;

std::string op_type;
std::unordered_map<std::string, attr_t> attrs;
Expand Down
18 changes: 16 additions & 2 deletions cinn/hlir/framework/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ using NodePtr = std::shared_ptr<Node>;
* and other parameters like axis.
*/
struct NodeAttr {
using attr_t = std::variant<int, float, std::string, std::vector<int>, std::vector<float>, std::vector<std::string>>;
using attr_t = std::variant<int,
float,
bool,
std::string,
std::vector<int>,
std::vector<float>,
std::vector<bool>,
std::vector<std::string>>;

/**
* \brief The operator this node uses.
Expand Down Expand Up @@ -90,7 +97,14 @@ class Node : public common::GraphNode {
* \brief NodeData represents the output data from an operator.
*/
class NodeData : public common::GraphNode {
using attr_t = std::variant<int, float, std::string, std::vector<int>, std::vector<float>, std::vector<std::string>>;
using attr_t = std::variant<int,
float,
bool,
std::string,
std::vector<int>,
std::vector<float>,
std::vector<bool>,
std::vector<std::string>>;

public:
NodeData(NodePtr node, uint32_t index, uint32_t version, std::string id)
Expand Down
4 changes: 2 additions & 2 deletions cinn/hlir/framework/op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace framework {
using CCompute = std::function<std::shared_ptr<ir::Tensor>(const std::vector<ir::Tensor>)>;

TEST(Operator, GetAttrs) {
auto add = Operator::Get("add");
auto add = Operator::Get("elementwise_add");
Operator temp = *add;
auto strategy = Operator::GetAttrs<StrategyFunction>("CINNStrategy");

Expand Down Expand Up @@ -46,7 +46,7 @@ TEST(Operator, GetAttrs) {
auto func = Lower("add1", rets.back(), inputs);
LOG(INFO) << "Test Strategy Codegen:\n" << func;

ASSERT_EQ(impl->name, "strategy.add.x86");
ASSERT_EQ(impl->name, "strategy.elementwise_add.x86");
ASSERT_EQ(add->description, "Add two tensors");
}

Expand Down
8 changes: 7 additions & 1 deletion cinn/hlir/framework/print_graph_pass_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "cinn/hlir/framework/pass.h"
#include "cinn/hlir/op/use_ops.h"
#include "cinn/lang/packed_func.h"
#include "cinn/utils/string.h"

namespace cinn {
namespace hlir {
Expand Down Expand Up @@ -50,7 +51,12 @@ TEST(Operator, GetAttrs) {
ApplyPass(g, "PrintGraph");
auto s = g->GetAttrs<std::string>("print_graph");
LOG(INFO) << s;
ASSERT_EQ(s, "0:add(add_0)\n1:add(add_1)\n2:add(add_2)\n");
std::string target_str = R"ROC(
0:elementwise_add(elementwise_add_0)
1:elementwise_add(elementwise_add_1)
2:elementwise_add(elementwise_add_2)
)ROC";
ASSERT_EQ(utils::Trim(s), utils::Trim(target_str));
}

} // namespace framework
Expand Down
6 changes: 5 additions & 1 deletion cinn/hlir/op/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
set(srcs
nn.cc
nn.cc
broadcast.cc
transform.cc
)

foreach(cpp ${srcs})
set(core_src
"${core_src};cinn/hlir/op/${cpp}"
CACHE INTERNAL "")
endforeach()

cc_test(test_op_broadcast SRCS op_broadcast_test.cc DEPS core)
81 changes: 81 additions & 0 deletions cinn/hlir/op/broadcast.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#include "cinn/hlir/pe/broadcast.h"

#include <iostream>
#include "cinn/hlir/framework/node.h"
#include "cinn/hlir/framework/op.h"
#include "cinn/hlir/framework/op_strategy.h"

namespace cinn {
namespace hlir {
namespace op {
using common::_CINNValuePack_;
using common::CINNValue;
using common::CINNValuePack;
using framework::OpStrategy;
using framework::StrategyFunction;

/**
 * \brief Builds the CINN strategy (compute + schedule) for the elementwise_add op.
 *
 * The compute expects two tensor arguments and emits pe::Add on them; an
 * optional integer "axis" attribute (for broadcasting) is forwarded when
 * present in \p attrs. The schedule passes the compute result through
 * unchanged.
 *
 * @param attrs Node attributes; only "axis" (int) is consulted here.
 * @param inputs Input tensors (unused here; the compute reads its args at call time).
 * @param out_type Output types (unused).
 * @param target Compilation target (unused).
 * @return The strategy with a single x86 implementation registered.
 */
std::shared_ptr<OpStrategy> StrategyForElementwiseAdd(const framework::NodeAttr &attrs,
                                                      const std::vector<ir::Tensor> &inputs,
                                                      const std::vector<Type> &out_type,
                                                      const Target &target) {
  // Capture `attrs` by value: the lambda is stored in the returned OpStrategy
  // and may run after this function (and its `attrs` reference parameter) has
  // gone out of scope, so a by-reference capture would dangle.
  framework::CINNCompute add_compute([attrs](lang::Args args, lang::RetValue *ret) {
    CINNValuePack a = args[0];
    ir::Expr A_expr = a[0];
    ir::Expr B_expr = a[1];
    CHECK(A_expr.as_tensor());
    CHECK(B_expr.as_tensor());
    ir::Tensor A = A_expr.as_tensor_ref();
    ir::Tensor B = B_expr.as_tensor_ref();
    const auto &attr_store = attrs.attr_store;
    auto iter = attr_store.find("axis");
    ir::Expr axis;
    if (iter != attr_store.end()) {
      // The attribute store is a variant; elementwise_add's axis is an int.
      axis = ir::Expr(std::get<int>(iter->second));
    }

    auto out = pe::Add(A, B, UniqName("C"), axis);

    auto stages = CreateStages({out});
    *ret = CINNValuePack{{CINNValue(ir::Expr(out.get())), CINNValue(stages)}};
  });

  // Identity schedule: forward the (tensor, stages) pack untouched.
  framework::CINNSchedule add_schedule([](lang::Args args, lang::RetValue *ret) {
    CINNValuePack arg_pack = args[0];
    ir::Expr A [[maybe_unused]] = arg_pack[0];
    CHECK_EQ(arg_pack.size(), 2UL);
    *ret = arg_pack;
  });

  auto strategy = std::make_shared<framework::OpStrategy>();
  strategy->AddImpl(add_compute, add_schedule, "strategy.elementwise_add.x86", 1);

  return strategy;
}

// Shape inference for elementwise_add: the output takes the first input's shape.
std::vector<std::vector<int>> InferShapeForElementwiseAdd(const std::vector<std::vector<int>> &inputs_shape,
                                                          const framework::NodeAttr &attrs) {
  CHECK(!inputs_shape.empty() && !inputs_shape[0].empty()) << "The input's shape size is 0! Please check again.";
  return {inputs_shape[0]};
}

// Dtype inference for elementwise_add: the output keeps the first input's type.
std::vector<Type> InferDtypeForElementwiseAdd(const std::vector<Type> &inputs_type, const framework::NodeAttr &attrs) {
  CHECK(!inputs_type.empty()) << "The input's type size is 0! Please check again.";
  return {inputs_type[0]};
}

} // namespace op
} // namespace hlir
} // namespace cinn

// Registers the broadcast-family operators with the global op registry.
// elementwise_add: two tensor inputs, one output; wires up the x86 strategy
// and the shape/dtype inference hooks defined above.
CINN_REGISTER_HELPER(broadcast_ops) {
CINN_REGISTER_OP(elementwise_add)
.describe("Add two tensors")
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr<cinn::hlir::framework::StrategyFunction>("CINNStrategy", cinn::hlir::op::StrategyForElementwiseAdd)
.set_attr("infershape", std::function(cinn::hlir::op::InferShapeForElementwiseAdd))
.set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForElementwiseAdd))
.set_support_level(4);
}
79 changes: 35 additions & 44 deletions cinn/hlir/op/nn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,57 +13,58 @@ using common::CINNValuePack;
using framework::OpStrategy;
using framework::StrategyFunction;

std::shared_ptr<OpStrategy> StrategyForAdd(const framework::NodeAttr &attrs,
const std::vector<ir::Tensor> &inputs,
const std::vector<Type> &out_type,
const Target &target) {
framework::CINNCompute add_compute([](lang::Args args, lang::RetValue *ret) {
std::shared_ptr<OpStrategy> StrategyForRelu(const framework::NodeAttr &attrs,
const std::vector<ir::Tensor> &inputs,
const std::vector<Type> &out_type,
const Target &target) {
framework::CINNCompute relu_compute([](lang::Args args, lang::RetValue *ret) {
CINNValuePack a = args[0];
ir::Expr A = a[0];
ir::Expr B = a[1];
CHECK(A.as_tensor());
CHECK(B.as_tensor());
auto out = pe::Add(A.as_tensor_ref(), B.as_tensor_ref(), UniqName("C"));

auto out = pe::Relu<float>(A.as_tensor_ref(), 0.0, UniqName("Relu_output"));
auto stages = CreateStages({out});
*ret = CINNValuePack{{CINNValue(ir::Expr(out.get())), CINNValue(stages)}};
});

framework::CINNSchedule add_schedule([](lang::Args args, lang::RetValue *ret) {
framework::CINNSchedule relu_schedule([](lang::Args args, lang::RetValue *ret) {
CINNValuePack arg_pack = args[0];
ir::Expr A [[maybe_unused]] = arg_pack[0];
CHECK_EQ(arg_pack.size(), 2UL);
*ret = arg_pack;
});

auto strategy = std::make_shared<framework::OpStrategy>();
strategy->AddImpl(add_compute, add_schedule, "strategy.add.x86", 1);

CHECK(out_type.size()) << "Out_type of relu op is empty! Please check.";
if (out_type[0] == Float(32)) {
strategy->AddImpl(relu_compute, relu_schedule, "strategy.relu.x86", 1);
} else {
LOG(INFO) << "Relu op with dtype != float32 is not implemented yet!";
}
return strategy;
}

std::vector<std::vector<int>> InferShapeForAdd(const std::vector<std::vector<int>> &inputs_shape,
const framework::NodeAttr &attrs) {
std::vector<std::vector<int>> InferShapeForRelu(const std::vector<std::vector<int>> &inputs_shape,
const framework::NodeAttr &attrs) {
CHECK(!inputs_shape.empty() && !inputs_shape[0].empty()) << "The input's shape size is 0! Please check again.";
std::vector<std::vector<int>> res{inputs_shape[0]};
return res;
}

std::vector<Type> InferDtypeForAdd(const std::vector<Type> &inputs_type, const framework::NodeAttr &attrs) {
std::vector<Type> InferDtypeForRelu(const std::vector<Type> &inputs_type, const framework::NodeAttr &attrs) {
CHECK(!inputs_type.empty()) << "The input's type size is 0! Please check again.";
std::vector<Type> res{inputs_type[0]};
return res;
}

std::shared_ptr<OpStrategy> StrategyForRelu(const framework::NodeAttr &attrs,
const std::vector<ir::Tensor> &inputs,
const std::vector<Type> &out_type,
const Target &target) {
std::shared_ptr<OpStrategy> StrategyForRelu6(const framework::NodeAttr &attrs,
const std::vector<ir::Tensor> &inputs,
const std::vector<Type> &out_type,
const Target &target) {
framework::CINNCompute relu_compute([](lang::Args args, lang::RetValue *ret) {
CINNValuePack a = args[0];
ir::Expr A = a[0];
CHECK(A.as_tensor());
auto out = pe::Relu<float>(A.as_tensor_ref(), 0.0, UniqName("Relu_output"));
auto out = pe::Relu6<float>(A.as_tensor_ref(), 0.0, UniqName("Relu6_output"));
auto stages = CreateStages({out});
*ret = CINNValuePack{{CINNValue(ir::Expr(out.get())), CINNValue(stages)}};
});
Expand All @@ -76,28 +77,15 @@ std::shared_ptr<OpStrategy> StrategyForRelu(const framework::NodeAttr &attrs,
});

auto strategy = std::make_shared<framework::OpStrategy>();
CHECK(out_type.size()) << "Out_type of relu op is empty! Please check.";
CHECK(out_type.size()) << "Out_type of relu6 op is empty! Please check.";
if (out_type[0] == Float(32)) {
strategy->AddImpl(relu_compute, relu_schedule, "strategy.relu.x86", 1);
strategy->AddImpl(relu_compute, relu_schedule, "strategy.relu6.x86", 1);
} else {
LOG(INFO) << "Relu op with dtype != float32 is not implemented yet!";
LOG(INFO) << "Relu6 op with dtype != float32 is not implemented yet!";
}
return strategy;
}

std::vector<std::vector<int>> InferShapeForRelu(const std::vector<std::vector<int>> &inputs_shape,
const framework::NodeAttr &attrs) {
CHECK(!inputs_shape.empty() && !inputs_shape[0].empty()) << "The input's shape size is 0! Please check again.";
std::vector<std::vector<int>> res{inputs_shape[0]};
return res;
}

std::vector<Type> InferDtypeForRelu(const std::vector<Type> &inputs_type, const framework::NodeAttr &attrs) {
CHECK(!inputs_type.empty()) << "The input's type size is 0! Please check again.";
std::vector<Type> res{inputs_type[0]};
return res;
}

std::shared_ptr<OpStrategy> StrategyForConv2d(const framework::NodeAttr &attrs,
const std::vector<ir::Tensor> &inputs,
const std::vector<Type> &out_type,
Expand Down Expand Up @@ -245,14 +233,6 @@ std::vector<Type> InferDtypeForBatchNorm(const std::vector<Type> &inputs_type, c
} // namespace cinn

CINN_REGISTER_HELPER(nn_ops) {
CINN_REGISTER_OP(add)
.describe("Add two tensors")
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr<cinn::hlir::framework::StrategyFunction>("CINNStrategy", cinn::hlir::op::StrategyForAdd)
.set_attr("infershape", std::function(cinn::hlir::op::InferShapeForAdd))
.set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForAdd))
.set_support_level(4);
CINN_REGISTER_OP(relu)
.describe("Output 0 for each input element < 0. Output itself for each input element >= 0.")
.set_num_inputs(1)
Expand All @@ -261,6 +241,16 @@ CINN_REGISTER_HELPER(nn_ops) {
.set_attr("infershape", std::function(cinn::hlir::op::InferShapeForRelu))
.set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForRelu))
.set_support_level(4);

CINN_REGISTER_OP(relu6)
.describe("Output 0 for each input element < 0. Output itself for each input element >= 0 and <=6.")
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<cinn::hlir::framework::StrategyFunction>("CINNStrategy", cinn::hlir::op::StrategyForRelu6)
.set_attr("infershape", std::function(cinn::hlir::op::InferShapeForRelu))
.set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForRelu))
.set_support_level(4);

CINN_REGISTER_OP(conv2d)
.describe("Do a 2-D convolution with an NCHW-layout.")
.set_num_inputs(2) // here we consider filter as anohter input
Expand All @@ -269,6 +259,7 @@ CINN_REGISTER_HELPER(nn_ops) {
.set_attr("infershape", std::function(cinn::hlir::op::InferShapeForConv2d))
.set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForConv2d))
.set_support_level(4);

CINN_REGISTER_OP(batchnorm)
.describe("Can be used as a normalizer function for convolution or fully_connected operations.")
.set_num_inputs(2) // here we consider batchnorm's 4 attrs(mean, variance, scale, bias) as another input
Expand Down
Loading

0 comments on commit 9b6e04c

Please sign in to comment.