GNNE-1781 resolve #1024

Merged · 39 commits · Jul 31, 2023

Commits
3020d62
resolve
Jul 25, 2023
6702971
Apply code-format changes
Jul 25, 2023
32ca67c
Move sh script to tools/
lerenhua Jul 26, 2023
c818f53
resolve
Jul 26, 2023
ca714f1
Merge remote-tracking branch 'origin/fix/resolve_kernel_test' into fi…
Jul 26, 2023
5ad3b4b
resolve
Jul 26, 2023
57fc997
resolve
Jul 26, 2023
21c6ad8
resolve
Jul 26, 2023
af7cb0d
resolve
Jul 26, 2023
8a67014
resolve
Jul 27, 2023
48a44f3
resolve
Jul 27, 2023
9cb42da
resolve
Jul 27, 2023
e8f6197
resolve
Jul 27, 2023
02eb102
resolve
Jul 27, 2023
0287b53
resolve
Jul 27, 2023
ccc3867
resolve
Jul 27, 2023
d5e5dd0
Merge branch 'master' into fix/resolve_kernel_test
HeJunchao100813 Jul 27, 2023
7d47cd9
Apply code-format changes
Jul 27, 2023
a306221
resolve
Jul 27, 2023
9d20502
Merge remote-tracking branch 'origin/fix/resolve_kernel_test' into fi…
Jul 27, 2023
32ac7e9
resolve
Jul 27, 2023
b592c4a
resolve
Jul 27, 2023
9acd87e
resolve reduce
Jul 28, 2023
41f31c7
resolve concat
Jul 28, 2023
877b00f
resolve constant_of_shape
Jul 28, 2023
3c18037
Merge branch 'master' into fix/resolve_kernel_test
Jul 28, 2023
54dd402
Merge branch 'master' into fix/resolve_kernel_test
HeJunchao100813 Jul 28, 2023
18624fd
Apply code-format changes
Jul 28, 2023
ea8a15c
resolve
Jul 28, 2023
0e8397a
Merge remote-tracking branch 'origin/fix/resolve_kernel_test' into fi…
Jul 28, 2023
fdbffbe
Apply code-format changes
Jul 28, 2023
035a3d6
resolve
Jul 28, 2023
c09d7ed
resolve
Jul 28, 2023
cc74488
resolve
Jul 28, 2023
c1c5af6
resolve
Jul 28, 2023
9dfad7c
resolve
Jul 31, 2023
cd8fe7f
Merge remote-tracking branch 'origin/fix/resolve_kernel_test' into fi…
Jul 31, 2023
422dd72
resolve
Jul 31, 2023
260bd9d
Fix little bug
lerenhua Jul 31, 2023
1 change: 1 addition & 0 deletions src/Native/include/nncase/runtime/simple_types.h
@@ -127,6 +127,7 @@ using dims_t = itlib::small_vector<size_t, 8>;
 using axes_t = itlib::small_vector<int64_t, 8>;
 using strides_t = itlib::small_vector<size_t, 8>;
 using paddings_t = itlib::small_vector<padding, 4>;
+using slope_t = itlib::small_vector<float_t, 4>;
 
 template <class... Ints>
 auto fixed_dims(Ints &&...values) -> std::array<size_t, sizeof...(Ints)> {
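
The new slope_t alias gives per-channel activation slopes (PRelu-style parameters) the same inline small-vector storage that dims_t, strides_t, and paddings_t already use. A minimal usage sketch; the namespace qualification and the PRelu use case are my assumptions, only the alias itself is in this diff:

    #include <nncase/runtime/simple_types.h>

    using namespace nncase; // assumption: slope_t is visible wherever dims_t is

    // Up to 4 floats live inline before spilling to the heap, matching the
    // small_vector<padding, 4> layout of paddings_t just above it.
    slope_t prelu_slopes{0.25f, 0.1f, 0.5f};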
28 changes: 18 additions & 10 deletions tests/kernels/kernel_test.h
@@ -211,8 +211,8 @@ class KernelTest {
         NNCASE_UNUSED auto res = kernels::stackvm::apply(
             tensor.shape(),
             [&](gsl::span<const size_t> index) -> result<void> {
-                get<bool>(tensor, index) =
-                    static_cast<double>(dis(gen)) >= 0;
+                get<bfloat16>(tensor, index) =
+                    static_cast<bfloat16>(dis(gen));
                 return ok();
             });
         break;
@@ -237,7 +237,7 @@ class KernelTest {
         if (shape.size() == 1 && (shape[0] == initvalue.size())) {
             // One dim array attribute
             T *tmp = new T[shape[0]];
-            for (int i = 0; i < (int)shape[0]; ++i) {
+            for (size_t i = 0; i < shape[0]; ++i) {
                 tmp[i] = initvalue[i];
             }
             return tmp;
@@ -1516,13 +1516,6 @@ class KernelTest {
                 vec2.push_back(static_cast<float>(get<double>(rhs, index)));
                 break;
             }
-            case dt_boolean: {
-                vec1.push_back(
-                    static_cast<float>(get<bool>(lhs, index) ? 2 : 1));
-                vec2.push_back(
-                    static_cast<float>(get<bool>(rhs, index) ? 2 : 1));
-                break;
-            }
             default: {
                 return err(std::errc::not_supported);
             }
@@ -1630,5 +1623,20 @@ class KernelTest {
 
         std::cout << std::endl;
     }
+
+    template <class T>
+    result<void> clamp_impl(const T *input, T min, T max, T *output,
+                            gsl::span<const size_t> in_shape,
+                            gsl::span<const size_t> in_strides,
+                            gsl::span<const size_t> out_strides,
+                            NNCASE_UNUSED kernel_context &context) {
+        return apply(in_shape,
+                     [&](gsl::span<const size_t> index) -> result<void> {
+                         const auto v = input[offset(index, in_strides)];
+                         output[offset(index, out_strides)] =
+                             std::min(std::max(v, min), max);
+                         return ok();
+                     });
+    }
 };
 } // namespace nncase
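
Four fixes land in this header: the bfloat16 branch of init_tensor now writes bfloat16 values instead of bools, the attribute-array loop uses size_t and drops the signed/unsigned cast, the dt_boolean case is removed from the comparison helper, and a clamp_impl reference implementation is added. clamp_impl visits every index with apply, reads through in_strides, and writes min(max(v, min), max) through out_strides. A contiguous-buffer sketch of the same loop, with illustrative names (not PR code):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // What clamp_impl computes, minus the stride arithmetic:
    // out[i] = min(max(in[i], lo), hi) for every element.
    template <class T>
    std::vector<T> clamp_ref(const std::vector<T> &in, T lo, T hi) {
        std::vector<T> out(in.size());
        for (std::size_t i = 0; i < in.size(); ++i)
            out[i] = std::min(std::max(in[i], lo), hi);
        return out;
    }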
11 changes: 7 additions & 4 deletions tests/kernels/test_batch_normalization.cpp
@@ -94,9 +94,12 @@ TEST_P(BatchNormalizationTest, batch_normalization) {
     auto mean_ort = runtime_tensor_2_ort_tensor(mean);
     auto var_ort = runtime_tensor_2_ort_tensor(var);
 
+    auto eps = 0.01f;
+    auto momentum = 0.9f;
+
     // expected
-    auto output_ort = ortki_BatchNormalization(input_ort, scale_ort, b_ort,
-                                               mean_ort, var_ort, 0.01f, 0.9f);
+    auto output_ort = ortki_BatchNormalization(
+        input_ort, scale_ort, b_ort, mean_ort, var_ort, eps, momentum);
     size_t size = 0;
     void *ptr_ort = tensor_buffer(output_ort, &size);
     dims_t shape(tensor_rank(output_ort));
@@ -106,14 +109,14 @@ TEST_P(BatchNormalizationTest, batch_normalization) {
                            true, host_runtime_tensor::pool_cpu_only)
                    .expect("create tensor failed");
 
-    float epsilon_ptr[] = {0.01f};
+    float epsilon_ptr[] = {eps};
     auto epsilon = hrt::create(nncase::dt_float32, {1},
                                {reinterpret_cast<gsl::byte *>(epsilon_ptr),
                                 sizeof(epsilon_ptr)},
                                true, host_runtime_tensor::pool_cpu_only)
                        .expect("create tensor failed");
 
-    float monentum_ptr[] = {0.9f};
+    float monentum_ptr[] = {momentum};
     auto monentum = hrt::create(nncase::dt_float32, {1},
                                 {reinterpret_cast<gsl::byte *>(monentum_ptr),
                                  sizeof(monentum_ptr)},
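
Hoisting the 0.01f/0.9f literals into named eps and momentum variables guarantees the ONNX Runtime reference and the nncase epsilon/momentum tensors can never drift apart. For the record, inference-mode batch normalization evaluates y = (x - mean) / sqrt(var + eps) * scale + b per channel; momentum only affects running-statistics updates during training, so only eps influences the compared outputs. A one-element sketch (illustrative, not PR code):

    #include <cmath>

    // Inference-mode batch normalization for a single element of one channel.
    float batch_norm_ref(float x, float mean, float var, float scale, float b,
                         float eps) {
        return (x - mean) / std::sqrt(var + eps) * scale + b;
    }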
183 changes: 158 additions & 25 deletions tests/kernels/test_broadcast.cpp
@@ -33,41 +33,174 @@ class BroadCastTest
     void SetUp() override {
         auto &&[typecode, l_shape, r_shape] = GetParam();
 
-        float input_ptr[] = {3, 2, 1};
-        input = hrt::create(typecode, l_shape,
-                            {reinterpret_cast<gsl::byte *>(input_ptr),
-                             sizeof(input_ptr)},
-                            true, host_runtime_tensor::pool_cpu_only)
-                    .expect("create tensor failed");
-
-        float output_ptr[] = {3, 2, 1, 3, 2, 1, 3, 2, 1};
-        expected = hrt::create(typecode, r_shape,
-                               {reinterpret_cast<gsl::byte *>(output_ptr),
-                                sizeof(output_ptr)},
-                               true, host_runtime_tensor::pool_cpu_only)
-                       .expect("create tensor failed");
+        input =
+            hrt::create(typecode, r_shape, host_runtime_tensor::pool_cpu_only)
+                .expect("create tensor failed");
+        init_tensor(input);
+
+        one = hrt::create(typecode, r_shape, host_runtime_tensor::pool_cpu_only)
+                  .expect("create tensor failed");
+        init_tensor_one(one);
+
+        size_t shape_size = r_shape.size();
+        int64_t *shape_array = (int64_t *)malloc(shape_size * sizeof(int64_t));
+        std::copy(r_shape.begin(), r_shape.end(), shape_array);
+        new_shape = hrt::create(dt_int64, {shape_size},
+                                {reinterpret_cast<gsl::byte *>(shape_array),
+                                 shape_size * sizeof(int64_t)},
+                                true, host_runtime_tensor::pool_cpu_only)
+                        .expect("create tensor failed");
     }
 
     void TearDown() override {}
 
+    void init_tensor_one(runtime::runtime_tensor &tensor) {
+        auto dtype = tensor.datatype();
+        switch (dtype) {
+        case dt_int8: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<int8_t>(tensor, index) = static_cast<int8_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_int16: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<int16_t>(tensor, index) = static_cast<int16_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_int32: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<int32_t>(tensor, index) = 1;
+                    return ok();
+                });
+            break;
+        }
+        case dt_int64: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<int64_t>(tensor, index) = static_cast<int64_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_uint8: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<uint8_t>(tensor, index) = static_cast<uint8_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_uint16: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<uint16_t>(tensor, index) = static_cast<uint16_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_uint32: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<uint32_t>(tensor, index) = static_cast<uint32_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_uint64: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<uint64_t>(tensor, index) = static_cast<uint64_t>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_float16: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<half>(tensor, index) = static_cast<half>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_float32: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<float>(tensor, index) = static_cast<float>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_float64: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<double>(tensor, index) = static_cast<double>(1);
+                    return ok();
+                });
+            break;
+        }
+        case dt_bfloat16: {
+            NNCASE_UNUSED auto res = kernels::stackvm::apply(
+                tensor.shape(),
+                [&](gsl::span<const size_t> index) -> result<void> {
+                    get<bfloat16>(tensor, index) = static_cast<bfloat16>(1);
+                    return ok();
+                });
+            break;
+        }
+        default: {
+        }
+        }
+    }
+
 protected:
     runtime_tensor input;
-    runtime_tensor expected;
+    runtime_tensor one;
+    runtime_tensor new_shape;
 };
 
-INSTANTIATE_TEST_SUITE_P(BroadCast, BroadCastTest,
-                         testing::Combine(testing::Values(dt_float32),
-                                          testing::Values(dims_t{3}),
-                                          testing::Values(dims_t{1, 3, 3})));
+INSTANTIATE_TEST_SUITE_P(
+    BroadCast, BroadCastTest,
+    testing::Combine(testing::Values(dt_float32, dt_float64, dt_int32, dt_int64,
+                                     dt_float16),
+                     testing::Values(dims_t{3}, dims_t{1, 3}, dims_t{3, 3},
+                                     dims_t{1}, dims_t{1, 3, 1}),
+                     testing::Values(dims_t{1, 3, 3}, dims_t{1, 3, 3, 3},
+                                     dims_t{1, 3, 16, 16})));
 
 TEST_P(BroadCastTest, BroadCast) {
-    // actual
-    int64_t a_ptr[] = {1, 3, 3};
-    auto a = hrt::create(nncase::dt_int64, {3},
-                         {reinterpret_cast<gsl::byte *>(a_ptr), sizeof(a_ptr)},
-                         true, host_runtime_tensor::pool_cpu_only)
-                 .expect("create tensor failed");
-    auto output = kernels::stackvm::broadcast(input.impl(), a.impl())
+    // expected
+    auto output_ort = ortki_Mul(runtime_tensor_2_ort_tensor(input),
+                                runtime_tensor_2_ort_tensor(one));
+    size_t size = 0;
+    void *ptr_ort = tensor_buffer(output_ort, &size);
+    dims_t shape(tensor_rank(output_ort));
+    tensor_shape(output_ort, reinterpret_cast<int64_t *>(shape.data()));
+    auto expected = hrt::create(input.datatype(), shape,
+                                {reinterpret_cast<gsl::byte *>(ptr_ort), size},
+                                true, host_runtime_tensor::pool_cpu_only)
+                        .expect("create tensor failed");
+
+    // actual
+    auto output = kernels::stackvm::broadcast(input.impl(), new_shape.impl())
                       .expect("broadcast failed");
     runtime_tensor actual(output.as<tensor>().expect("as tensor failed"));
 
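
The rewritten test drops the hardcoded {3, 2, 1} input and its precomputed expected buffer in favor of randomized inputs checked against ONNX Runtime: init_tensor_one fills a tensor of the target shape with ones for each dtype, and the expected result comes from ortki_Mul(input, one), relying on x * 1 == x element-wise and on ONNX's implicit broadcasting. A tiny standalone illustration of that oracle trick (not PR code):

    #include <array>
    #include <cassert>

    int main() {
        // Broadcasting {3} against 2x3 ones replicates the row -- the same
        // result broadcast(x, {2, 3}) must produce.
        std::array<float, 3> x{3.f, 2.f, 1.f};
        float ones[2][3] = {{1, 1, 1}, {1, 1, 1}};
        float out[2][3];
        for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 3; ++j)
                out[i][j] = x[j] * ones[i][j]; // x is broadcast across rows
        assert(out[0][1] == 2.f && out[1][2] == 1.f);
        return 0;
    }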
58 changes: 30 additions & 28 deletions tests/kernels/test_clamp.cpp
@@ -28,70 +28,72 @@ using namespace ortki;
 
 class ClampTest
     : public KernelTest,
-      public ::testing::TestWithParam<std::tuple<nncase::typecode_t, dims_t>> {
+      public ::testing::TestWithParam<
+          std::tuple<nncase::typecode_t, dims_t, float_t, float_t>> {
   public:
     void SetUp() override {
-        auto &&[typecode, l_shape] = GetParam();
+        auto &&[typecode, l_shape, value1, value2] = GetParam();
 
         input =
             hrt::create(typecode, l_shape, host_runtime_tensor::pool_cpu_only)
                 .expect("create tensor failed");
         init_tensor(input);
+
+        min_value = value1;
+        max_value = value2;
     }
 
     void TearDown() override {}
 
   protected:
     runtime_tensor input;
+    float_t min_value;
+    float_t max_value;
 };
 
 INSTANTIATE_TEST_SUITE_P(
     clamp, ClampTest,
-    testing::Combine(testing::Values(dt_float32, dt_int32, dt_int16, dt_int8,
-                                     dt_uint8, dt_uint16, dt_uint32, dt_uint64,
-                                     dt_int64, dt_float64, dt_boolean),
+    testing::Combine(testing::Values(dt_float32),
                      testing::Values(dims_t{1, 3, 16, 16}, dims_t{1},
                                      dims_t{1, 3}, dims_t{8, 8},
                                      dims_t{1, 3, 8}, dims_t{16, 16}, dims_t{},
-                                     dims_t{16})));
+                                     dims_t{16}),
+                     testing::Values(-1, -2, -3, -4, -5, -6),
+                     testing::Values(1, 2, 3, 4, 5, 6)));
 
 TEST_P(ClampTest, clamp) {
 
     // expected
-    float_t min1[] = {-1.0f};
-    auto min_tensor1 =
+    float_t min1[] = {min_value};
+    auto min_tensor =
         hrt::create(nncase::dt_float32, {1},
                     {reinterpret_cast<gsl::byte *>(min1), sizeof(min1)}, true,
                     host_runtime_tensor::pool_cpu_only)
             .expect("create tensor failed");
 
-    float_t max1[] = {1.0f};
-    auto max_tensor1 =
+    float_t max1[] = {max_value};
+    auto max_tensor =
         hrt::create(nncase::dt_float32, {1},
                     {reinterpret_cast<gsl::byte *>(max1), sizeof(max1)}, true,
                     host_runtime_tensor::pool_cpu_only)
             .expect("create tensor failed");
 
-    auto output1 = kernels::stackvm::clamp(input.impl(), min_tensor1.impl(),
-                                           max_tensor1.impl())
-                       .expect("clamp failed");
-    runtime_tensor expected(output1.as<tensor>().expect("as tensor failed"));
+    auto output_ort =
+        ortki_Clip(runtime_tensor_2_ort_tensor(input),
+                   ortki_CastLike(runtime_tensor_2_ort_tensor(min_tensor),
+                                  runtime_tensor_2_ort_tensor(input)),
+                   ortki_CastLike(runtime_tensor_2_ort_tensor(max_tensor),
+                                  runtime_tensor_2_ort_tensor(input)));
+    size_t size = 0;
+    void *ptr_ort = tensor_buffer(output_ort, &size);
+    dims_t shape(tensor_rank(output_ort));
+    tensor_shape(output_ort, reinterpret_cast<int64_t *>(shape.data()));
+    auto expected = hrt::create(input.datatype(), shape,
+                                {reinterpret_cast<gsl::byte *>(ptr_ort), size},
+                                true, host_runtime_tensor::pool_cpu_only)
+                        .expect("create tensor failed");
 
     // actual
-    float_t min[] = {-1.0f};
-    auto min_tensor =
-        hrt::create(nncase::dt_float32, {1},
-                    {reinterpret_cast<gsl::byte *>(min), sizeof(min)}, true,
-                    host_runtime_tensor::pool_cpu_only)
-            .expect("create tensor failed");
-
-    float_t max[] = {1.0f};
-    auto max_tensor =
-        hrt::create(nncase::dt_float32, {1},
-                    {reinterpret_cast<gsl::byte *>(max), sizeof(max)}, true,
-                    host_runtime_tensor::pool_cpu_only)
-            .expect("create tensor failed");
-
     auto output = kernels::stackvm::clamp(input.impl(), min_tensor.impl(),
                                           max_tensor.impl())
                       .expect("clamp failed");
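
Previously the "expected" tensor was produced by calling the same nncase clamp kernel being tested, so the test could only ever agree with itself; the oracle is now ortki_Clip, with ortki_CastLike converting the float32 min/max tensors to the input's dtype, since ONNX Clip requires min and max to match the input's element type. The bounds are also parameterized instead of fixed at ±1. A sketch of the per-element contract being checked, using one (min_value, max_value) pair from the new parameter space (illustrative, not PR code):

    #include <algorithm>
    #include <cassert>

    int main() {
        float lo = -2.f, hi = 2.f; // one (min_value, max_value) combination
        float xs[] = {-3.f, -2.f, 0.5f, 2.f, 7.f};
        float want[] = {-2.f, -2.f, 0.5f, 2.f, 2.f};
        for (int i = 0; i < 5; ++i)
            assert(std::min(std::max(xs[i], lo), hi) == want[i]);
        return 0;
    }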