From 8805843d6d6e1ecf89b9b833ea82c12014f50d17 Mon Sep 17 00:00:00 2001 From: hejunchao Date: Fri, 4 Aug 2023 15:09:41 +0800 Subject: [PATCH 1/6] add GatherElementsTest --- tests/kernels/test_gather_elements.cpp | 117 +++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 tests/kernels/test_gather_elements.cpp diff --git a/tests/kernels/test_gather_elements.cpp b/tests/kernels/test_gather_elements.cpp new file mode 100644 index 0000000000..7a2c545e0f --- /dev/null +++ b/tests/kernels/test_gather_elements.cpp @@ -0,0 +1,117 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel_test.h" +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; +using namespace ortki; + +class GatherElementsTest : public KernelTest, + public ::testing::TestWithParam< + std::tuple> { + public: + void SetUp() override { + auto &&[typecode, shape, value] = GetParam(); + + input = hrt::create(typecode, shape, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + init_tensor(input); + + int64_t indices_array[] = {0, 0, 1, 1}; + indices = hrt::create(dt_int64, {2, 2}, + {reinterpret_cast(indices_array), + sizeof(indices_array)}, + true, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + + batchDims_value = value; + int64_t batchDims_array[1] = {value}; + batchDims = hrt::create(dt_int64, dims_t{1}, + {reinterpret_cast(batchDims_array), + sizeof(batchDims_array)}, + true, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + } + + void TearDown() override {} + + protected: + runtime_tensor input; + runtime_tensor indices; + runtime_tensor batchDims; + int64_t batchDims_value; +}; + +INSTANTIATE_TEST_SUITE_P( + gather_elements, GatherElementsTest, + testing::Combine(testing::Values(dt_int32, dt_int64, dt_float32, dt_uint64, + dt_int8, dt_int16, dt_uint8, dt_uint16, + dt_uint32, dt_float16, dt_float64, + dt_bfloat16, dt_boolean), + testing::Values(dims_t{ + 2, + 2} /*, dims_t{3, 5}, + dims_t{2, 3, 1}, dims_t{5, 7, 5}, + dims_t{5, 4, 3, 2}, dims_t{5, 5, 7, 7}, + dims_t{2, 3, 3, 5}*/), + testing::Values(-1, 0, 1))); + +TEST_P(GatherElementsTest, gather_elements) { + auto input_ort = runtime_tensor_2_ort_tensor(input); + auto indices_ort = runtime_tensor_2_ort_tensor(indices); + + // expected + auto output_ort = ortki_GatherElements(input_ort, indices_ort, batchDims_value); + size_t size = 0; + void *ptr_ort = tensor_buffer(output_ort, &size); + dims_t shape(tensor_rank(output_ort)); + tensor_shape(output_ort, reinterpret_cast(shape.data())); + auto expected = hrt::create(input.datatype(), shape, + {reinterpret_cast(ptr_ort), size}, + true, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + + // actual + auto output = + kernels::stackvm::gather_elements(input.impl(), batchDims.impl(), indices.impl()) + .expect("gather failed"); + runtime_tensor actual(output.as().expect("as tensor failed")); + + bool result = is_same_tensor(expected, actual) || + cosine_similarity_tensor(expected, actual); + + if (!result) { + std::cout << "actual "; + print_runtime_tensor(actual); + std::cout << "expected "; + print_runtime_tensor(expected); + } + + // compare + EXPECT_TRUE(result); +} + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From 3473fac6e03fcc358cf525fee5d04d9464797cdf Mon Sep 17 00:00:00 2001 From: Hejunchao6 Date: Fri, 4 Aug 2023 07:13:12 +0000 Subject: [PATCH 2/6] Apply code-format changes --- tests/kernels/test_gather_elements.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/kernels/test_gather_elements.cpp b/tests/kernels/test_gather_elements.cpp index 7a2c545e0f..1a17edaecf 100644 --- a/tests/kernels/test_gather_elements.cpp +++ b/tests/kernels/test_gather_elements.cpp @@ -26,9 +26,10 @@ using namespace nncase; using namespace nncase::runtime; using namespace ortki; -class GatherElementsTest : public KernelTest, - public ::testing::TestWithParam< - std::tuple> { +class GatherElementsTest + : public KernelTest, + public ::testing::TestWithParam< + std::tuple> { public: void SetUp() override { auto &&[typecode, shape, value] = GetParam(); @@ -81,7 +82,8 @@ TEST_P(GatherElementsTest, gather_elements) { auto indices_ort = runtime_tensor_2_ort_tensor(indices); // expected - auto output_ort = ortki_GatherElements(input_ort, indices_ort, batchDims_value); + auto output_ort = + ortki_GatherElements(input_ort, indices_ort, batchDims_value); size_t size = 0; void *ptr_ort = tensor_buffer(output_ort, &size); dims_t shape(tensor_rank(output_ort)); @@ -92,9 +94,9 @@ TEST_P(GatherElementsTest, gather_elements) { .expect("create tensor failed"); // actual - auto output = - kernels::stackvm::gather_elements(input.impl(), batchDims.impl(), indices.impl()) - .expect("gather failed"); + auto output = kernels::stackvm::gather_elements( + input.impl(), batchDims.impl(), indices.impl()) + .expect("gather failed"); runtime_tensor actual(output.as().expect("as tensor failed")); bool result = is_same_tensor(expected, actual) || From 1f2e6ade0e2a11e0706a19c0473857b17f3772ef Mon Sep 17 00:00:00 2001 From: hejunchao Date: Fri, 4 Aug 2023 16:34:43 +0800 Subject: [PATCH 3/6] fix round --- .../stackvm/optimized/x86_64/unary.cpp | 2 +- .../src/kernels/stackvm/reference/unary.cpp | 30 ++++++++++++++++++- tests/kernels/test_unary_round.cpp | 3 +- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp b/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp index 78bedf4457..1332717d5b 100644 --- a/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp +++ b/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp @@ -110,7 +110,7 @@ struct unary_op_round { void pack(const float *a, float *b) { __m256 vector_a = _mm256_loadu_ps(a); __m256 dst_a = _mm256_round_ps( - vector_a, (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)); + vector_a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); _mm256_storeu_ps(b, dst_a); } }; diff --git a/src/Native/src/kernels/stackvm/reference/unary.cpp b/src/Native/src/kernels/stackvm/reference/unary.cpp index 9d8e0dba39..f7dac2856a 100644 --- a/src/Native/src/kernels/stackvm/reference/unary.cpp +++ b/src/Native/src/kernels/stackvm/reference/unary.cpp @@ -45,6 +45,20 @@ result unary_impl(TOp &&op, const T *input, T *output, return unary_impl(funct, input, output, input_shape, input_strides, \ out_shape, out_strides, context) +float round_onnx(float v){ + if(v>0&&v-(int32_t)v==0.5){ + float result = (int32_t)v+1.0; + if((int32_t)result%2==0) return result; + else return result -1; + } + else if(v<0&&(int32_t)v-v==0.5){ + float result = (int32_t)v+1.0; + if((int32_t)result%2==0) return result; + else return result -1; + } + else return roundf(v); +} + template result unary_impl(unary_op_t op, const T *input, T *output, gsl::span input_shape, @@ -66,7 +80,7 @@ result unary_impl(unary_op_t op, const T *input, T *output, UNARY_IMPL_OP(log, logf); UNARY_IMPL_OP(logical_not, [](float v) { return !v; }); UNARY_IMPL_OP(neg, std::negate()); - UNARY_IMPL_OP(round, roundf); + UNARY_IMPL_OP(round, [](float v) { return round_onnx(v); }); UNARY_IMPL_OP(rsqrt, [](float v) { return 1.f / sqrtf(v); }); UNARY_IMPL_OP(sign, [](float v) { return (0.f < v) - (v < 0.f); }); UNARY_IMPL_OP(sin, sinf); @@ -79,6 +93,20 @@ result unary_impl(unary_op_t op, const T *input, T *output, } } +//float round_onnx(float v){ +// if(v>0&&v-(int32_t)v==0.5){ +// float result = (int32_t)v+1.0; +// if((int32_t)result%2==0) return result; +// else return result -1; +// } +// else if(v<0&&(int32_t)v-v==0.5){ +// float result = (int32_t)v+1.0; +// if((int32_t)result%2==0) return result; +// else return result -1; +// } +// else return roundf(v); +//} + #define UNARY_IMPL_DTYPE(dtype, type) \ case dtype: \ return unary_impl(op, reinterpret_cast(input), \ diff --git a/tests/kernels/test_unary_round.cpp b/tests/kernels/test_unary_round.cpp index bfba4db767..cda1a27980 100644 --- a/tests/kernels/test_unary_round.cpp +++ b/tests/kernels/test_unary_round.cpp @@ -54,7 +54,7 @@ INSTANTIATE_TEST_SUITE_P( dims_t{16, 1}, dims_t{1, 16, 1}, dims_t{16}, dims_t{1}, dims_t{}))); -TEST_P(UnaryTest, roound) { +TEST_P(UnaryTest, round) { OrtKITensor *orts[1]; orts[0] = runtime_tensor_2_ort_tensor(input); @@ -79,6 +79,7 @@ TEST_P(UnaryTest, roound) { cosine_similarity_tensor(expected, actual); if (!result) { + print_runtime_tensor(input); std::cout << "actual "; print_runtime_tensor(actual); std::cout << "expected "; From 3208f652120d8fd26f17d347698625a5688c6e00 Mon Sep 17 00:00:00 2001 From: hejunchao Date: Fri, 4 Aug 2023 16:36:39 +0800 Subject: [PATCH 4/6] fix round --- .../stackvm/optimized/x86_64/unary.cpp | 21 +++++++- .../src/kernels/stackvm/reference/unary.cpp | 53 ++++++++++--------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp b/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp index 1332717d5b..9dbe4855ba 100644 --- a/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp +++ b/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp @@ -104,13 +104,30 @@ struct unary_op_neg { } }; +float round_onnx(float v) { + if (v > 0 && v - (int32_t)v == 0.5) { + float result = (int32_t)v + 1.0; + if ((int32_t)result % 2 == 0) + return result; + else + return result - 1; + } else if (v < 0 && (int32_t)v - v == 0.5) { + float result = (int32_t)v + 1.0; + if ((int32_t)result % 2 == 0) + return result; + else + return result - 1; + } else + return roundf(v); +} + struct unary_op_round { - float operator()(float x) const { return roundf(x); } + float operator()(float x) const { return round_onnx(x); } void pack(const float *a, float *b) { __m256 vector_a = _mm256_loadu_ps(a); __m256 dst_a = _mm256_round_ps( - vector_a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); + vector_a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); _mm256_storeu_ps(b, dst_a); } }; diff --git a/src/Native/src/kernels/stackvm/reference/unary.cpp b/src/Native/src/kernels/stackvm/reference/unary.cpp index f7dac2856a..af05724243 100644 --- a/src/Native/src/kernels/stackvm/reference/unary.cpp +++ b/src/Native/src/kernels/stackvm/reference/unary.cpp @@ -45,18 +45,21 @@ result unary_impl(TOp &&op, const T *input, T *output, return unary_impl(funct, input, output, input_shape, input_strides, \ out_shape, out_strides, context) -float round_onnx(float v){ - if(v>0&&v-(int32_t)v==0.5){ - float result = (int32_t)v+1.0; - if((int32_t)result%2==0) return result; - else return result -1; - } - else if(v<0&&(int32_t)v-v==0.5){ - float result = (int32_t)v+1.0; - if((int32_t)result%2==0) return result; - else return result -1; - } - else return roundf(v); +float round_onnx(float v) { + if (v > 0 && v - (int32_t)v == 0.5) { + float result = (int32_t)v + 1.0; + if ((int32_t)result % 2 == 0) + return result; + else + return result - 1; + } else if (v < 0 && (int32_t)v - v == 0.5) { + float result = (int32_t)v + 1.0; + if ((int32_t)result % 2 == 0) + return result; + else + return result - 1; + } else + return roundf(v); } template @@ -93,19 +96,19 @@ result unary_impl(unary_op_t op, const T *input, T *output, } } -//float round_onnx(float v){ -// if(v>0&&v-(int32_t)v==0.5){ -// float result = (int32_t)v+1.0; -// if((int32_t)result%2==0) return result; -// else return result -1; -// } -// else if(v<0&&(int32_t)v-v==0.5){ -// float result = (int32_t)v+1.0; -// if((int32_t)result%2==0) return result; -// else return result -1; -// } -// else return roundf(v); -//} +// float round_onnx(float v){ +// if(v>0&&v-(int32_t)v==0.5){ +// float result = (int32_t)v+1.0; +// if((int32_t)result%2==0) return result; +// else return result -1; +// } +// else if(v<0&&(int32_t)v-v==0.5){ +// float result = (int32_t)v+1.0; +// if((int32_t)result%2==0) return result; +// else return result -1; +// } +// else return roundf(v); +// } #define UNARY_IMPL_DTYPE(dtype, type) \ case dtype: \ From 8120c9901e2305140154959b1f1587b7a63ff145 Mon Sep 17 00:00:00 2001 From: hejunchao Date: Fri, 4 Aug 2023 16:38:43 +0800 Subject: [PATCH 5/6] fix round --- src/Native/src/kernels/stackvm/reference/unary.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/Native/src/kernels/stackvm/reference/unary.cpp b/src/Native/src/kernels/stackvm/reference/unary.cpp index af05724243..aa34a19f48 100644 --- a/src/Native/src/kernels/stackvm/reference/unary.cpp +++ b/src/Native/src/kernels/stackvm/reference/unary.cpp @@ -96,20 +96,6 @@ result unary_impl(unary_op_t op, const T *input, T *output, } } -// float round_onnx(float v){ -// if(v>0&&v-(int32_t)v==0.5){ -// float result = (int32_t)v+1.0; -// if((int32_t)result%2==0) return result; -// else return result -1; -// } -// else if(v<0&&(int32_t)v-v==0.5){ -// float result = (int32_t)v+1.0; -// if((int32_t)result%2==0) return result; -// else return result -1; -// } -// else return roundf(v); -// } - #define UNARY_IMPL_DTYPE(dtype, type) \ case dtype: \ return unary_impl(op, reinterpret_cast(input), \ From 9617cdc5450d126584c242a1ce722cf782f0ff76 Mon Sep 17 00:00:00 2001 From: hejunchao Date: Fri, 4 Aug 2023 17:17:52 +0800 Subject: [PATCH 6/6] fix round --- src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp | 2 +- src/Native/src/kernels/stackvm/reference/unary.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp b/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp index 9dbe4855ba..01d5d1af4e 100644 --- a/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp +++ b/src/Native/src/kernels/stackvm/optimized/x86_64/unary.cpp @@ -104,7 +104,7 @@ struct unary_op_neg { } }; -float round_onnx(float v) { +static float round_onnx(float v) { if (v > 0 && v - (int32_t)v == 0.5) { float result = (int32_t)v + 1.0; if ((int32_t)result % 2 == 0) diff --git a/src/Native/src/kernels/stackvm/reference/unary.cpp b/src/Native/src/kernels/stackvm/reference/unary.cpp index aa34a19f48..f9da11ba02 100644 --- a/src/Native/src/kernels/stackvm/reference/unary.cpp +++ b/src/Native/src/kernels/stackvm/reference/unary.cpp @@ -45,7 +45,7 @@ result unary_impl(TOp &&op, const T *input, T *output, return unary_impl(funct, input, output, input_shape, input_strides, \ out_shape, out_strides, context) -float round_onnx(float v) { +static float round_onnx(float v) { if (v > 0 && v - (int32_t)v == 0.5) { float result = (int32_t)v + 1.0; if ((int32_t)result % 2 == 0)