/// Rounds `v` to the nearest integral value, resolving exact ties (a
/// fractional part of exactly 0.5) toward the even neighbour:
///   0.5 -> 0, 1.5 -> 2, 2.5 -> 2, -0.5 -> -0, -1.5 -> -2, -2.5 -> -2.
/// This is the tie rule of the ONNX `Round` operator, and differs from
/// `roundf`, which rounds ties away from zero.
///
/// The result does not depend on the current floating-point rounding mode and
/// no integer conversion is performed, so NaN, infinities and magnitudes
/// beyond the `int32_t` range are handled without undefined behaviour.
///
/// @param v  value to round.
/// @return   `v` rounded half-to-even; NaN and +/-infinity are returned as-is.
static float round_onnx(float v) {
    // Distance from the truncated value is exactly 0.5 only for a true tie.
    // NaN and infinities produce NaN here, so they fail the comparison and
    // take the roundf path, as does every value without a .5 fraction.
    const float whole = truncf(v);
    if (fabsf(v - whole) != 0.5f)
        return roundf(v);

    // Tie: v = k + 0.5 (or its negation) for an integer k. Halving is exact
    // and gives a fraction of .25 (k even) or .75 (k odd), which roundf
    // resolves unambiguously; doubling then lands on the even neighbour of v
    // for both signs.
    return 2.0f * roundf(v * 0.5f);
}
const T *input, T *output, gsl::span input_shape, @@ -66,7 +83,7 @@ result unary_impl(unary_op_t op, const T *input, T *output, UNARY_IMPL_OP(log, logf); UNARY_IMPL_OP(logical_not, [](float v) { return !v; }); UNARY_IMPL_OP(neg, std::negate()); - UNARY_IMPL_OP(round, roundf); + UNARY_IMPL_OP(round, [](float v) { return round_onnx(v); }); UNARY_IMPL_OP(rsqrt, [](float v) { return 1.f / sqrtf(v); }); UNARY_IMPL_OP(sign, [](float v) { return (0.f < v) - (v < 0.f); }); UNARY_IMPL_OP(sin, sinf); diff --git a/tests/kernels/test_gather_elements.cpp b/tests/kernels/test_gather_elements.cpp new file mode 100644 index 0000000000..1a17edaecf --- /dev/null +++ b/tests/kernels/test_gather_elements.cpp @@ -0,0 +1,119 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "kernel_test.h" +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; +using namespace ortki; + +class GatherElementsTest + : public KernelTest, + public ::testing::TestWithParam< + std::tuple> { + public: + void SetUp() override { + auto &&[typecode, shape, value] = GetParam(); + + input = hrt::create(typecode, shape, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + init_tensor(input); + + int64_t indices_array[] = {0, 0, 1, 1}; + indices = hrt::create(dt_int64, {2, 2}, + {reinterpret_cast(indices_array), + sizeof(indices_array)}, + true, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + + batchDims_value = value; + int64_t batchDims_array[1] = {value}; + batchDims = hrt::create(dt_int64, dims_t{1}, + {reinterpret_cast(batchDims_array), + sizeof(batchDims_array)}, + true, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + } + + void TearDown() override {} + + protected: + runtime_tensor input; + runtime_tensor indices; + runtime_tensor batchDims; + int64_t batchDims_value; +}; + +INSTANTIATE_TEST_SUITE_P( + gather_elements, GatherElementsTest, + testing::Combine(testing::Values(dt_int32, dt_int64, dt_float32, dt_uint64, + dt_int8, dt_int16, dt_uint8, dt_uint16, + dt_uint32, dt_float16, dt_float64, + dt_bfloat16, dt_boolean), + testing::Values(dims_t{ + 2, + 2} /*, dims_t{3, 5}, + dims_t{2, 3, 1}, dims_t{5, 7, 5}, + dims_t{5, 4, 3, 2}, dims_t{5, 5, 7, 7}, + dims_t{2, 3, 3, 5}*/), + testing::Values(-1, 0, 1))); + +TEST_P(GatherElementsTest, gather_elements) { + auto input_ort = runtime_tensor_2_ort_tensor(input); + auto indices_ort = runtime_tensor_2_ort_tensor(indices); + + // expected + auto output_ort = + ortki_GatherElements(input_ort, indices_ort, batchDims_value); + size_t size = 0; + void *ptr_ort = tensor_buffer(output_ort, &size); + dims_t shape(tensor_rank(output_ort)); + 
tensor_shape(output_ort, reinterpret_cast(shape.data())); + auto expected = hrt::create(input.datatype(), shape, + {reinterpret_cast(ptr_ort), size}, + true, host_runtime_tensor::pool_cpu_only) + .expect("create tensor failed"); + + // actual + auto output = kernels::stackvm::gather_elements( + input.impl(), batchDims.impl(), indices.impl()) + .expect("gather failed"); + runtime_tensor actual(output.as().expect("as tensor failed")); + + bool result = is_same_tensor(expected, actual) || + cosine_similarity_tensor(expected, actual); + + if (!result) { + std::cout << "actual "; + print_runtime_tensor(actual); + std::cout << "expected "; + print_runtime_tensor(expected); + } + + // compare + EXPECT_TRUE(result); +} + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/tests/kernels/test_unary_round.cpp b/tests/kernels/test_unary_round.cpp index bfba4db767..cda1a27980 100644 --- a/tests/kernels/test_unary_round.cpp +++ b/tests/kernels/test_unary_round.cpp @@ -54,7 +54,7 @@ INSTANTIATE_TEST_SUITE_P( dims_t{16, 1}, dims_t{1, 16, 1}, dims_t{16}, dims_t{1}, dims_t{}))); -TEST_P(UnaryTest, roound) { +TEST_P(UnaryTest, round) { OrtKITensor *orts[1]; orts[0] = runtime_tensor_2_ort_tensor(input); @@ -79,6 +79,7 @@ TEST_P(UnaryTest, roound) { cosine_similarity_tensor(expected, actual); if (!result) { + print_runtime_tensor(input); std::cout << "actual "; print_runtime_tensor(actual); std::cout << "expected ";