Skip to content

Commit

Permalink
neon/vld2_u8: native and portable implementation
Browse files Browse the repository at this point in the history
Fixes #752
  • Loading branch information
ngzhian authored and nemequ committed May 1, 2021
1 parent 5a27732 commit 85d2ed2
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 12 deletions.
1 change: 1 addition & 0 deletions meson.build
Expand Up @@ -59,6 +59,7 @@ simde_neon_families = [
'hadd',
'hsub',
'ld1',
'ld2',
'ld3',
'ld4',
'max',
Expand Down
1 change: 1 addition & 0 deletions simde/arm/neon.h
Expand Up @@ -79,6 +79,7 @@
#include "neon/hadd.h"
#include "neon/hsub.h"
#include "neon/ld1.h"
#include "neon/ld2.h"
#include "neon/ld3.h"
#include "neon/ld4.h"
#include "neon/max.h"
Expand Down
70 changes: 70 additions & 0 deletions simde/arm/neon/ld2.h
@@ -0,0 +1,70 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
*/

#if !defined(SIMDE_ARM_NEON_LD2_H)
#define SIMDE_ARM_NEON_LD2_H

#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

#if !defined(SIMDE_BUG_INTEL_857088)

/* Load two de-interleaved uint8x8 vectors from memory.
 *
 * Elements at even offsets of `ptr` go to val[0] and elements at odd offsets
 * go to val[1], i.e. val[i][j] = ptr[i + 2 * j].
 *
 * `ptr` must reference at least 2 * (lanes per vector) readable elements;
 * for the 8-lane uint8x8 type that is 16, which is what the array-parameter
 * annotation below advertises to the compiler.
 *
 * Returns the pair of vectors as a simde_uint8x8x2_t. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8x2_t
simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vld2_u8(ptr);
  #else
    simde_uint8x8_private r_[2];

    /* i selects the destination vector, j the lane within it; the source
     * index strides by the number of vectors so the streams are split. */
    for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
      for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
        r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
      }
    }

    simde_uint8x8x2_t r = { {
      simde_uint8x8_from_private(r_[0]),
      simde_uint8x8_from_private(r_[1]),
    } };

    return r;
  #endif
}
/* When native aliases are requested, drop any existing vld2_u8 macro and
 * route the unprefixed name to simde_vld2_u8. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_u8
#define vld2_u8(a) simde_vld2_u8((a))
#endif

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_LD2_H) */
1 change: 1 addition & 0 deletions test/arm/neon/declare-suites.h
Expand Up @@ -47,6 +47,7 @@ SIMDE_TEST_DECLARE_SUITE(get_lane)
SIMDE_TEST_DECLARE_SUITE(get_low)
SIMDE_TEST_DECLARE_SUITE(hadd)
SIMDE_TEST_DECLARE_SUITE(hsub)
SIMDE_TEST_DECLARE_SUITE(ld2)
SIMDE_TEST_DECLARE_SUITE(max)
SIMDE_TEST_DECLARE_SUITE(maxnm)
SIMDE_TEST_DECLARE_SUITE(maxv)
Expand Down
99 changes: 99 additions & 0 deletions test/arm/neon/ld2.c
@@ -0,0 +1,99 @@
#define SIMDE_TEST_ARM_NEON_INSN ld2

#include "test-neon.h"
#include "../../../simde/arm/neon/ld2.h"

#if !defined(SIMDE_BUG_INTEL_857088)

/* Test for simde_vld2_u8.
 *
 * With the `#if 1` branch active, each test vector supplies two 8-byte
 * halves (`a`, `b`) of the 16-byte input and the two expected de-interleaved
 * output vectors (`r[0]`, `r[1]`).  Returns 0 when every vector matches.
 *
 * The `#else` branch is the generator: it writes fresh random vectors via
 * the simde_test_arm_neon_write_* helpers and returns 1. */
static int
test_simde_vld2_u8 (SIMDE_MUNIT_TEST_ARGS) {
  #if 1
    static const struct {
      uint8_t a[8];
      uint8_t b[8];
      uint8_t r[2][8];
    } test_vec[] = {
      { { UINT8_C(144), UINT8_C(234), UINT8_C(130), UINT8_C(145), UINT8_C( 26), UINT8_C(241), UINT8_C( 35), UINT8_C( 43) },
        { UINT8_C( 76), UINT8_C(223), UINT8_C(152), UINT8_C(128), UINT8_C(203), UINT8_C( 66), UINT8_C( 17), UINT8_C(218) },
        { { UINT8_C(144), UINT8_C(130), UINT8_C( 26), UINT8_C( 35), UINT8_C( 76), UINT8_C(152), UINT8_C(203), UINT8_C( 17) },
          { UINT8_C(234), UINT8_C(145), UINT8_C(241), UINT8_C( 43), UINT8_C(223), UINT8_C(128), UINT8_C( 66), UINT8_C(218) } },
      },
      { { UINT8_C(196), UINT8_C( 14), UINT8_C( 36), UINT8_C( 59), UINT8_C(230), UINT8_C(253), UINT8_C(216), UINT8_C( 14) },
        { UINT8_C( 31), UINT8_C( 73), UINT8_C( 48), UINT8_C( 55), UINT8_C(200), UINT8_C( 71), UINT8_C(176), UINT8_C( 88) },
        { { UINT8_C(196), UINT8_C( 36), UINT8_C(230), UINT8_C(216), UINT8_C( 31), UINT8_C( 48), UINT8_C(200), UINT8_C(176) },
          { UINT8_C( 14), UINT8_C( 59), UINT8_C(253), UINT8_C( 14), UINT8_C( 73), UINT8_C( 55), UINT8_C( 71), UINT8_C( 88) } },
      },
      { { UINT8_C( 49), UINT8_C( 50), UINT8_C(233), UINT8_C( 76), UINT8_C( 35), UINT8_C( 13), UINT8_C(119), UINT8_C(111) },
        { UINT8_C(236), UINT8_C( 15), UINT8_C(240), UINT8_C(184), UINT8_C( 81), UINT8_C( 1), UINT8_C(146), UINT8_C( 22) },
        { { UINT8_C( 49), UINT8_C(233), UINT8_C( 35), UINT8_C(119), UINT8_C(236), UINT8_C(240), UINT8_C( 81), UINT8_C(146) },
          { UINT8_C( 50), UINT8_C( 76), UINT8_C( 13), UINT8_C(111), UINT8_C( 15), UINT8_C(184), UINT8_C( 1), UINT8_C( 22) } },
      },
      { { UINT8_C( 15), UINT8_C(182), UINT8_C( 81), UINT8_C(245), UINT8_C(179), UINT8_C( 41), UINT8_C( 4), UINT8_C(211) },
        { UINT8_C(115), UINT8_C( 52), UINT8_C( 10), UINT8_C( 59), UINT8_C(123), UINT8_C(187), UINT8_C(147), UINT8_C(173) },
        { { UINT8_C( 15), UINT8_C( 81), UINT8_C(179), UINT8_C( 4), UINT8_C(115), UINT8_C( 10), UINT8_C(123), UINT8_C(147) },
          { UINT8_C(182), UINT8_C(245), UINT8_C( 41), UINT8_C(211), UINT8_C( 52), UINT8_C( 59), UINT8_C(187), UINT8_C(173) } },
      },
      { { UINT8_C(237), UINT8_C(125), UINT8_C(249), UINT8_C( 17), UINT8_C(138), UINT8_C(112), UINT8_C(128), UINT8_C(118) },
        { UINT8_C(127), UINT8_C(112), UINT8_C( 46), UINT8_C(208), UINT8_C(113), UINT8_C(193), UINT8_C(230), UINT8_C(129) },
        { { UINT8_C(237), UINT8_C(249), UINT8_C(138), UINT8_C(128), UINT8_C(127), UINT8_C( 46), UINT8_C(113), UINT8_C(230) },
          { UINT8_C(125), UINT8_C( 17), UINT8_C(112), UINT8_C(118), UINT8_C(112), UINT8_C(208), UINT8_C(193), UINT8_C(129) } },
      },
      { { UINT8_C(119), UINT8_C( 56), UINT8_C(118), UINT8_C( 43), UINT8_C( 97), UINT8_C(122), UINT8_C(254), UINT8_C(212) },
        { UINT8_C(175), UINT8_C( 8), UINT8_C( 15), UINT8_C( 42), UINT8_C(195), UINT8_C(163), UINT8_C(215), UINT8_C(177) },
        { { UINT8_C(119), UINT8_C(118), UINT8_C( 97), UINT8_C(254), UINT8_C(175), UINT8_C( 15), UINT8_C(195), UINT8_C(215) },
          { UINT8_C( 56), UINT8_C( 43), UINT8_C(122), UINT8_C(212), UINT8_C( 8), UINT8_C( 42), UINT8_C(163), UINT8_C(177) } },
      },
      { { UINT8_C( 32), UINT8_C(208), UINT8_C(194), UINT8_C(170), UINT8_C( 64), UINT8_C( 66), UINT8_C( 32), UINT8_C(191) },
        { UINT8_C(179), UINT8_C( 79), UINT8_C(144), UINT8_C( 36), UINT8_C( 16), UINT8_C(118), UINT8_C(165), UINT8_C(135) },
        { { UINT8_C( 32), UINT8_C(194), UINT8_C( 64), UINT8_C( 32), UINT8_C(179), UINT8_C(144), UINT8_C( 16), UINT8_C(165) },
          { UINT8_C(208), UINT8_C(170), UINT8_C( 66), UINT8_C(191), UINT8_C( 79), UINT8_C( 36), UINT8_C(118), UINT8_C(135) } },
      },
      { { UINT8_C(174), UINT8_C( 28), UINT8_C(178), UINT8_C( 16), UINT8_C(150), UINT8_C(176), UINT8_C(228), UINT8_C( 69) },
        { UINT8_C(185), UINT8_C(244), UINT8_C(112), UINT8_C(124), UINT8_C(151), UINT8_C( 71), UINT8_C( 45), UINT8_C(183) },
        { { UINT8_C(174), UINT8_C(178), UINT8_C(150), UINT8_C(228), UINT8_C(185), UINT8_C(112), UINT8_C(151), UINT8_C( 45) },
          { UINT8_C( 28), UINT8_C( 16), UINT8_C(176), UINT8_C( 69), UINT8_C(244), UINT8_C(124), UINT8_C( 71), UINT8_C(183) } },
      },
    };

    for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
      /* simde_vld2_u8 reads past the end of an 8-element array, so `a`
       * cannot be passed directly.  Assemble `a` followed by `b` into one
       * contiguous buffer instead of relying on struct member adjacency. */
      uint8_t buf[sizeof(test_vec[0].a) + sizeof(test_vec[0].b)];
      simde_memcpy(buf, test_vec[i].a, sizeof(test_vec[i].a));
      simde_memcpy(&buf[sizeof(test_vec[i].a)], test_vec[i].b, sizeof(test_vec[i].b));

      simde_uint8x8x2_t r = simde_vld2_u8(buf);

      simde_uint8x8x2_t expected = {
          {simde_vld1_u8(test_vec[i].r[0]), simde_vld1_u8(test_vec[i].r[1])}};

      simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]);
      simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]);
    }

    return 0;
  #else
    for (int i = 0 ; i < 8 ; i++) {
      simde_uint8x8_t a = simde_test_arm_neon_random_u8x8();
      simde_uint8x8_t b = simde_test_arm_neon_random_u8x8();
      simde_uint8x8x2_t c = {{a, b}};

      simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
      simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);

      /* Raw bytes of the two random vectors form the load source. */
      uint8_t buf[16];
      simde_memcpy(buf, c.val, sizeof(buf));
      simde_uint8x8x2_t r = simde_vld2_u8(buf);

      simde_test_arm_neon_write_u8x8x2(2, r, SIMDE_TEST_VEC_POS_LAST);
    }
    return 1;
  #endif
}

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

SIMDE_TEST_FUNC_LIST_BEGIN
#if !defined(SIMDE_BUG_INTEL_857088)
SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u8)
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
SIMDE_TEST_FUNC_LIST_END

#include "test-neon-footer.h"
24 changes: 12 additions & 12 deletions test/arm/neon/test-neon.h
Expand Up @@ -114,7 +114,7 @@ HEDLEY_DIAGNOSTIC_POP
simde_test_arm_neon_write_##symbol_identifier##x##element_count##x2(int indent, simde_##NT value, SimdeTestVecPos pos) { \
if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
simde_test_codegen_write_indent(indent); \
fputs("{\n", stdout); \
fputs("{\n", SIMDE_CODEGEN_FP); \
} \
ET value0_[sizeof(value) / sizeof(ET) / 2]; \
ET value1_[sizeof(value) / sizeof(ET) / 2]; \
Expand All @@ -126,7 +126,7 @@ HEDLEY_DIAGNOSTIC_POP
simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value1_) / sizeof(ET), value1_, SIMDE_TEST_VEC_POS_LAST); \
if (pos == SIMDE_TEST_VEC_POS_LAST) { \
simde_test_codegen_write_indent(indent); \
fputs("},\n", stdout); \
fputs("},\n", SIMDE_CODEGEN_FP); \
} \
\
} \
Expand Down Expand Up @@ -163,7 +163,7 @@ HEDLEY_DIAGNOSTIC_POP
simde_test_arm_neon_write_##symbol_identifier##x##element_count##x2(int indent, simde_##NT value, SimdeTestVecPos pos) { \
if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
simde_test_codegen_write_indent(indent); \
fputs("{\n", stdout); \
fputs("{\n", SIMDE_CODEGEN_FP); \
} \
\
ET value0_[sizeof(value) / sizeof(ET) / 2]; \
Expand All @@ -174,7 +174,7 @@ HEDLEY_DIAGNOSTIC_POP
simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value1_) / sizeof(value1_[0]), value1_, SIMDE_TEST_VEC_POS_LAST); \
if (pos == SIMDE_TEST_VEC_POS_LAST) { \
simde_test_codegen_write_indent(indent); \
fputs("},\n", stdout); \
fputs("},\n", SIMDE_CODEGEN_FP); \
} \
} \
\
Expand Down Expand Up @@ -229,7 +229,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_fl
simde_test_arm_neon_write_##symbol_identifier##x##element_count##x3(int indent, simde_##NT value, SimdeTestVecPos pos) { \
if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
simde_test_codegen_write_indent(indent); \
fputs("{\n", stdout); \
fputs("{\n", SIMDE_CODEGEN_FP); \
} \
ET value0_[sizeof(value) / sizeof(ET) / 3]; \
ET value1_[sizeof(value) / sizeof(ET) / 3]; \
Expand All @@ -244,7 +244,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_fl
simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value2_) / sizeof(ET), value2_, SIMDE_TEST_VEC_POS_LAST); \
if (pos == SIMDE_TEST_VEC_POS_LAST) { \
simde_test_codegen_write_indent(indent); \
fputs("},\n", stdout); \
fputs("},\n", SIMDE_CODEGEN_FP); \
} \
\
} \
Expand Down Expand Up @@ -288,7 +288,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_fl
simde_test_arm_neon_write_##symbol_identifier##x##element_count##x3(int indent, simde_##NT value, SimdeTestVecPos pos) { \
if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
simde_test_codegen_write_indent(indent); \
fputs("{\n", stdout); \
fputs("{\n", SIMDE_CODEGEN_FP); \
} \
\
ET value0_[sizeof(value) / sizeof(ET) / 3]; \
Expand All @@ -302,7 +302,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_fl
simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value2_) / sizeof(value2_[0]), value2_, SIMDE_TEST_VEC_POS_LAST); \
if (pos == SIMDE_TEST_VEC_POS_LAST) { \
simde_test_codegen_write_indent(indent); \
fputs("},\n", stdout); \
fputs("},\n", SIMDE_CODEGEN_FP); \
} \
} \
\
Expand Down Expand Up @@ -361,7 +361,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x3_t, simde_fl
simde_test_arm_neon_write_##symbol_identifier##x##element_count##x4(int indent, simde_##NT value, SimdeTestVecPos pos) { \
if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
simde_test_codegen_write_indent(indent); \
fputs("{\n", stdout); \
fputs("{\n", SIMDE_CODEGEN_FP); \
} \
ET value0_[sizeof(value) / sizeof(ET) / 4]; \
ET value1_[sizeof(value) / sizeof(ET) / 4]; \
Expand All @@ -379,7 +379,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x3_t, simde_fl
simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value3_) / sizeof(ET), value3_, SIMDE_TEST_VEC_POS_LAST); \
if (pos == SIMDE_TEST_VEC_POS_LAST) { \
simde_test_codegen_write_indent(indent); \
fputs("},\n", stdout); \
fputs("},\n", SIMDE_CODEGEN_FP); \
} \
\
} \
Expand Down Expand Up @@ -430,7 +430,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x3_t, simde_fl
simde_test_arm_neon_write_##symbol_identifier##x##element_count##x4(int indent, simde_##NT value, SimdeTestVecPos pos) { \
if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
simde_test_codegen_write_indent(indent); \
fputs("{\n", stdout); \
fputs("{\n", SIMDE_CODEGEN_FP); \
} \
\
ET value0_[sizeof(value) / sizeof(ET) / 4]; \
Expand All @@ -447,7 +447,7 @@ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x3_t, simde_fl
simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value3_) / sizeof(value3_[0]), value3_, SIMDE_TEST_VEC_POS_LAST); \
if (pos == SIMDE_TEST_VEC_POS_LAST) { \
simde_test_codegen_write_indent(indent); \
fputs("},\n", stdout); \
fputs("},\n", SIMDE_CODEGEN_FP); \
} \
} \
\
Expand Down

0 comments on commit 85d2ed2

Please sign in to comment.