Skip to content

Commit

Permalink
fix build issues for gcc under aarch64
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Nov 15, 2023
1 parent d82c4a1 commit 43b46b3
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 15 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/aarch64.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI workflow: configure, build and test the project for aarch64 with the
# distribution's g++ toolchain. The job is hosted on an ubuntu-latest runner and
# the commands execute through uraimo/run-on-arch-action with arch set to aarch64.
# NOTE(review): the compiler version is whatever the distro's `g++` package
# provides; confirm it still matches the "GCC 11" in the workflow name.
name: Ubuntu aarch64 (GCC 11)

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: uraimo/run-on-arch-action@v2
        name: Test
        id: runcmd
        with:
          arch: aarch64
          githubToken: ${{ github.token }}
          distro: ubuntu_latest
          # Toolchain needed inside the target image before building.
          install: |
            apt-get update -q -y
            apt-get install -y cmake make g++
          # Release build followed by the full test suite.
          run: |
            cmake -DCMAKE_BUILD_TYPE=Release -B build
            cmake --build build -j=2
            ctest --output-on-failure --test-dir build
4 changes: 2 additions & 2 deletions src/arm64/arm_convert_latin1_to_utf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ std::pair<const char*, char16_t*> arm_convert_latin1_to_utf16(const char* buf, s
while (buf + 16 <= end) {
uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
uint16x8_t inlow = vmovl_u8(vget_low_u8(in8));
if (!match_system(big_endian)) { inlow = vrev16q_u8(inlow); }
if (!match_system(big_endian)) { inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); }
vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output), inlow);
uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8));
if (!match_system(big_endian)) { inhigh = vrev16q_u8(inhigh); }
if (!match_system(big_endian)) { inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); }
vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output+8), inhigh);
utf16_output += 16;
buf += 16;
Expand Down
4 changes: 2 additions & 2 deletions src/arm64/arm_convert_utf16_to_latin1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ std::pair<const char16_t*, char*> arm_convert_utf16_to_latin1(const char16_t* bu
const char16_t* end = buf + len;
while (buf + 8 <= end) {
uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
if (!match_system(big_endian)) { in = vrev16q_u8(in); }
if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
if (vmaxvq_u16(in) <= 0xff) {
// 1. pack the bytes
uint8x8_t latin1_packed = vmovn_u16(in);
Expand All @@ -26,7 +26,7 @@ std::pair<result, char*> arm_convert_utf16_to_latin1_with_errors(const char16_t*
const char16_t* end = buf + len;
while (buf + 8 <= end) {
uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
if (!match_system(big_endian)) { in = vrev16q_u8(in); }
if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
if (vmaxvq_u16(in) <= 0xff) {
// 1. pack the bytes
uint8x8_t latin1_packed = vmovn_u16(in);
Expand Down
4 changes: 2 additions & 2 deletions src/arm64/arm_convert_utf16_to_utf32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ std::pair<const char16_t*, char32_t*> arm_convert_utf16_to_utf32(const char16_t*

while (buf + 8 <= end) {
uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
if (!match_system(big_endian)) { in = vrev16q_u8(in); }
if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }

const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
// It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
Expand Down Expand Up @@ -118,7 +118,7 @@ std::pair<result, char32_t*> arm_convert_utf16_to_utf32_with_errors(const char16

while (buf + 8 <= end) {
uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
if (!match_system(big_endian)) { in = vrev16q_u8(in); }
if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }

const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
// It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
Expand Down
8 changes: 4 additions & 4 deletions src/arm64/arm_convert_utf16_to_utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ std::pair<const char16_t*, char*> arm_convert_utf16_to_utf8(const char16_t* buf,

while (buf + 16 <= end) {
uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
if (!match_system(big_endian)) { in = vrev16q_u8(in); }
if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
// It is common enough that we have sequences of 16 consecutive ASCII characters.
uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
if (!match_system(big_endian)) { nextin = vrev16q_u8(nextin); }
if (!match_system(big_endian)) { nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); }
if(vmaxvq_u16(nextin) > 0x7F) {
// 1. pack the bytes
// obviously suboptimal.
Expand Down Expand Up @@ -314,11 +314,11 @@ std::pair<result, char*> arm_convert_utf16_to_utf8_with_errors(const char16_t* b

while (buf + 16 <= end) {
uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
if (!match_system(big_endian)) { in = vrev16q_u8(in); }
if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
// It is common enough that we have sequences of 16 consecutive ASCII characters.
uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
if (!match_system(big_endian)) { nextin = vrev16q_u8(nextin); }
if (!match_system(big_endian)) { nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); }
if(vmaxvq_u16(nextin) > 0x7F) {
// 1. pack the bytes
// obviously suboptimal.
Expand Down
8 changes: 4 additions & 4 deletions src/arm64/arm_validate_utf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ const char16_t* arm_validate_utf16(const char16_t* input, size_t size) {
auto in0 = simd16<uint16_t>(input);
auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
if (!match_system(big_endian)) {
in0 = vrev16q_u8(in0);
in1 = vrev16q_u8(in1);
in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0)));
in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1)));
}
const auto t0 = in0.shr<8>();
const auto t1 = in1.shr<8>();
Expand Down Expand Up @@ -83,8 +83,8 @@ const result arm_validate_utf16_with_errors(const char16_t* input, size_t size)
auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));

if (!match_system(big_endian)) {
in0 = vrev16q_u8(in0);
in1 = vrev16q_u8(in1);
in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0)));
in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1)));
}
const auto t0 = in0.shr<8>();
const auto t1 = in1.shr<8>();
Expand Down
2 changes: 1 addition & 1 deletion src/simdutf/arm64/simd16-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ struct simd16<uint16_t>: base16_numeric<uint16_t> {

// Change the endianness: returns a copy of this vector with the two bytes of
// every 16-bit lane swapped.
// vrev16q_u8 takes a uint8x16_t, so the 16-bit vector is explicitly
// reinterpreted as bytes for the reversal and reinterpreted back afterwards;
// passing the 16-bit vector directly is a type mismatch for vrev16q_u8.
simdutf_really_inline simd16<uint16_t> swap_bytes() const {
  return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this)));
}
};
simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }
Expand Down

0 comments on commit 43b46b3

Please sign in to comment.