fix build issues for gcc under aarch64

simdutf · Nov 15, 2023 · 43b46b3 · 43b46b3
1 parent d82c4a1
commit 43b46b3
Show file tree

Hide file tree

Showing 7 changed files with 44 additions and 15 deletions.
diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml
@@ -0,0 +1,29 @@
+name: Ubuntu aarch64 (GCC 11)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: uraimo/run-on-arch-action@v2
+        name: Test
+        id: runcmd
+        with:
+          arch: aarch64
+          githubToken: ${{ github.token }}
+          distro: ubuntu_latest
+          install: |
+            apt-get update -q -y
+            apt-get install -y cmake make g++
+          run: |
+            cmake -DCMAKE_BUILD_TYPE=Release -B build
+            cmake --build build -j=2
+            ctest --output-on-failure --test-dir build
diff --git a/src/arm64/arm_convert_latin1_to_utf16.cpp b/src/arm64/arm_convert_latin1_to_utf16.cpp
@@ -5,10 +5,10 @@ std::pair<const char*, char16_t*> arm_convert_latin1_to_utf16(const char* buf, s
     while (buf + 16 <= end) {
         uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
         uint16x8_t inlow = vmovl_u8(vget_low_u8(in8));
-        if (!match_system(big_endian)) { inlow = vrev16q_u8(inlow); }
+        if (!match_system(big_endian)) { inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); }
         vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output), inlow);
         uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8));
-        if (!match_system(big_endian)) { inhigh = vrev16q_u8(inhigh); }
+        if (!match_system(big_endian)) { inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); }
         vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output+8), inhigh);
         utf16_output += 16;
         buf += 16;

diff --git a/src/arm64/arm_convert_utf16_to_latin1.cpp b/src/arm64/arm_convert_utf16_to_latin1.cpp
@@ -4,7 +4,7 @@ std::pair<const char16_t*, char*> arm_convert_utf16_to_latin1(const char16_t* bu
   const char16_t* end = buf + len;
   while (buf + 8 <= end) {
     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
-    if (!match_system(big_endian)) { in = vrev16q_u8(in); }
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
     if (vmaxvq_u16(in) <= 0xff) {
         // 1. pack the bytes
         uint8x8_t latin1_packed = vmovn_u16(in);
@@ -26,7 +26,7 @@ std::pair<result, char*> arm_convert_utf16_to_latin1_with_errors(const char16_t*
   const char16_t* end = buf + len;
   while (buf + 8 <= end) {
     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
-    if (!match_system(big_endian)) { in = vrev16q_u8(in); }
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
     if (vmaxvq_u16(in) <= 0xff) {
         // 1. pack the bytes
         uint8x8_t latin1_packed = vmovn_u16(in);

diff --git a/src/arm64/arm_convert_utf16_to_utf32.cpp b/src/arm64/arm_convert_utf16_to_utf32.cpp
@@ -60,7 +60,7 @@ std::pair<const char16_t*, char32_t*> arm_convert_utf16_to_utf32(const char16_t*
 
   while (buf + 8 <= end) {
     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
-    if (!match_system(big_endian)) { in = vrev16q_u8(in); }
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
 
     const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
@@ -118,7 +118,7 @@ std::pair<result, char32_t*> arm_convert_utf16_to_utf32_with_errors(const char16
 
   while (buf + 8 <= end) {
     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
-    if (!match_system(big_endian)) { in = vrev16q_u8(in); }
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
 
     const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,

diff --git a/src/arm64/arm_convert_utf16_to_utf8.cpp b/src/arm64/arm_convert_utf16_to_utf8.cpp
@@ -61,11 +61,11 @@ std::pair<const char16_t*, char*> arm_convert_utf16_to_utf8(const char16_t* buf,
 
   while (buf + 16 <= end) {
     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
-    if (!match_system(big_endian)) { in = vrev16q_u8(in); }
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
     if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
         // It is common enough that we have sequences of 16 consecutive ASCII characters.
         uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
-        if (!match_system(big_endian)) { nextin = vrev16q_u8(nextin); }
+        if (!match_system(big_endian)) { nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); }
         if(vmaxvq_u16(nextin) > 0x7F) {
           // 1. pack the bytes
           // obviously suboptimal.
@@ -314,11 +314,11 @@ std::pair<result, char*> arm_convert_utf16_to_utf8_with_errors(const char16_t* b
 
   while (buf + 16 <= end) {
     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
-    if (!match_system(big_endian)) { in = vrev16q_u8(in); }
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
     if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
         // It is common enough that we have sequences of 16 consecutive ASCII characters.
         uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
-        if (!match_system(big_endian)) { nextin = vrev16q_u8(nextin); }
+        if (!match_system(big_endian)) { nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); }
         if(vmaxvq_u16(nextin) > 0x7F) {
           // 1. pack the bytes
           // obviously suboptimal.

diff --git a/src/arm64/arm_validate_utf16.cpp b/src/arm64/arm_validate_utf16.cpp
@@ -12,8 +12,8 @@ const char16_t* arm_validate_utf16(const char16_t* input, size_t size) {
         auto in0 = simd16<uint16_t>(input);
         auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
         if (!match_system(big_endian)) {
-            in0 = vrev16q_u8(in0);
-            in1 = vrev16q_u8(in1);
+            in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0)));
+            in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1)));
         }
         const auto t0 = in0.shr<8>();
         const auto t1 = in1.shr<8>();
@@ -83,8 +83,8 @@ const result arm_validate_utf16_with_errors(const char16_t* input, size_t size)
         auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
 
         if (!match_system(big_endian)) {
-            in0 = vrev16q_u8(in0);
-            in1 = vrev16q_u8(in1);
+            in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0)));
+            in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1)));
         }
         const auto t0 = in0.shr<8>();
         const auto t1 = in1.shr<8>();

diff --git a/src/simdutf/arm64/simd16-inl.h b/src/simdutf/arm64/simd16-inl.h
@@ -163,7 +163,7 @@ struct simd16<uint16_t>: base16_numeric<uint16_t>  {
 
   // Change the endianness
   simdutf_really_inline simd16<uint16_t> swap_bytes() const {
-    return vreinterpretq_u16_u8(vrev16q_u8((*this)));
+    return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this)));
   }
 };
 simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }