Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARMv7 Neon support WIP (does not compile yet) #43

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@ std = []
# expose SIMD implementations in basic::imp::* and compat::imp::*
public_imp = []

# aarch64 NEON SIMD implementation - requires nightly
# aarch64 Neon SIMD implementation - requires nightly
aarch64_neon = []

# arm Neon SIMD implementation - requires nightly
arm_neon = []

# deprecated - does not do anything
hints = []

Expand Down
40 changes: 40 additions & 0 deletions src/implementation/arm/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
pub(crate) mod neon;

#[inline]
#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> {
if input.len() < super::helpers::SIMD_CHUNK_SIZE {
return super::validate_utf8_basic_fallback(input);
}

validate_utf8_basic_neon(input)
}

#[inline(never)]
#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
unsafe fn validate_utf8_basic_neon(input: &[u8]) -> Result<(), crate::basic::Utf8Error> {
neon::validate_utf8_basic(input)
}

#[cfg(not(all(feature = "arm_neon", target_feature = "neon")))]
pub(crate) use super::validate_utf8_basic_fallback as validate_utf8_basic;

#[inline]
#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
if input.len() < super::helpers::SIMD_CHUNK_SIZE {
return super::validate_utf8_compat_fallback(input);
}

validate_utf8_compat_neon(input)
}

#[inline(never)]
#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
unsafe fn validate_utf8_compat_neon(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
neon::validate_utf8_compat(input)
}

#[cfg(not(all(feature = "arm_neon", target_feature = "neon")))]
pub(crate) use super::validate_utf8_compat_fallback as validate_utf8_compat;
235 changes: 235 additions & 0 deletions src/implementation/arm/neon.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
//! Contains the arm UTF-8 validation implementation.

use core::arch::arm::{
uint8x16_t, vandq_u8, vcgtq_u8, vdupq_n_u8, veorq_u8, vextq_u8, vget_high_u32, vget_lane_u32,
vget_low_u32, vld1q_u8, vmovq_n_u8, vorr_u32, vorrq_u8, vpmax_u32, vqsubq_u8,
vreinterpretq_u32_u8, vshrq_n_u8,
};

use crate::implementation::helpers::Utf8CheckAlgorithm;

// arm Neon SIMD primitives

type SimdU8Value = crate::implementation::helpers::SimdU8Value<uint8x16_t>;

impl SimdU8Value {
#[inline]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::cast_possible_wrap)]
unsafe fn from_32_cut_off_leading(
_v0: u8,
_v1: u8,
_v2: u8,
_v3: u8,
_v4: u8,
_v5: u8,
_v6: u8,
_v7: u8,
_v8: u8,
_v9: u8,
_v10: u8,
_v11: u8,
_v12: u8,
_v13: u8,
_v14: u8,
_v15: u8,
v16: u8,
v17: u8,
v18: u8,
v19: u8,
v20: u8,
v21: u8,
v22: u8,
v23: u8,
v24: u8,
v25: u8,
v26: u8,
v27: u8,
v28: u8,
v29: u8,
v30: u8,
v31: u8,
) -> Self {
let arr: [u8; 16] = [
v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
];
Self::from(vld1q_u8(arr.as_ptr()))
}

#[inline]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::cast_possible_wrap)]
unsafe fn repeat_16(
v0: u8,
v1: u8,
v2: u8,
v3: u8,
v4: u8,
v5: u8,
v6: u8,
v7: u8,
v8: u8,
v9: u8,
v10: u8,
v11: u8,
v12: u8,
v13: u8,
v14: u8,
v15: u8,
) -> Self {
let arr: [u8; 16] = [
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
];
Self::from(vld1q_u8(arr.as_ptr()))
}

#[inline]
#[allow(clippy::cast_ptr_alignment)]
unsafe fn load_from(ptr: *const u8) -> Self {
// WORKAROUND for https://github.com/rust-lang/stdarch/issues/1148
// The vld1q_u8 intrinsic is currently broken, it treats it as individual
// byte loads so the compiler sometimes decides it is a better to load
// individual bytes to "optimize" a subsequent SIMD shuffle
//
// This code forces a full 128-bit load.
let mut dst = core::mem::MaybeUninit::<uint8x16_t>::uninit();
core::ptr::copy_nonoverlapping(
ptr.cast::<u8>(),
dst.as_mut_ptr().cast::<u8>(),
core::mem::size_of::<uint8x16_t>(),
);
Self::from(dst.assume_init())
}

#[inline]
#[allow(clippy::too_many_arguments)]
unsafe fn lookup_16(
self,
v0: u8,
v1: u8,
v2: u8,
v3: u8,
v4: u8,
v5: u8,
v6: u8,
v7: u8,
v8: u8,
v9: u8,
v10: u8,
v11: u8,
v12: u8,
v13: u8,
v14: u8,
v15: u8,
) -> Self {
unimplemented!();
}

#[inline]
#[allow(clippy::cast_possible_wrap)]
unsafe fn splat(val: u8) -> Self {
Self::from(vmovq_n_u8(val))
}

#[inline]
#[allow(clippy::cast_possible_wrap)]
unsafe fn splat0() -> Self {
Self::from(vdupq_n_u8(0))
}

#[inline]
unsafe fn or(self, b: Self) -> Self {
Self::from(vorrq_u8(self.0, b.0))
}

#[inline]
unsafe fn and(self, b: Self) -> Self {
Self::from(vandq_u8(self.0, b.0))
}

#[inline]
unsafe fn xor(self, b: Self) -> Self {
Self::from(veorq_u8(self.0, b.0))
}

#[inline]
unsafe fn saturating_sub(self, b: Self) -> Self {
Self::from(vqsubq_u8(self.0, b.0))
}

// ugly but shr<N> requires const generics

#[allow(clippy::cast_lossless)]
#[inline]
unsafe fn shr4(self) -> Self {
Self::from(vshrq_n_u8(self.0, 4))
}

// ugly but prev<N> requires const generics

#[allow(clippy::cast_lossless)]
#[inline]
unsafe fn prev1(self, prev: Self) -> Self {
Self::from(vextq_u8(prev.0, self.0, 16 - 1))
}

// ugly but prev<N> requires const generics

#[allow(clippy::cast_lossless)]
#[inline]
unsafe fn prev2(self, prev: Self) -> Self {
Self::from(vextq_u8(prev.0, self.0, 16 - 2))
}

// ugly but prev<N> requires const generics

#[allow(clippy::cast_lossless)]
#[inline]
unsafe fn prev3(self, prev: Self) -> Self {
Self::from(vextq_u8(prev.0, self.0, 16 - 3))
}

#[inline]
unsafe fn unsigned_gt(self, other: Self) -> Self {
Self::from(vcgtq_u8(self.0, other.0))
}

#[inline]
unsafe fn any_bit_set(self) -> bool {
let tmp = vreinterpretq_u32_u8(self.0);
let tmp = vorr_u32(vget_low_u32(tmp), vget_high_u32(tmp));
return vget_lane_u32(vpmax_u32(tmp, tmp), 0) != 0;
}

#[inline]
unsafe fn is_ascii(self) -> bool {
unimplemented!();
}
}

impl From<uint8x16_t> for SimdU8Value {
#[inline]
fn from(val: uint8x16_t) -> Self {
Self { 0: val }
}
}

impl Utf8CheckAlgorithm<SimdU8Value> {
#[inline]
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1));
let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1));

is_third_byte.or(is_fourth_byte)
}
}

#[inline]
unsafe fn simd_prefetch(ptr: *const u8) {
// _pld intrinsic currently not available, potential benefit also unknown
}

const PREFETCH: bool = false;
use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk;
simd_input_128_bit!("neon");
algorithm_simd!("neon");
25 changes: 23 additions & 2 deletions src/implementation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,33 @@ pub(super) use aarch64::validate_utf8_basic;
#[cfg(target_arch = "aarch64")]
pub(super) use aarch64::validate_utf8_compat;

// arm implementation

#[cfg(target_arch = "arm")]
pub(crate) mod arm;

#[cfg(target_arch = "arm")]
pub(super) use arm::validate_utf8_basic;

#[cfg(target_arch = "arm")]
pub(super) use arm::validate_utf8_compat;

// fallback for unsupported architectures

#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
#[cfg(not(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm"
)))]
pub(super) use validate_utf8_basic_fallback as validate_utf8_basic;

#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
#[cfg(not(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm"
)))]
pub(super) use validate_utf8_compat_fallback as validate_utf8_compat;

// fallback method implementations
Expand Down
9 changes: 8 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,16 @@
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(
all(feature = "aarch64_neon", target_arch = "aarch64"),
any(
all(feature = "aarch64_neon", target_arch = "aarch64"),
all(feature = "arm_neon", target_arch = "arm")
),
feature(stdsimd)
)]
#![cfg_attr(
all(feature = "arm_neon", target_arch = "arm"),
feature(arm_target_feature)
)]

//! Blazingly fast API-compatible UTF-8 validation for Rust using SIMD extensions, based on the implementation from
//! [simdjson](https://github.com/simdjson/simdjson). Originally ported to Rust by the developers of [simd-json.rs](https://simd-json.rs), but now heavily improved.
Expand Down