Skip to content

Commit c5afe3d

Browse files
authored
feat: add escape_into (#18)
1 parent 2df28ec commit c5afe3d

File tree

3 files changed

+92
-28
lines changed

3 files changed

+92
-28
lines changed

src/aarch64.rs

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,9 @@ const PREFETCH_DISTANCE: usize = CHUNK * 2;
1010
const SLASH_SENTINEL: u8 = 0xFF;
1111

1212
#[inline]
13-
pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
14-
let s = input.as_ref();
15-
let bytes = s.as_bytes();
13+
pub fn escape_neon(bytes: &[u8], output: &mut Vec<u8>) {
1614
let n = bytes.len();
1715

18-
let mut out = Vec::with_capacity(n + 2);
19-
out.push(b'"');
20-
2116
unsafe {
2217
let tbl = vld1q_u8_x4(ESCAPE.as_ptr());
2318
let slash = vdupq_n_u8(b'\\');
@@ -53,18 +48,18 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
5348
let mask_r_4 = vmaxvq_u8(mask_4);
5449

5550
if mask_r_1 | mask_r_2 | mask_r_3 | mask_r_4 == 0 {
56-
out.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
51+
output.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
5752
i += CHUNK;
5853
continue;
5954
}
6055

6156
macro_rules! handle {
6257
($mask:expr, $mask_r:expr, $off:expr) => {
6358
if $mask_r == 0 {
64-
out.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
59+
output.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
6560
} else {
6661
vst1q_u8(placeholder.as_mut_ptr(), $mask);
67-
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, &mut out);
62+
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, output);
6863
}
6964
};
7065
}
@@ -78,13 +73,9 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
7873
}
7974

8075
if i < n {
81-
handle_tail(&bytes[i..], &mut out);
76+
handle_tail(&bytes[i..], output);
8277
}
8378
}
84-
85-
out.push(b'"');
86-
87-
unsafe { String::from_utf8_unchecked(out) }
8879
}
8980

9081
#[inline(always)]

src/generic.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
1313
unsafe { String::from_utf8_unchecked(result) }
1414
}
1515

16+
#[inline]
17+
pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
18+
let s = s.as_ref();
19+
let bytes = s.as_bytes();
20+
output.push(b'"');
21+
escape_inner(bytes, output);
22+
output.push(b'"');
23+
}
24+
1625
#[inline]
1726
// Slightly modified version of
1827
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>

src/lib.rs

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,19 +114,19 @@ mod generic;
114114
#[cfg(target_arch = "x86_64")]
115115
mod x86;
116116

117-
pub use generic::escape_generic;
117+
pub use generic::{escape_generic, escape_into_generic};
118118

119119
/// Main entry point for JSON string escaping with SIMD acceleration
120120
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121121
pub fn escape<S: AsRef<str>>(input: S) -> String {
122+
use generic::escape_inner;
123+
124+
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
125+
result.push(b'"');
126+
let s = input.as_ref();
127+
let bytes = s.as_bytes();
122128
#[cfg(target_arch = "x86_64")]
123129
{
124-
use generic::escape_inner;
125-
126-
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
127-
result.push(b'"');
128-
let s = input.as_ref();
129-
let bytes = s.as_bytes();
130130
let len = bytes.len();
131131
// Runtime CPU feature detection for x86_64
132132
if is_x86_feature_detected!("avx512f")
@@ -144,16 +144,71 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
144144
} else {
145145
escape_inner(bytes, &mut result);
146146
}
147-
result.push(b'"');
148-
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
149-
unsafe { String::from_utf8_unchecked(result) }
150147
}
151148

152149
#[cfg(target_arch = "aarch64")]
153150
{
154151
#[cfg(feature = "force_aarch64_neon")]
155152
{
156-
return aarch64::escape_neon(input);
153+
aarch64::escape_neon(bytes, &mut result);
154+
}
155+
#[cfg(not(feature = "force_aarch64_neon"))]
156+
{
157+
// on Apple M2 and later, the `bf16` feature is available
158+
// it means they have more registers and can significantly benefit from the SIMD path
159+
// TODO: add support for sve2 chips with wider registers
160+
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
161+
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
162+
aarch64::escape_neon(bytes, &mut result);
163+
} else {
164+
escape_inner(bytes, &mut result);
165+
}
166+
}
167+
}
168+
169+
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
170+
{
171+
escape_inner(bytes, &mut result);
172+
}
173+
result.push(b'"');
174+
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
175+
unsafe { String::from_utf8_unchecked(result) }
176+
}
177+
178+
/// Main entry point for JSON string escaping with SIMD acceleration
179+
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
180+
pub fn escape_into<S: AsRef<str>>(input: S, output: &mut Vec<u8>) {
181+
use generic::escape_inner;
182+
183+
output.push(b'"');
184+
let s = input.as_ref();
185+
let bytes = s.as_bytes();
186+
#[cfg(target_arch = "x86_64")]
187+
{
188+
let len = bytes.len();
189+
// Runtime CPU feature detection for x86_64
190+
if is_x86_feature_detected!("avx512f")
191+
&& is_x86_feature_detected!("avx512bw")
192+
&& len >= x86::LOOP_SIZE_AVX512
193+
{
194+
unsafe { x86::escape_avx512(bytes, output) }
195+
} else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
196+
unsafe { x86::escape_avx2(bytes, output) }
197+
} else if is_x86_feature_detected!("sse2")
198+
&& /* if len < 128, no need to use simd */
199+
len >= x86::LOOP_SIZE_AVX2
200+
{
201+
unsafe { x86::escape_sse2(bytes, output) }
202+
} else {
203+
escape_inner(bytes, output);
204+
}
205+
}
206+
207+
#[cfg(target_arch = "aarch64")]
208+
{
209+
#[cfg(feature = "force_aarch64_neon")]
210+
{
211+
return aarch64::escape_neon(bytes, output);
157212
}
158213
#[cfg(not(feature = "force_aarch64_neon"))]
159214
{
@@ -162,15 +217,18 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
162217
// TODO: add support for sve2 chips with wider registers
163218
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
164219
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
165-
return aarch64::escape_neon(input);
220+
aarch64::escape_neon(bytes, output);
166221
} else {
167-
return escape_generic(input);
222+
escape_inner(bytes, output);
168223
}
169224
}
170225
}
171226

172227
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
173-
escape_generic(input)
228+
{
229+
escape_into_generic(input, output);
230+
}
231+
output.push(b'"');
174232
}
175233

176234
#[test]
@@ -377,6 +435,9 @@ fn test_rxjs() {
377435
assert!(!sources.is_empty());
378436
for source in sources {
379437
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
438+
let mut output = String::new();
439+
escape_into(&source, unsafe { output.as_mut_vec() });
440+
assert_eq!(output, serde_json::to_string(&source).unwrap());
380441
}
381442
}
382443

@@ -402,5 +463,8 @@ fn test_sources() {
402463
assert!(!sources.is_empty());
403464
for source in sources {
404465
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
466+
let mut output = String::new();
467+
escape_into(&source, unsafe { output.as_mut_vec() });
468+
assert_eq!(output, serde_json::to_string(&source).unwrap());
405469
}
406470
}

0 commit comments

Comments
 (0)