Skip to content

Commit

Permalink
Added basic SSE4.2 support.
Browse files Browse the repository at this point in the history
  • Loading branch information
kamyuentse committed Mar 23, 2018
1 parent 27404ef commit 37b77af
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 94 deletions.
1 change: 1 addition & 0 deletions src/iter.rs
Expand Up @@ -30,6 +30,7 @@ impl<'a> Bytes<'a> {
}

#[cfg(feature = "nightly")]
#[allow(unused)]
#[inline]
pub fn advance(&mut self, n: usize) {
debug_assert!(self.pos + n <= self.slice.len(), "overflow");
Expand Down
221 changes: 128 additions & 93 deletions src/lib.rs
@@ -1,4 +1,4 @@
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "nightly", feature(cfg_target_feature, stdsimd))]
#![deny(missing_docs)]
Expand All @@ -12,43 +12,15 @@
//! skipping bounds checks.
//!
//! The speed is faster than picohttpparser, when SIMD is not available.
#[cfg(feature = "std")] extern crate std as core;
#[cfg(feature = "std")]
extern crate std as core;

use core::{fmt, result, str, slice};

use iter::Bytes;

mod iter;

macro_rules! next {
($bytes:ident) => ({
match $bytes.next() {
Some(b) => b,
None => return Ok(Status::Partial)
}
})
}

macro_rules! expect {
($bytes:ident.next() == $pat:pat => $ret:expr) => {
expect!(next!($bytes) => $pat |? $ret)
};
($e:expr => $pat:pat |? $ret:expr) => {
match $e {
v@$pat => v,
_ => return $ret
}
};
}

macro_rules! complete {
($e:expr) => {
match try!($e) {
Status::Complete(v) => v,
Status::Partial => return Ok(Status::Partial)
}
}
}
#[macro_use] mod macros;

#[inline]
fn shrink<T>(slice: &mut &mut [T], len: usize) {
Expand All @@ -72,32 +44,26 @@ fn is_token(b: u8) -> bool {
b > 0x1F && b < 0x7F
}

macro_rules! byte_map {
($($flag:expr,)*) => ([
$($flag != 0,)*
])
}

// ASCII codes to accept URI string.
// i.e. A-Z a-z 0-9 !#$%&'*+-._();:@=,/?[]~
// TODO: Make a stricter checking for URI string?
static URI_MAP: [bool; 256] = byte_map![
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
// ====== Extended ASCII (aka. obs-text) ======
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
// ====== Extended ASCII (aka. obs-text) ======
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

Expand All @@ -106,7 +72,6 @@ fn is_uri_token(b: u8) -> bool {
URI_MAP[b as usize]
}


static HEADER_NAME_MAP: [bool; 256] = byte_map![
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Expand Down Expand Up @@ -156,29 +121,6 @@ fn is_header_value_token(b: u8) -> bool {
HEADER_VALUE_MAP[b as usize]
}


macro_rules! space {
($bytes:ident or $err:expr) => ({
expect!($bytes.next() == b' ' => Err($err));
$bytes.slice();
})
}

macro_rules! newline {
($bytes:ident) => ({
match next!($bytes) {
b'\r' => {
expect!($bytes.next() == b'\n' => Err(Error::NewLine));
$bytes.slice();
},
b'\n' => {
$bytes.slice();
},
_ => return Err(Error::NewLine)
}
})
}

/// An error in parsing.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Error {
Expand Down Expand Up @@ -553,15 +495,27 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
#[inline]
fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx2"))]
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32(bytes.as_ref());
let advance = match_url_char_32_avx(bytes.as_ref());
bytes.advance(advance);

if advance != 32 {
break;
}
}

#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse4.2"))]
while bytes.as_ref().len() >= 16 {
let advance = match_url_char_16_sse(bytes.as_ref());
bytes.advance(advance);

if advance != 16 {
break;
}
}

loop {
let b = next!(bytes);
if b == b' ' {
Expand All @@ -578,7 +532,7 @@ fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx2"))]
#[inline]
fn match_url_char_32(buf: &[u8]) -> usize {
fn match_url_char_32_avx(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 32);

#[cfg(target_arch = "x86")]
Expand All @@ -589,7 +543,7 @@ fn match_url_char_32(buf: &[u8]) -> usize {
let ptr = buf.as_ptr();

#[allow(non_snake_case, overflowing_literals)]
unsafe {
unsafe {
let LSH: __m256i = _mm256_set1_epi8(0x0f);
let URI: __m256i = _mm256_setr_epi8(
0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
Expand All @@ -616,15 +570,52 @@ fn match_url_char_32(buf: &[u8]) -> usize {
}
}

#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse4.2"))]
#[inline]
fn match_url_char_16_sse(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 16);

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

let ptr = buf.as_ptr();

#[allow(non_snake_case, overflowing_literals)]
unsafe {
let LSH: __m128i = _mm_set1_epi8(0x0f);
let URI: __m128i = _mm_setr_epi8(
0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
0xfc, 0xfc, 0xfc, 0x7c, 0x54, 0x7c, 0xd4, 0x7c,
);
let ARF: __m128i = _mm_setr_epi8(
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
);

let data = _mm_lddqu_si128(ptr as *const _);
let rbms = _mm_shuffle_epi8(URI, data);
let cols = _mm_and_si128(LSH, _mm_srli_epi16(data, 4));
let bits = _mm_and_si128(_mm_shuffle_epi8(ARF, cols), rbms);

let v = _mm_cmpeq_epi8(bits, _mm_setzero_si128());
let r = 0xffff_0000 | _mm_movemask_epi8(v) as u32;

_tzcnt_u32(r) as usize
}
}

#[inline]
fn parse_code(bytes: &mut Bytes) -> Result<u16> {
let hundreds = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status));
let tens = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status));
let ones = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status));

Ok(Status::Complete((hundreds - b'0') as u16 * 100 +
(tens - b'0') as u16 * 10 +
(ones - b'0') as u16))
(tens - b'0') as u16 * 10 +
(ones - b'0') as u16))
}

/// Parse a buffer of bytes as headers.
Expand Down Expand Up @@ -652,10 +643,42 @@ pub fn parse_headers<'b: 'h, 'h>(src: &'b [u8], mut dst: &'h mut [Header<'b>])
Ok(Status::Complete((pos, dst)))
}

#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse4.2"))]
#[inline]
fn match_header_value_char_16_sse(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 16);

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

let ptr = buf.as_ptr();

#[allow(non_snake_case)]
unsafe {
// %x09 %x20-%x7e %x80-%xff
let TAB: __m128i = _mm_set1_epi8(0x09);
let DEL: __m128i = _mm_set1_epi8(0x7f);
let LOW: __m128i = _mm_set1_epi8(0x1f);

let dat = _mm_lddqu_si128(ptr as *const _);
let low = _mm_cmpgt_epi8(dat, LOW);
let tab = _mm_cmpeq_epi8(dat, TAB);
let del = _mm_cmpeq_epi8(dat, DEL);
let bit = _mm_andnot_si128(del, _mm_or_si128(low, tab));
let rev = _mm_cmpeq_epi8(bit, _mm_setzero_si128());
let res = 0xffff_0000 | _mm_movemask_epi8(rev) as u32;

_tzcnt_u32(res) as usize
}
}

#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx2"))]
#[inline]
fn match_header_value_char_32(buf: &[u8]) -> usize {
fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 32);

#[cfg(target_arch = "x86")]
Expand Down Expand Up @@ -750,14 +773,27 @@ fn parse_headers_iter<'a, 'b>(headers: &mut &mut [Header<'a>], bytes: &'b mut By
// parse value till EOL

#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx2"))]
{
'batch: while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32(bytes.as_ref());
'batch32: while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
bytes.advance(advance);

if advance != 32 {
break 'batch;
}
if advance != 32 {
break 'batch32;
}
}
}
#[cfg(feature = "nightly")]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse4.2"))]
{
'batch16: while bytes.as_ref().len() >= 16 {
let advance = match_header_value_char_16_sse(bytes.as_ref());
bytes.advance(advance);

if advance != 16 {
break 'batch16;
}
}
}

Expand Down Expand Up @@ -801,7 +837,6 @@ fn parse_headers_iter<'a, 'b>(headers: &mut &mut [Header<'a>], bytes: &'b mut By
} else {
return Err(Error::HeaderValue);
}

}
} // drop iter

Expand All @@ -822,7 +857,7 @@ fn parse_headers_iter<'a, 'b>(headers: &mut &mut [Header<'a>], bytes: &'b mut By
/// Ok(httparse::Status::Complete((3, 4))));
/// ```
pub fn parse_chunk_size(buf: &[u8])
-> result::Result<Status<(usize, u64)>, InvalidChunkSize> {
-> result::Result<Status<(usize, u64)>, InvalidChunkSize> {
const RADIX: u64 = 16;
let mut bytes = Bytes::new(buf);
let mut size = 0;
Expand All @@ -832,23 +867,23 @@ pub fn parse_chunk_size(buf: &[u8])
loop {
let b = next!(bytes);
match b {
b'0'...b'9' if in_chunk_size => {
b'0' ... b'9' if in_chunk_size => {
if count > 15 {
return Err(InvalidChunkSize);
}
count += 1;
size *= RADIX;
size += (b - b'0') as u64;
},
b'a'...b'f' if in_chunk_size => {
b'a' ... b'f' if in_chunk_size => {
if count > 15 {
return Err(InvalidChunkSize);
}
count += 1;
size *= RADIX;
size += (b + 10 - b'a') as u64;
}
b'A'...b'F' if in_chunk_size => {
b'A' ... b'F' if in_chunk_size => {
if count > 15 {
return Err(InvalidChunkSize);
}
Expand Down
2 changes: 1 addition & 1 deletion src/macros.rs
Expand Up @@ -56,4 +56,4 @@ macro_rules! newline {
_ => return Err(Error::NewLine)
}
})
}
}

0 comments on commit 37b77af

Please sign in to comment.