From de04f3a9d7e784396441ae3e84747dec34bbd838 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Mon, 24 Apr 2023 23:55:58 +0300 Subject: [PATCH 1/2] refactor: Bytes inner arithmetic IMO cleaner pointer arithmetic avoiding unnecessary intermediate slices. No perf gains in isolation but facilitates some (upcoming PRs) This is conceptually easier to reason about, end is fixed, "start" and "cursor" advance monotonically --- src/iter.rs | 132 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 91 insertions(+), 41 deletions(-) diff --git a/src/iter.rs b/src/iter.rs index 0d86f9e..9dd45a8 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,108 +1,158 @@ -use core::slice; -use core::convert::TryInto; -use core::convert::TryFrom; - #[allow(missing_docs)] pub struct Bytes<'a> { - slice: &'a [u8], - pos: usize + start: *const u8, + end: *const u8, + cursor: *const u8, + phantom: core::marker::PhantomData<&'a ()>, } #[allow(missing_docs)] impl<'a> Bytes<'a> { #[inline] pub fn new(slice: &'a [u8]) -> Bytes<'a> { + let start = slice.as_ptr(); + let end = unsafe { start.add(slice.len()) }; + let cursor = start; Bytes { - slice, - pos: 0 + start, + end, + cursor, + phantom: core::marker::PhantomData, } } #[inline] pub fn pos(&self) -> usize { - self.pos + self.cursor as usize - self.start as usize } #[inline] pub fn peek(&self) -> Option { - self.peek_ahead(0) + if self.cursor < self.end { + // SAFETY: bounds checked + Some(unsafe { *self.cursor }) + } else { + None + } } #[inline] pub fn peek_ahead(&self, n: usize) -> Option { - self.slice.get(self.pos + n).copied() + let ptr = unsafe { self.cursor.add(n) }; + if ptr < self.end { + // SAFETY: bounds checked + Some(unsafe { *ptr }) + } else { + None + } } - + #[inline] - pub fn peek_n>(&self, n: usize) -> Option { - self.slice.get(self.pos..self.pos + n)?.try_into().ok() + pub fn peek_n(&self, n: usize) -> Option { + // TODO: drop `n` arg in favour of const + // let n = core::mem::size_of::(); + // Boundary check then read array from ptr + if self.len() >= n { + let ptr = self.cursor as *const U; + let x = unsafe { core::ptr::read_unaligned(ptr) }; + Some(x) + } else { + None + } } #[inline] pub unsafe fn bump(&mut self) { - debug_assert!(self.pos < self.slice.len(), "overflow"); - self.pos += 1; + self.advance(1) } - #[allow(unused)] #[inline] pub unsafe fn advance(&mut self, n: usize) { - debug_assert!(self.pos + n <= self.slice.len(), "overflow"); - self.pos += n; + self.cursor = self.cursor.add(n); + debug_assert!(self.cursor <= self.end, "overflow"); } #[inline] pub fn len(&self) -> usize { - self.slice.len() + self.end as usize - self.cursor as usize } #[inline] pub fn slice(&mut self) -> &'a [u8] { // not moving position at all, so it's safe - unsafe { - self.slice_skip(0) - } + let slice = unsafe { slice_from_ptr_range(self.start, self.cursor) }; + self.commit(); + slice } + // TODO: this is an anti-pattern, should be removed #[inline] pub unsafe fn slice_skip(&mut self, skip: usize) -> &'a [u8] { - debug_assert!(self.pos >= skip); - let head_pos = self.pos - skip; - let ptr = self.slice.as_ptr(); - let head = slice::from_raw_parts(ptr, head_pos); - let tail = slice::from_raw_parts(ptr.add(self.pos), self.slice.len() - self.pos); - self.pos = 0; - self.slice = tail; + debug_assert!(self.cursor.sub(skip) >= self.start); + let head = slice_from_ptr_range(self.start, self.cursor.sub(skip)); + self.commit(); head } + + #[inline] + pub fn commit(&mut self) { + self.start = self.cursor + } #[inline] pub unsafe fn advance_and_commit(&mut self, n: usize) { - debug_assert!(self.pos + n <= self.slice.len(), "overflow"); - self.pos += n; - let ptr = self.slice.as_ptr(); - let tail = slice::from_raw_parts(ptr.add(n), self.slice.len() - n); - self.pos = 0; - self.slice = tail; + self.advance(n); + self.commit(); + } + + #[inline] + pub fn as_ptr(&self) -> *const u8 { + self.cursor + } + + #[inline] + pub fn start(&self) -> *const u8 { + self.start + } + + #[inline] + pub fn end(&self) -> *const u8 { + self.end + } + + #[inline] + pub unsafe fn set_cursor(&mut self, ptr: *const u8) { + debug_assert!(ptr >= self.start); + debug_assert!(ptr <= self.end); + self.cursor = ptr; } } impl<'a> AsRef<[u8]> for Bytes<'a> { #[inline] fn as_ref(&self) -> &[u8] { - &self.slice[self.pos..] + unsafe { slice_from_ptr_range(self.cursor, self.end) } } } +#[inline] +unsafe fn slice_from_ptr_range<'a>(start: *const u8, end: *const u8) -> &'a [u8] { + debug_assert!(start <= end); + core::slice::from_raw_parts(start, end as usize - start as usize) +} + impl<'a> Iterator for Bytes<'a> { type Item = u8; #[inline] fn next(&mut self) -> Option { - if self.slice.len() > self.pos { - let b = unsafe { *self.slice.get_unchecked(self.pos) }; - self.pos += 1; - Some(b) + if self.cursor < self.end { + // SAFETY: bounds checked + unsafe { + let b = *self.cursor; + self.bump(); + Some(b) + } } else { None } From 7152377789fcdf6c74ca39966eeaeb0a6fcbff44 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Wed, 26 Apr 2023 00:14:00 +0300 Subject: [PATCH 2/2] safe Bytes::peek_n() --- src/iter.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/iter.rs b/src/iter.rs index 9dd45a8..e6e5133 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,3 +1,6 @@ +use core::convert::TryInto; +use core::convert::TryFrom; + #[allow(missing_docs)] pub struct Bytes<'a> { start: *const u8, @@ -48,17 +51,11 @@ impl<'a> Bytes<'a> { } #[inline] - pub fn peek_n(&self, n: usize) -> Option { + pub fn peek_n<'b: 'a, U: TryFrom<&'a [u8]>>(&'b self, n: usize) -> Option { + // TODO: once we bump MSRC, use const generics to allow only [u8; N] reads // TODO: drop `n` arg in favour of const // let n = core::mem::size_of::(); - // Boundary check then read array from ptr - if self.len() >= n { - let ptr = self.cursor as *const U; - let x = unsafe { core::ptr::read_unaligned(ptr) }; - Some(x) - } else { - None - } + self.as_ref().get(..n)?.try_into().ok() } #[inline]