Skip to content

Commit

Permalink
Auto merge of #111395 - scottmcm:slice-iter-zst-experiment, r=the8472
Browse files Browse the repository at this point in the history
Simplify the implementation of iterators over slices of ZSTs

Currently, slice iterators over ZSTs store `end = start.wrapping_byte_add(len)`.

That's slightly convenient for `is_empty`, but kinda annoying for pretty much everything else -- see bugs like #42789, for example.

This PR instead changes it to just `end = ptr::invalid(len)` instead.

That's easier to think about (IMHO, at least) as well as easier to represent.

`next` is still to big to get inlined into the mir-opt/pre-codegen/ tests, but if I bump the inline threshold to force it to show the whole thing, this implementation is also less MIR:
```
> git diff --numstat
241     370     tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.mir
255     329     tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.mir
184     216     tests/mir-opt/pre-codegen/slice_iter.slice_iter_mut_next_back.PreCodegen.after.mir
182     254     tests/mir-opt/pre-codegen/slice_iter.slice_iter_next.PreCodegen.after.mir
```
(That's ≈70 lines less for `Iter::next`, for example.)

r? `@ghost`

~~Built atop #111282, so draft until that lands.~~
  • Loading branch information
bors committed May 11, 2023
2 parents 2a8221d + 15aa7fa commit 5b24e12
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 35 deletions.
13 changes: 13 additions & 0 deletions library/core/src/ptr/non_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,19 @@ impl<T: ?Sized> NonNull<T> {
// SAFETY: `self` is a `NonNull` pointer which is necessarily non-null
unsafe { NonNull::new_unchecked(self.as_ptr() as *mut U) }
}

/// See [`pointer::add`] for semantics and safety requirements.
#[inline]
pub(crate) const unsafe fn add(self, delta: usize) -> Self
where
T: Sized,
{
// SAFETY: We require that the delta stays in-bounds of the object, and
// thus it cannot become null, as that would require wrapping the
// address space, which no legal objects are allowed to do.
// And the caller promised the `delta` is sound to add.
unsafe { NonNull { pointer: self.pointer.add(delta) } }
}
}

impl<T> NonNull<[T]> {
Expand Down
16 changes: 5 additions & 11 deletions library/core/src/slice/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::iter::{
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZeroUsize;
use crate::ptr::NonNull;
use crate::ptr::{invalid, invalid_mut, NonNull};

use super::{from_raw_parts, from_raw_parts_mut};

Expand Down Expand Up @@ -67,9 +67,7 @@ pub struct Iter<'a, T: 'a> {
ptr: NonNull<T>,
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr.wrapping_byte_add(len)`.
///
/// For all types, `ptr == end` tests whether the iterator is empty.
/// For ZSTs, this is `ptr::invalid(len)`.
end: *const T,
_marker: PhantomData<&'a T>,
}
Expand All @@ -94,8 +92,7 @@ impl<'a, T> Iter<'a, T> {
unsafe {
assume(!ptr.is_null());

let end =
if T::IS_ZST { ptr.wrapping_byte_add(slice.len()) } else { ptr.add(slice.len()) };
let end = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr as *mut T), end, _marker: PhantomData }
}
Expand Down Expand Up @@ -193,9 +190,7 @@ pub struct IterMut<'a, T: 'a> {
ptr: NonNull<T>,
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr.wrapping_byte_add(len)`.
///
/// For all types, `ptr == end` tests whether the iterator is empty.
/// For ZSTs, this is `ptr::invalid_mut(len)`.
end: *mut T,
_marker: PhantomData<&'a mut T>,
}
Expand Down Expand Up @@ -235,8 +230,7 @@ impl<'a, T> IterMut<'a, T> {
unsafe {
assume(!ptr.is_null());

let end =
if T::IS_ZST { ptr.wrapping_byte_add(slice.len()) } else { ptr.add(slice.len()) };
let end = if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr), end, _marker: PhantomData }
}
Expand Down
57 changes: 33 additions & 24 deletions library/core/src/slice/iter/macros.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,44 @@
//! Macros used by iterators of slice.

// Shrinks the iterator when T is a ZST, setting the length to `new_len`.
// `new_len` must not exceed `self.len()`.
macro_rules! zst_set_len {
($self: ident, $new_len: expr) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

// SAFETY: same as `invalid(_mut)`, but the macro doesn't know
// which versions of that function to call, so open-code it.
$self.end = unsafe { mem::transmute::<usize, _>($new_len) };
}};
}

// Shrinks the iterator when T is a ZST, reducing the length by `n`.
// `n` must not exceed `self.len()`.
macro_rules! zst_shrink {
($self: ident, $n: ident) => {
let new_len = $self.end.addr() - $n;
zst_set_len!($self, new_len);
};
}

// Inlining is_empty and len makes a huge performance difference
macro_rules! is_empty {
// The way we encode the length of a ZST iterator, this works both for ZST
// and non-ZST.
($self: ident) => {
$self.ptr.as_ptr() as *const T == $self.end
if T::IS_ZST { $self.end.addr() == 0 } else { $self.ptr.as_ptr() as *const _ == $self.end }
};
}

macro_rules! len {
($self: ident) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

let start = $self.ptr;
if T::IS_ZST {
// This _cannot_ use `ptr_sub` because we depend on wrapping
// to represent the length of long ZST slice iterators.
$self.end.addr().wrapping_sub(start.as_ptr().addr())
$self.end.addr()
} else {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
unsafe { $self.end.sub_ptr(start.as_ptr()) }
unsafe { $self.end.sub_ptr($self.ptr.as_ptr()) }
}
}};
}
Expand Down Expand Up @@ -50,14 +66,6 @@ macro_rules! iterator {
($self: ident) => {& $( $mut_ )? *$self.pre_dec_end(1)}
}

// Shrinks the iterator when T is a ZST, by moving the end of the iterator
// backwards by `n`. `n` must not exceed `self.len()`.
macro_rules! zst_shrink {
($self: ident, $n: ident) => {
$self.end = $self.end.wrapping_byte_sub($n);
}
}

impl<'a, T> $name<'a, T> {
// Helper function for creating a slice from the iterator.
#[inline(always)]
Expand All @@ -73,16 +81,15 @@ macro_rules! iterator {
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn post_inc_start(&mut self, offset: usize) -> * $raw_mut T {
let old = self.ptr;
if T::IS_ZST {
zst_shrink!(self, offset);
self.ptr.as_ptr()
} else {
let old = self.ptr.as_ptr();
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
self.ptr = unsafe { NonNull::new_unchecked(self.ptr.as_ptr().add(offset)) };
old
self.ptr = unsafe { self.ptr.add(offset) };
}
old.as_ptr()
}

// Helper function for moving the end of the iterator backwards by `offset` elements,
Expand Down Expand Up @@ -155,9 +162,7 @@ macro_rules! iterator {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
// We have to do it this way as `ptr` may never be 0, but `end`
// could be (due to wrapping).
self.end = self.ptr.as_ptr();
zst_set_len!(self, 0);
} else {
// SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr
unsafe {
Expand Down Expand Up @@ -356,7 +361,11 @@ macro_rules! iterator {
fn nth_back(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
self.end = self.ptr.as_ptr();
if T::IS_ZST {
zst_set_len!(self, 0);
} else {
self.end = self.ptr.as_ptr();
}
return None;
}
// SAFETY: We are in bounds. `pre_dec_end` does the right thing even for ZSTs.
Expand Down

0 comments on commit 5b24e12

Please sign in to comment.