From 15b71f491f39c0d2e3b733c13328f3b513900309 Mon Sep 17 00:00:00 2001 From: ltdk Date: Sun, 13 Nov 2022 04:09:20 -0500 Subject: [PATCH 1/2] Add CStr::bytes iterator --- library/core/src/ffi/c_str.rs | 89 +++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index 111fb83088b82..1dafc8c1f868c 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -5,8 +5,11 @@ use crate::error::Error; use crate::ffi::c_char; use crate::fmt; use crate::intrinsics; +use crate::iter::FusedIterator; +use crate::marker::PhantomData; use crate::ops; use crate::ptr::addr_of; +use crate::ptr::NonNull; use crate::slice; use crate::slice::memchr; use crate::str; @@ -617,6 +620,26 @@ impl CStr { unsafe { &*(addr_of!(self.inner) as *const [u8]) } } + /// Iterates over the bytes in this C string. + /// + /// The returned iterator will **not** contain the trailing nul terminator + /// that this C string has. + /// + /// # Examples + /// + /// ``` + /// #![feature(cstr_bytes)] + /// use std::ffi::CStr; + /// + /// let cstr = CStr::from_bytes_with_nul(b"foo\0").expect("CStr::from_bytes_with_nul failed"); + /// assert!(cstr.bytes().eq(*b"foo")); + /// ``` + #[inline] + #[unstable(feature = "cstr_bytes", issue = "112115")] + pub fn bytes(&self) -> Bytes<'_> { + Bytes::new(self) + } + /// Yields a &[str] slice if the `CStr` contains valid UTF-8. /// /// If the contents of the `CStr` are valid UTF-8 data, this @@ -735,3 +758,69 @@ const unsafe fn const_strlen(ptr: *const c_char) -> usize { intrinsics::const_eval_select((ptr,), strlen_ct, strlen_rt) } } + +/// An iterator over the bytes of a [`CStr`], without the nul terminator. +/// +/// This struct is created by the [`bytes`] method on [`CStr`]. +/// See its documentation for more. +/// +/// [`bytes`]: CStr::bytes +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[unstable(feature = "cstr_bytes", issue = "112115")] +#[derive(Clone, Debug)] +pub struct Bytes<'a> { + // since we know the string is nul-terminated, we only need one pointer + ptr: NonNull, + phantom: PhantomData<&'a u8>, +} +impl<'a> Bytes<'a> { + #[inline] + fn new(s: &'a CStr) -> Self { + Self { + // SAFETY: Because we have a valid reference to the string, we know + // that its pointer is non-null. + ptr: unsafe { NonNull::new_unchecked(s.as_ptr() as *const u8 as *mut u8) }, + phantom: PhantomData, + } + } + + #[inline] + fn is_empty(&self) -> bool { + // SAFETY: We uphold that the pointer is always valid to dereference + // by starting with a valid C string and then never incrementing beyond + // the nul terminator. + unsafe { *self.ptr.as_ref() == 0 } + } +} + +#[unstable(feature = "cstr_bytes", issue = "112115")] +impl Iterator for Bytes<'_> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We only choose a pointer from a valid C string, which must + // be non-null and contain at least one value. Since we always stop at + // the nul terminator, which is guaranteed to exist, we can assume that + // the pointer is non-null and valid. This lets us safely dereference + // it and assume that adding 1 will create a new, non-null, valid + // pointer. + unsafe { + let ret = *self.ptr.as_ref(); + if ret == 0 { + None + } else { + self.ptr = NonNull::new_unchecked(self.ptr.as_ptr().offset(1)); + Some(ret) + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + if self.is_empty() { (0, Some(0)) } else { (1, None) } + } +} + +#[unstable(feature = "cstr_bytes", issue = "112115")] +impl FusedIterator for Bytes<'_> {} From a38a556ceb4b65c397a47bcbe35d004bc9b8626f Mon Sep 17 00:00:00 2001 From: ltdk Date: Tue, 12 Mar 2024 17:19:58 -0400 Subject: [PATCH 2/2] Reduce unsafe code, use more NonNull APIs per @cuviper review --- library/core/src/ffi/c_str.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index 1dafc8c1f868c..30debbffec1aa 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -507,6 +507,13 @@ impl CStr { self.inner.as_ptr() } + /// We could eventually expose this publicly, if we wanted. + #[inline] + #[must_use] + const fn as_non_null_ptr(&self) -> NonNull { + NonNull::from(&self.inner).as_non_null_ptr() + } + /// Returns the length of `self`. Like C's `strlen`, this does not include the nul terminator. /// /// > **Note**: This method is currently implemented as a constant-time @@ -776,12 +783,7 @@ pub struct Bytes<'a> { impl<'a> Bytes<'a> { #[inline] fn new(s: &'a CStr) -> Self { - Self { - // SAFETY: Because we have a valid reference to the string, we know - // that its pointer is non-null. - ptr: unsafe { NonNull::new_unchecked(s.as_ptr() as *const u8 as *mut u8) }, - phantom: PhantomData, - } + Self { ptr: s.as_non_null_ptr().cast(), phantom: PhantomData } } #[inline] @@ -789,7 +791,7 @@ impl<'a> Bytes<'a> { // SAFETY: We uphold that the pointer is always valid to dereference // by starting with a valid C string and then never incrementing beyond // the nul terminator. - unsafe { *self.ptr.as_ref() == 0 } + unsafe { self.ptr.read() == 0 } } } @@ -806,11 +808,11 @@ impl Iterator for Bytes<'_> { // it and assume that adding 1 will create a new, non-null, valid // pointer. unsafe { - let ret = *self.ptr.as_ref(); + let ret = self.ptr.read(); if ret == 0 { None } else { - self.ptr = NonNull::new_unchecked(self.ptr.as_ptr().offset(1)); + self.ptr = self.ptr.offset(1); Some(ret) } }