diff --git a/compiler/rustc_arena/src/lib.rs b/compiler/rustc_arena/src/lib.rs
index 3928d70c0ede2..62995dfd2e2f0 100644
--- a/compiler/rustc_arena/src/lib.rs
+++ b/compiler/rustc_arena/src/lib.rs
@@ -18,6 +18,7 @@
 #![feature(decl_macro)]
 #![feature(rustc_attrs)]
 #![cfg_attr(test, feature(test))]
+#![feature(strict_provenance)]
 
 use smallvec::SmallVec;
 
@@ -87,7 +88,7 @@ impl<T> ArenaChunk<T> {
         unsafe {
             if mem::size_of::<T>() == 0 {
                 // A pointer as large as possible for zero-sized elements.
-                !0 as *mut T
+                ptr::invalid_mut(!0)
             } else {
                 self.start().add(self.storage.len())
             }
@@ -199,7 +200,7 @@ impl<T> TypedArena<T> {
         unsafe {
             if mem::size_of::<T>() == 0 {
                 self.ptr.set((self.ptr.get() as *mut u8).wrapping_offset(1) as *mut T);
-                let ptr = mem::align_of::<T>() as *mut T;
+                let ptr = ptr::NonNull::<T>::dangling().as_ptr();
                 // Don't drop the object. This `write` is equivalent to `forget`.
                 ptr::write(ptr, object);
                 &mut *ptr
@@ -216,7 +217,9 @@ impl<T> TypedArena<T> {
     #[inline]
     fn can_allocate(&self, additional: usize) -> bool {
-        let available_bytes = self.end.get() as usize - self.ptr.get() as usize;
+        // FIXME: this should *likely* use `offset_from`, but more
+        // investigation is needed (including running tests in miri).
+        let available_bytes = self.end.get().addr() - self.ptr.get().addr();
         let additional_bytes = additional.checked_mul(mem::size_of::<T>()).unwrap();
         available_bytes >= additional_bytes
     }
@@ -262,7 +265,9 @@ impl<T> TypedArena<T> {
         // If a type is `!needs_drop`, we don't need to keep track of how many elements
         // the chunk stores - the field will be ignored anyway.
         if mem::needs_drop::<T>() {
-            let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize;
+            // FIXME: this should *likely* use `offset_from`, but more
+            // investigation is needed (including running tests in miri).
+            let used_bytes = self.ptr.get().addr() - last_chunk.start().addr();
             last_chunk.entries = used_bytes / mem::size_of::<T>();
         }
 
@@ -288,9 +293,9 @@ impl<T> TypedArena<T> {
     // chunks.
     fn clear_last_chunk(&self, last_chunk: &mut ArenaChunk<T>) {
         // Determine how much was filled.
-        let start = last_chunk.start() as usize;
+        let start = last_chunk.start().addr();
         // We obtain the value of the pointer to the first uninitialized element.
-        let end = self.ptr.get() as usize;
+        let end = self.ptr.get().addr();
         // We then calculate the number of elements to be dropped in the last chunk,
         // which is the filled area's length.
         let diff = if mem::size_of::<T>() == 0 {
@@ -299,6 +304,8 @@ impl<T> TypedArena<T> {
             // Recall that `end` was incremented for each allocated value.
             end - start
         } else {
+            // FIXME: this should *likely* use `offset_from`, but more
+            // investigation is needed (including running tests in miri).
             (end - start) / mem::size_of::<T>()
         };
         // Pass that to the `destroy` method.
@@ -395,15 +402,16 @@ impl DroplessArena {
     /// request.
     #[inline]
     fn alloc_raw_without_grow(&self, layout: Layout) -> Option<*mut u8> {
-        let start = self.start.get() as usize;
-        let end = self.end.get() as usize;
+        let start = self.start.get().addr();
+        let old_end = self.end.get();
+        let end = old_end.addr();
 
         let align = layout.align();
         let bytes = layout.size();
 
         let new_end = end.checked_sub(bytes)? & !(align - 1);
         if start <= new_end {
-            let new_end = new_end as *mut u8;
+            let new_end = old_end.with_addr(new_end);
             self.end.set(new_end);
             Some(new_end)
         } else {
diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs
index 25e27f565eae2..6cf6be79a8628 100644
--- a/compiler/rustc_codegen_ssa/src/lib.rs
+++ b/compiler/rustc_codegen_ssa/src/lib.rs
@@ -6,6 +6,7 @@
 #![feature(once_cell)]
 #![feature(nll)]
 #![feature(associated_type_bounds)]
+#![feature(strict_provenance)]
 #![recursion_limit = "256"]
 #![allow(rustc::potential_query_instability)]
diff --git a/compiler/rustc_codegen_ssa/src/mono_item.rs b/compiler/rustc_codegen_ssa/src/mono_item.rs
index 5f0f50ae2df1d..5414c619dcbca 100644
--- a/compiler/rustc_codegen_ssa/src/mono_item.rs
+++ b/compiler/rustc_codegen_ssa/src/mono_item.rs
@@ -116,7 +116,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> {
     fn to_raw_string(&self) -> String {
         match *self {
             MonoItem::Fn(instance) => {
-                format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr() as usize)
+                format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr().addr())
             }
             MonoItem::Static(id) => format!("Static({:?})", id),
             MonoItem::GlobalAsm(id) => format!("GlobalAsm({:?})", id),
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
index 0a180b83355e0..065d071a2e360 100644
--- a/library/alloc/src/lib.rs
+++ b/library/alloc/src/lib.rs
@@ -127,6 +127,7 @@
 #![feature(slice_ptr_len)]
 #![feature(slice_range)]
 #![feature(str_internals)]
+#![feature(strict_provenance)]
 #![feature(trusted_len)]
 #![feature(trusted_random_access)]
 #![feature(try_trait_v2)]
diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
index 3ddc9acb2e7db..0b57c36247e43 100644
--- a/library/alloc/src/rc.rs
+++ b/library/alloc/src/rc.rs
@@ -2115,13 +2115,12 @@ impl<T> Weak<T> {
     #[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
     #[must_use]
     pub const fn new() -> Weak<T> {
-        Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut RcBox<T>) } }
+        Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<RcBox<T>>(usize::MAX)) } }
     }
 }
 
 pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
-    let address = ptr as *mut () as usize;
-    address == usize::MAX
+    (ptr as *mut ()).addr() == usize::MAX
 }
 
 /// Helper type to allow accessing the reference counts without
diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs
index f52871c73d9fc..31365562ddb50 100644
--- a/library/alloc/src/slice.rs
+++ b/library/alloc/src/slice.rs
@@ -1043,9 +1043,9 @@ where
 impl<T> Drop for MergeHole<T> {
     fn drop(&mut self) {
-        // `T` is not a zero-sized type, so it's okay to divide by its size.
-        let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
+        // `T` is not a zero-sized type, and these are pointers into a slice's elements.
         unsafe {
+            let len = self.end.offset_from(self.start) as usize;
             ptr::copy_nonoverlapping(self.start, self.dest, len);
         }
     }
diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs
index e2b9890850b3a..f8b4d46ac105d 100644
--- a/library/alloc/src/sync.rs
+++ b/library/alloc/src/sync.rs
@@ -1746,7 +1746,7 @@ impl<T> Weak<T> {
     #[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
     #[must_use]
     pub const fn new() -> Weak<T> {
-        Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut ArcInner<T>) } }
+        Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<ArcInner<T>>(usize::MAX)) } }
     }
 }
diff --git a/library/alloc/src/vec/into_iter.rs b/library/alloc/src/vec/into_iter.rs
index f17b8d71b3a1a..cc6dfb0e33017 100644
--- a/library/alloc/src/vec/into_iter.rs
+++ b/library/alloc/src/vec/into_iter.rs
@@ -159,7 +159,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
         let exact = if mem::size_of::<T>() == 0 {
-            (self.end as usize).wrapping_sub(self.ptr as usize)
+            self.end.addr().wrapping_sub(self.ptr.addr())
         } else {
             unsafe { self.end.offset_from(self.ptr) as usize }
         };
diff --git a/library/core/src/alloc/layout.rs b/library/core/src/alloc/layout.rs
index ea639268652c3..0639d6eed62a5 100644
--- a/library/core/src/alloc/layout.rs
+++ b/library/core/src/alloc/layout.rs
@@ -194,7 +194,7 @@ impl Layout {
     #[inline]
     pub const fn dangling(&self) -> NonNull<u8> {
         // SAFETY: align is guaranteed to be non-zero
-        unsafe { NonNull::new_unchecked(self.align() as *mut u8) }
+        unsafe { NonNull::new_unchecked(crate::ptr::invalid_mut::<u8>(self.align())) }
     }
 
     /// Creates a layout describing the record that can hold a value
diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs
index 84cf1753f86ba..6c1d20f36e2f6 100644
--- a/library/core/src/fmt/mod.rs
+++ b/library/core/src/fmt/mod.rs
@@ -352,6 +352,10 @@ impl<'a> ArgumentV1<'a> {
     }
 
     fn as_usize(&self) -> Option<usize> {
+        // We are type punning a bit here: USIZE_MARKER only takes an &usize but
+        // formatter takes an &Opaque. Rust understandably doesn't think we should compare
+        // the function pointers if they don't have the same signature, so we cast to
+        // usizes to tell it that we just want to compare addresses.
         if self.formatter as usize == USIZE_MARKER as usize {
             // SAFETY: The `formatter` field is only set to USIZE_MARKER if
             // the value is a usize, so this is safe
@@ -2246,7 +2250,7 @@ impl<T: ?Sized> Pointer for *const T {
         }
         f.flags |= 1 << (FlagV1::Alternate as u32);
 
-        let ret = LowerHex::fmt(&(ptr as usize), f);
+        let ret = LowerHex::fmt(&(ptr.addr()), f);
 
         f.width = old_width;
         f.flags = old_flags;
diff --git a/library/core/src/hash/mod.rs b/library/core/src/hash/mod.rs
index 53de8b42c059f..45c9df0c930b9 100644
--- a/library/core/src/hash/mod.rs
+++ b/library/core/src/hash/mod.rs
@@ -793,7 +793,7 @@ mod impls {
         #[inline]
         fn hash<H: Hasher>(&self, state: &mut H) {
             let (address, metadata) = self.to_raw_parts();
-            state.write_usize(address as usize);
+            state.write_usize(address.addr());
             metadata.hash(state);
         }
     }
@@ -803,7 +803,7 @@ mod impls {
         #[inline]
         fn hash<H: Hasher>(&self, state: &mut H) {
             let (address, metadata) = self.to_raw_parts();
-            state.write_usize(address as usize);
+            state.write_usize(address.addr());
             metadata.hash(state);
         }
     }
diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs
index 129402ad23a56..8ad4317c145ac 100644
--- a/library/core/src/intrinsics.rs
+++ b/library/core/src/intrinsics.rs
@@ -1972,15 +1972,15 @@ extern "rust-intrinsic" {
 /// Checks whether `ptr` is properly aligned with respect to
 /// `align_of::<T>()`.
 pub(crate) fn is_aligned_and_not_null<T>(ptr: *const T) -> bool {
-    !ptr.is_null() && ptr as usize % mem::align_of::<T>() == 0
+    !ptr.is_null() && ptr.addr() % mem::align_of::<T>() == 0
 }
 
 /// Checks whether the regions of memory starting at `src` and `dst` of size
 /// `count * size_of::<T>()` do *not* overlap.
 #[cfg(debug_assertions)]
 pub(crate) fn is_nonoverlapping<T>(src: *const T, dst: *const T, count: usize) -> bool {
-    let src_usize = src as usize;
-    let dst_usize = dst as usize;
+    let src_usize = src.addr();
+    let dst_usize = dst.addr();
     let size = mem::size_of::<T>().checked_mul(count).unwrap();
     let diff = if src_usize > dst_usize { src_usize - dst_usize } else { dst_usize - src_usize };
     // If the absolute distance between the ptrs is at least as big as the size of the buffer,
diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs
index 74c64370ddcda..20ee128026421 100644
--- a/library/core/src/ptr/const_ptr.rs
+++ b/library/core/src/ptr/const_ptr.rs
@@ -150,6 +150,79 @@ impl<T: ?Sized> *const T {
         bits as Self
     }
 
+    /// Gets the "address" portion of the pointer.
+    ///
+    /// This is equivalent to `self as usize`, which semantically discards
+    /// *provenance* and *address-space* information. To properly restore that information,
+    /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
+    ///
+    /// On most platforms this will produce a value with the same bytes as the original
+    /// pointer, because all the bytes are dedicated to describing the address.
+    /// Platforms which need to store additional information in the pointer may
+    /// perform a change of representation to produce a value containing only the address
+    /// portion of the pointer. What that means is up to the platform to define.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment,
+    /// see the [module documentation][crate::ptr] for details.
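+    ///
+    /// # Examples
+    ///
+    /// A small, non-normative sketch:
+    ///
+    /// ```
+    /// #![feature(strict_provenance)]
+    ///
+    /// let x = 5u32;
+    /// let p = &x as *const u32;
+    /// // Only the address comes back; the provenance is discarded.
+    /// let a = p.addr();
+    /// assert_eq!(a, p as usize);
+    /// ```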
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn addr(self) -> usize
+    where
+        T: Sized,
+    {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        self as usize
+    }
+
+    /// Creates a new pointer with the given address.
+    ///
+    /// This performs the same operation as an `addr as ptr` cast, but copies
+    /// the *address-space* and *provenance* of `self` to the new pointer.
+    /// This allows us to dynamically preserve and propagate this important
+    /// information in a way that is otherwise impossible with a unary cast.
+    ///
+    /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
+    /// `self` to the given address, and therefore has all the same capabilities and restrictions.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment,
+    /// see the [module documentation][crate::ptr] for details.
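+    ///
+    /// # Examples
+    ///
+    /// A non-normative sketch: round an address down to an alignment boundary
+    /// without losing the original pointer's provenance.
+    ///
+    /// ```
+    /// #![feature(strict_provenance)]
+    ///
+    /// let data = [0u8; 16];
+    /// let ptr = data.as_ptr().wrapping_add(7);
+    /// // Compute the aligned address as a plain integer, then reattach
+    /// // `ptr`'s provenance to it.
+    /// let aligned = ptr.with_addr(ptr.addr() & !3);
+    /// assert_eq!(aligned.addr() % 4, 0);
+    /// ```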
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn with_addr(self, addr: usize) -> Self
+    where
+        T: Sized,
+    {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        //
+        // In the meantime, this operation is defined to be "as if" it was
+        // a wrapping_offset, so we can emulate it as such. This should properly
+        // restore pointer provenance even under today's compiler.
+        let self_addr = self.addr() as isize;
+        let dest_addr = addr as isize;
+        let offset = dest_addr.wrapping_sub(self_addr);
+
+        // This is the canonical desugaring of this operation
+        self.cast::<u8>().wrapping_offset(offset).cast::<T>()
+    }
+
+    /// Creates a new pointer by mapping `self`'s address to a new one.
+    ///
+    /// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment,
+    /// see the [module documentation][crate::ptr] for details.
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self
+    where
+        T: Sized,
+    {
+        self.with_addr(f(self.addr()))
+    }
+
     /// Decompose a (possibly wide) pointer into its address and metadata components.
     ///
     /// The pointer can be later reconstructed with [`from_raw_parts`].
@@ -1006,7 +1079,7 @@ impl<T> *const [T] {
     /// use std::ptr;
     ///
     /// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3);
-    /// assert_eq!(slice.as_ptr(), 0 as *const i8);
+    /// assert_eq!(slice.as_ptr(), ptr::null());
     /// ```
     #[inline]
     #[unstable(feature = "slice_ptr_get", issue = "74265")]
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index 59b1b4c136752..6a7841d3de6af 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -63,12 +63,269 @@
 //! separate allocated object), heap allocations (each allocation created by the global allocator is
 //! a separate allocated object), and `static` variables.
 //!
+//!
+//! # Strict Provenance
+//!
+//! **The following text is non-normative, insufficiently formal, and is an extremely strict
+//! interpretation of provenance. It's ok if your code doesn't strictly conform to it.**
+//!
+//! [Strict Provenance][] is an experimental set of APIs that help tools that try
+//! to validate the memory-safety of your program's execution. Notably this includes [miri][]
+//! and [CHERI][], which can detect when you access out of bounds memory or otherwise violate
+//! Rust's memory model.
+//!
+//! Provenance must exist in some form for any programming
+//! language compiled for modern computer architectures, but specifying a model for provenance
+//! in a way that is useful to both compilers and programmers is an ongoing challenge.
+//! The [Strict Provenance][] experiment seeks to explore the question: *what if we just said you
+//! couldn't do all the nasty operations that make provenance so messy?*
+//!
+//! What APIs would have to be removed? What APIs would have to be added? How much would code
+//! have to change, and is it worse or better now? Would any patterns become truly inexpressible?
+//! Could we carve out special exceptions for those patterns? Should we?
+//!
+//! A secondary goal of this project is to see if we can disambiguate the many functions of
+//! pointer<->integer casts enough for the definition of `usize` to be loosened so that it
+//! isn't *pointer*-sized but address-space/offset/allocation-sized (we'll probably continue
+//! to conflate these notions). This would potentially make it possible to more efficiently
+//! target platforms where pointers are larger than offsets, such as CHERI and maybe some
+//! segmented architectures.
+//!
+//! ## Provenance
+//!
+//! **This section is *non-normative* and is part of the [Strict Provenance][] experiment.**
+//!
+//! Pointers are not *simply* an "integer" or "address". For instance, it's uncontroversial
+//! to say that a Use After Free is clearly Undefined Behaviour, even if you "get lucky"
+//! and the freed memory gets reallocated before your read/write (in fact this is the
+//! worst-case scenario, UAFs would be much less concerning if this didn't happen!).
+//! To rationalize this claim, pointers need to somehow be *more* than just their addresses:
+//! they must have provenance.
+//!
+//! When an allocation is created, that allocation has a unique Original Pointer. For alloc
+//! APIs this is literally the pointer the call returns, and for local variables and statics,
+//! this is the name of the variable/static. This is mildly overloading the term "pointer"
+//! for the sake of brevity/exposition.
+//!
+//! The Original Pointer for an allocation is guaranteed to have unique access to the entire
+//! allocation and *only* that allocation. In this sense, an allocation can be thought of
+//! as a "sandbox" that cannot be broken into or out of. *Provenance* is the permission
+//! to access an allocation's sandbox and has both a *spatial* and *temporal* component:
+//!
+//! * Spatial: A range of bytes that the pointer is allowed to access.
+//! * Temporal: The lifetime (of the allocation) that access to these bytes is tied to.
+//!
+//! Spatial provenance makes sure you don't go beyond your sandbox, while temporal provenance
+//! makes sure that you can't "get lucky" after your permission to access some memory
+//! has been revoked (either through deallocations or borrows expiring).
+//!
+//! Provenance is implicitly shared with all pointers transitively derived from
+//! The Original Pointer through operations like [`offset`], borrowing, and pointer casts.
+//! Some operations may *shrink* the derived provenance, limiting how much memory it can
+//! access or how long it's valid for (i.e. borrowing a subfield and subslicing).
+//!
+//! Shrinking provenance cannot be undone: even if you "know" there is a larger allocation, you
+//! can't derive a pointer with a larger provenance. Similarly, you cannot "recombine"
+//! two contiguous provenances back into one (i.e. with a `fn merge(&[T], &[T]) -> &[T]`).
+//!
+//! A reference to a value always has provenance over exactly the memory that field occupies.
+//! A reference to a slice always has provenance over exactly the range that slice describes.
+//!
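+//! For example (non-normative), borrowing a subslice shrinks spatial provenance
+//! to exactly that subslice:
+//!
+//! ```
+//! let whole = [1u8, 2, 3, 4];
+//! let part = &whole[1..3];
+//! let p = part.as_ptr();
+//! // `p` may only access `whole[1]` and `whole[2]`; no pointer arithmetic
+//! // on `p` can legally widen that back to all of `whole`.
+//! assert_eq!(unsafe { *p }, 2);
+//! ```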
+//!
+//! If an allocation is deallocated, all pointers with provenance to that allocation become
+//! invalidated, and effectively lose their provenance.
+//!
+//! The strict provenance experiment is mostly only interested in exploring stricter *spatial*
+//! provenance. In this sense it can be thought of as a subset of the more ambitious and
+//! formal [Stacked Borrows][] research project, which is what tools like [miri][] are based on.
+//! In particular, Stacked Borrows is necessary to properly describe what borrows are allowed
+//! to do and when they become invalidated. This necessarily involves much more complex
+//! *temporal* reasoning than simply identifying allocations. Adjusting APIs and code
+//! for the strict provenance experiment will also greatly help Stacked Borrows.
+//!
+//!
+//! ## Pointers Vs Addresses
+//!
+//! **This section is *non-normative* and is part of the [Strict Provenance][] experiment.**
+//!
+//! One of the largest historical issues with trying to define provenance is that programmers
+//! freely convert between pointers and integers. Once you allow for this, it generally becomes
+//! impossible to accurately track and preserve provenance information, and you need to appeal
+//! to very complex and unreliable heuristics. But of course, converting between pointers and
+//! integers is very useful, so what can we do?
+//!
+//! Also did you know WASM is actually a "Harvard Architecture"? As in function pointers are
+//! handled completely differently from data pointers? And we kind of just shipped Rust on WASM
+//! without really addressing the fact that we let you freely convert between function pointers
+//! and data pointers, because it mostly Just Works? Let's just put that on the "pointer casts
+//! are dubious" pile.
+//!
+//! Strict Provenance attempts to square these circles by decoupling Rust's traditional conflation
+//! of pointers and `usize` (and `isize`), and defining a pointer to semantically contain the
+//! following information:
+//!
+//! * The **address-space** it is part of (i.e. "data" vs "code" in WASM).
+//! * The **address** it points to, which can be represented by a `usize`.
+//! * The **provenance** it has, defining the memory it has permission to access.
+//!
+//! Under Strict Provenance, a usize *cannot* accurately represent a pointer, and converting from
+//! a pointer to a usize is generally an operation which *only* extracts the address. It is
+//! therefore *impossible* to construct a valid pointer from a usize because there is no way
+//! to restore the address-space and provenance.
+//!
+//! The key insight to making this model *at all* viable is the [`with_addr`][] method:
+//!
+//! ```text
+//! /// Creates a new pointer with the given address.
+//! ///
+//! /// This performs the same operation as an `addr as ptr` cast, but copies
+//! /// the *address-space* and *provenance* of `self` to the new pointer.
+//! /// This allows us to dynamically preserve and propagate this important
+//! /// information in a way that is otherwise impossible with a unary cast.
+//! ///
+//! /// This is equivalent to using `wrapping_offset` to offset `self` to the
+//! /// given address, and therefore has all the same capabilities and restrictions.
+//! pub fn with_addr(self, addr: usize) -> Self;
+//! ```
+//!
+//! So you're still able to drop down to the address representation and do whatever
+//! clever bit tricks you want *as long as* you're able to keep around a pointer
+//! into the allocation you care about that can "reconstitute" the other parts of the pointer.
+//! Usually this is very easy, because you are only taking a pointer, messing with the address,
+//! and then immediately converting back to a pointer. To make this use case more ergonomic,
+//! we provide the [`map_addr`][] method.
+//!
+//! To help make it clear that code is "following" Strict Provenance semantics, we also
+//! provide an [`addr`][] method which is currently equivalent to `ptr as usize`. In the
+//! future we may provide a lint for pointer<->integer casts to help you audit if your
+//! code conforms to strict provenance.
+//!
+//!
+//! ## Using Strict Provenance
+//!
+//! Most code needs no changes to conform to strict provenance, as the only really concerning
+//! operation that *wasn't* obviously already Undefined Behaviour is casts from usize to a
+//! pointer. For code which *does* cast a usize to a pointer, the scope of the change depends
+//! on exactly what you're doing.
+//!
+//! In general you just need to make sure that if you want to convert a usize address to a
+//! pointer and then use that pointer to read/write memory, you need to keep around a pointer
+//! that has sufficient provenance to perform that read/write itself. In this way all of your
+//! casts from an address to a pointer are essentially just applying offsets/indexing.
+//!
+//! This is generally trivial to do for simple cases like tagged pointers *as long as you
+//! represent the tagged pointer as an actual pointer and not a usize*. For instance:
+//!
+//! ```
+//! #![feature(strict_provenance)]
+//!
+//! unsafe {
+//!     // A flag we want to pack into our pointer
+//!     static HAS_DATA: usize = 0x1;
+//!     static FLAG_MASK: usize = !HAS_DATA;
+//!
+//!     // Our value, which must have enough alignment to have spare least-significant-bits.
+//!     let my_precious_data: u32 = 17;
+//!     assert!(core::mem::align_of::<u32>() > 1);
+//!
+//!     // Create a tagged pointer
+//!     let ptr = &my_precious_data as *const u32;
+//!     let tagged = ptr.map_addr(|addr| addr | HAS_DATA);
+//!
+//!     // Check the flag:
+//!     if tagged.addr() & HAS_DATA != 0 {
+//!         // Untag and read the pointer
+//!         let data = *tagged.map_addr(|addr| addr & FLAG_MASK);
+//!         assert_eq!(data, 17);
+//!     } else {
+//!         unreachable!()
+//!     }
+//! }
+//! ```
+//!
+//! (Yes, if you've been using AtomicUsize for pointers in concurrent data structures, you should
+//! be using AtomicPtr instead. If that messes up the way you atomically manipulate pointers,
+//! we would like to know why, and what needs to be done to fix it.)
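+//!
+//! A minimal, non-normative sketch of that migration:
+//!
+//! ```
+//! use core::sync::atomic::{AtomicPtr, Ordering};
+//!
+//! static LATEST: AtomicPtr<u32> = AtomicPtr::new(core::ptr::null_mut());
+//!
+//! let mut value = 42u32;
+//! // The pointer round-trips through the atomic with its provenance intact,
+//! // which an AtomicUsize-based scheme cannot express.
+//! LATEST.store(&mut value, Ordering::Release);
+//! assert!(!LATEST.load(Ordering::Acquire).is_null());
+//! ```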
+//!
+//! Something more complicated and just generally *evil* like a XOR-List requires more significant
+//! changes like allocating all nodes in a pre-allocated Vec or Arena and using a pointer
+//! to the whole allocation to reconstitute the XORed addresses.
+//!
+//! How to support situations where a valid pointer *must* be created from just an address, such
+//! as baremetal code accessing a memory-mapped interface at a fixed address, is an open question.
+//! These situations *will* still be allowed, but we might require some kind of "I know what I'm
+//! doing" annotation to explain the situation to the compiler. It's also possible they need no
+//! special attention at all, because they're generally accessing memory outside the scope of
+//! "the abstract machine", or already using "I know what I'm doing" annotations like "volatile".
+//!
+//! Under [Strict Provenance] it is Undefined Behaviour to:
+//!
+//! * Access memory through a pointer that does not have provenance over that memory.
+//!
+//! * [`offset`] a pointer to or from an address it doesn't have provenance over.
+//!   This means it's always UB to offset a pointer derived from something deallocated,
+//!   even if the offset is 0. Note that a pointer "one past the end" of its provenance
+//!   is not actually outside its provenance, it just has 0 bytes it can load/store.
+//!
+//! But it *is* still sound to:
+//!
+//! * Create an invalid pointer from just an address (see [`ptr::invalid`][]). This can
+//!   be used for sentinel values like `null` *or* to represent a tagged pointer that will
+//!   never be dereferenceable. In general, it is always sound for an integer to pretend
+//!   to be a pointer "for fun" as long as you don't use operations on it which require
+//!   it to be valid (offset, read, write, etc).
+//!
+//! * Forge an allocation of size zero at any sufficiently aligned non-null address.
+//!   i.e. the usual "ZSTs are fake, do what you want" rules apply *but* this only applies
+//!   for actual forgery (integers cast to pointers). If you borrow some struct's field
+//!   that *happens* to be zero-sized, the resulting pointer will have provenance tied to
+//!   that allocation and it will still get invalidated if the allocation gets deallocated.
+//!   In the future we may introduce an API to make such a forged allocation explicit.
+//!
+//! * [`wrapping_offset`][] a pointer outside its provenance (see the sketch after this list).
+//!   This includes invalid pointers which have "no" provenance. Unfortunately there may be
+//!   practical limits on this for a particular platform, and it's an open question as to how
+//!   to specify this (if at all).
+//!   Notably, [CHERI][] relies on a compression scheme that can't handle a
+//!   pointer getting offset "too far" out of bounds. If this happens, the address
+//!   returned by `addr` will be the value you expect, but the provenance will get invalidated
+//!   and using it to read/write will fault. The details of this are architecture-specific
+//!   and based on alignment, but the buffer on either side of the pointer's range is pretty
+//!   generous (think kilobytes, not bytes).
+//!
+//! * Compare arbitrary pointers by address. Addresses *are* just integers and so there is
+//!   always a coherent answer, even if the pointers are invalid or from different
+//!   address-spaces/provenances. Of course, comparing addresses from different address-spaces
+//!   is generally going to be *meaningless*, but so is comparing Kilograms to Meters, and Rust
+//!   doesn't prevent that either. Similarly, if you get "lucky" and notice that a pointer
+//!   one-past-the-end is the "same" address as the start of an unrelated allocation, anything
+//!   you do with that fact is *probably* going to be gibberish. The scope of that gibberish
+//!   is kept under control by the fact that the two pointers *still* aren't allowed to access
+//!   the other's allocation (bytes), because they still have different provenance.
+//!
+//! * Perform pointer tagging tricks. This falls out of [`wrapping_offset`] but is worth
+//!   mentioning in more detail because of the limitations of [CHERI][]. Low-bit tagging
+//!   is very robust, and often doesn't even go out of bounds because types ensure
+//!   size >= align (and over-aligning actually gives CHERI more flexibility). Anything
+//!   more complex than this rapidly enters "extremely platform-specific" territory as
+//!   certain things may or may not be allowed based on specific supported operations.
+//!   For instance, ARM explicitly supports high-bit tagging, and so CHERI on ARM inherits
+//!   that and should support it.
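+//!
+//! For example (non-normative), the out-and-back pattern referenced above:
+//!
+//! ```
+//! let x = 7u8;
+//! let p = &x as *const u8;
+//! // Wandering far outside the allocation is fine with `wrapping_offset`...
+//! let far = p.wrapping_offset(1000);
+//! // ...and the pointer that comes back still carries `p`'s provenance,
+//! // so it may be dereferenced again.
+//! let back = far.wrapping_offset(-1000);
+//! assert_eq!(unsafe { *back }, 7);
+//! ```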
+//!
+//!
 //! [aliasing]: ../../nomicon/aliasing.html
 //! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer
 //! [ub]: ../../reference/behavior-considered-undefined.html
 //! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts
 //! [atomic operations]: crate::sync::atomic
 //! [`offset`]: pointer::offset
+//! [`wrapping_offset`]: pointer::wrapping_offset
+//! [`with_addr`]: pointer::with_addr
+//! [`map_addr`]: pointer::map_addr
+//! [`addr`]: pointer::addr
+//! [`ptr::invalid`]: core::ptr::invalid
+//! [miri]: https://github.com/rust-lang/miri
+//! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/
+//! [Strict Provenance]: https://github.com/rust-lang/rust/issues/95228
+//! [Stacked Borrows]: https://plv.mpi-sws.org/rustbelt/stacked-borrows/
 
 #![stable(feature = "rust1", since = "1.0.0")]
 
@@ -210,7 +467,7 @@ pub unsafe fn drop_in_place<T: ?Sized>(to_drop: *mut T) {
 #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")]
 #[rustc_diagnostic_item = "ptr_null"]
 pub const fn null<T>() -> *const T {
-    0 as *const T
+    invalid(0)
 }
 
 /// Creates a null mutable raw pointer.
@@ -230,7 +487,61 @@ pub const fn null<T>() -> *const T {
 #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")]
 #[rustc_diagnostic_item = "ptr_null_mut"]
 pub const fn null_mut<T>() -> *mut T {
-    0 as *mut T
+    invalid_mut(0)
+}
+
+/// Creates an invalid pointer with the given address.
+///
+/// This is *currently* equivalent to `addr as *const T` but it expresses the intended semantic
+/// more clearly, and may become important under future memory models.
+///
+/// The module's top-level documentation discusses the precise meaning of an "invalid"
+/// pointer but essentially this expresses that the pointer is not associated
+/// with any actual allocation and is little more than a usize address in disguise.
+///
+/// This pointer will have no provenance associated with it and is therefore
+/// UB to read/write/offset. This mostly exists to facilitate things
+/// like ptr::null and NonNull::dangling which make invalid pointers.
+///
+/// (Standard "Zero-Sized-Types get to cheat and lie" caveats apply, although it
+/// may be desirable to give them their own API just to make that 100% clear.)
+///
+/// This API and its claimed semantics are part of the Strict Provenance experiment,
+/// see the [module documentation][crate::ptr] for details.
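+///
+/// # Examples
+///
+/// A non-normative sketch of a sentinel pointer that is never dereferenced:
+///
+/// ```
+/// #![feature(strict_provenance)]
+/// use std::ptr;
+///
+/// let sentinel: *const u8 = ptr::invalid(usize::MAX);
+/// assert!(!sentinel.is_null());
+/// assert_eq!(sentinel.addr(), usize::MAX);
+/// ```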
+/// +/// (Standard "Zero-Sized-Types get to cheat and lie" caveats apply, although it +/// may be desirable to give them their own API just to make that 100% clear.) +/// +/// This API and its claimed semantics are part of the Strict Provenance experiment, +/// see the [module documentation][crate::ptr] for details. +#[inline(always)] +#[must_use] +#[rustc_const_stable(feature = "strict_provenance", since = "1.61.0")] +#[unstable(feature = "strict_provenance", issue = "95228")] +pub const fn invalid_mut(addr: usize) -> *mut T { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + addr as *mut T } /// Forms a raw slice from a pointer and a length. @@ -1110,6 +1421,8 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { unchecked_shl, unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub, }; + let addr = p.addr(); + /// Calculate multiplicative modular inverse of `x` modulo `m`. /// /// This implementation is tailored for `align_offset` and has following preconditions: @@ -1170,13 +1483,10 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { // // which distributes operations around the load-bearing, but pessimizing `and` sufficiently // for LLVM to be able to utilize the various optimizations it knows about. - return wrapping_sub( - wrapping_add(p as usize, a_minus_one) & wrapping_sub(0, a), - p as usize, - ); + return wrapping_sub(wrapping_add(addr, a_minus_one) & wrapping_sub(0, a), addr); } - let pmoda = p as usize & a_minus_one; + let pmoda = addr & a_minus_one; if pmoda == 0 { // Already aligned. Yay! return 0; @@ -1193,7 +1503,7 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { let gcd = unsafe { unchecked_shl(1usize, gcdpow) }; // SAFETY: gcd is always greater or equal to 1. - if p as usize & unsafe { unchecked_sub(gcd, 1) } == 0 { + if addr & unsafe { unchecked_sub(gcd, 1) } == 0 { // This branch solves for the following linear congruence equation: // // ` p + so = 0 mod a ` @@ -1347,6 +1657,11 @@ pub fn hash(hashee: *const T, into: &mut S) { hashee.hash(into); } +// FIXME(strict_provenance_magic): function pointers have buggy codegen that +// necessitates casting to a usize to get the backend to do the right thing. +// for now I will break AVR to silence *a billion* lints. We should probably +// have a proper "opaque function pointer type" to handle this kind of thing. + // Impls for function pointers macro_rules! fnptr_impls_safety_abi { ($FnTy: ty, $($Arg: ident),*) => { diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 7e48eac4fe033..547208025214a 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -154,6 +154,79 @@ impl *mut T { bits as Self } + /// Gets the "address" portion of the pointer. + /// + /// This is equivalent to `self as usize`, which semantically discards + /// *provenance* and *address-space* information. To properly restore that information, + /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr]. + /// + /// On most platforms this will produce a value with the same bytes as the original + /// pointer, because all the bytes are dedicated to describing the address. + /// Platforms which need to store additional information in the pointer may + /// perform a change of representation to produce a value containing only the address + /// portion of the pointer. What that means is up to the platform to define. 
+
+    /// Creates a new pointer with the given address.
+    ///
+    /// This performs the same operation as an `addr as ptr` cast, but copies
+    /// the *address-space* and *provenance* of `self` to the new pointer.
+    /// This allows us to dynamically preserve and propagate this important
+    /// information in a way that is otherwise impossible with a unary cast.
+    ///
+    /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
+    /// `self` to the given address, and therefore has all the same capabilities and restrictions.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment,
+    /// see the [module documentation][crate::ptr] for details.
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn with_addr(self, addr: usize) -> Self
+    where
+        T: Sized,
+    {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        //
+        // In the meantime, this operation is defined to be "as if" it was
+        // a wrapping_offset, so we can emulate it as such. This should properly
+        // restore pointer provenance even under today's compiler.
+        let self_addr = self.addr() as isize;
+        let dest_addr = addr as isize;
+        let offset = dest_addr.wrapping_sub(self_addr);
+
+        // This is the canonical desugaring of this operation
+        self.cast::<u8>().wrapping_offset(offset).cast::<T>()
+    }
+
+    /// Creates a new pointer by mapping `self`'s address to a new one.
+    ///
+    /// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment,
+    /// see the [module documentation][crate::ptr] for details.
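+    ///
+    /// # Examples
+    ///
+    /// A non-normative sketch of low-bit tagging on a well-aligned pointer:
+    ///
+    /// ```
+    /// #![feature(strict_provenance)]
+    ///
+    /// let mut x = 0u32;
+    /// let ptr = &mut x as *mut u32;
+    /// // `u32` has alignment 4, so the low two bits of the address are free.
+    /// let tagged = ptr.map_addr(|a| a | 1);
+    /// assert_eq!(tagged.addr() & 1, 1);
+    /// // Strip the tag again before using the pointer.
+    /// let untagged = tagged.map_addr(|a| a & !1);
+    /// assert_eq!(untagged.addr(), ptr.addr());
+    /// ```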
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self
+    where
+        T: Sized,
+    {
+        self.with_addr(f(self.addr()))
+    }
+
     /// Decompose a (possibly wide) pointer into its address and metadata components.
     ///
     /// The pointer can be later reconstructed with [`from_raw_parts_mut`].
@@ -1276,7 +1349,7 @@ impl<T> *mut [T] {
     /// use std::ptr;
     ///
     /// let slice: *mut [i8] = ptr::slice_from_raw_parts_mut(ptr::null_mut(), 3);
-    /// assert_eq!(slice.as_mut_ptr(), 0 as *mut i8);
+    /// assert_eq!(slice.as_mut_ptr(), ptr::null_mut());
     /// ```
     #[inline(always)]
     #[unstable(feature = "slice_ptr_get", issue = "74265")]
diff --git a/library/core/src/ptr/non_null.rs b/library/core/src/ptr/non_null.rs
index a698aec51ca71..c1b19895f006c 100644
--- a/library/core/src/ptr/non_null.rs
+++ b/library/core/src/ptr/non_null.rs
@@ -90,7 +90,7 @@ impl<T: Sized> NonNull<T> {
         // to a *mut T. Therefore, `ptr` is not null and the conditions for
         // calling new_unchecked() are respected.
         unsafe {
-            let ptr = mem::align_of::<T>() as *mut T;
+            let ptr = crate::ptr::invalid_mut::<T>(mem::align_of::<T>());
             NonNull::new_unchecked(ptr)
         }
     }
@@ -469,7 +469,7 @@ impl<T> NonNull<[T]> {
     /// use std::ptr::NonNull;
     ///
     /// let slice: NonNull<[i8]> = NonNull::slice_from_raw_parts(NonNull::dangling(), 3);
-    /// assert_eq!(slice.as_non_null_ptr(), NonNull::new(1 as *mut i8).unwrap());
+    /// assert_eq!(slice.as_non_null_ptr(), NonNull::<i8>::dangling());
     /// ```
     #[inline]
     #[must_use]
@@ -489,7 +489,7 @@ impl<T> NonNull<[T]> {
     /// use std::ptr::NonNull;
     ///
     /// let slice: NonNull<[i8]> = NonNull::slice_from_raw_parts(NonNull::dangling(), 3);
-    /// assert_eq!(slice.as_mut_ptr(), 1 as *mut i8);
+    /// assert_eq!(slice.as_mut_ptr(), NonNull::<i8>::dangling().as_ptr());
     /// ```
     #[inline]
     #[must_use]
diff --git a/library/core/src/ptr/unique.rs b/library/core/src/ptr/unique.rs
index cff68f64f78e0..29398cbeb238d 100644
--- a/library/core/src/ptr/unique.rs
+++ b/library/core/src/ptr/unique.rs
@@ -73,7 +73,7 @@ impl<T: Sized> Unique<T> {
     pub const fn dangling() -> Self {
         // SAFETY: mem::align_of() returns a valid, non-null pointer. The
         // conditions to call new_unchecked() are thus respected.
-        unsafe { Unique::new_unchecked(mem::align_of::<T>() as *mut T) }
+        unsafe { Unique::new_unchecked(crate::ptr::invalid_mut::<T>(mem::align_of::<T>())) }
     }
 }
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index c02a6f2d78c64..6c9107401fda6 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -294,7 +294,7 @@ fn is_ascii(s: &[u8]) -> bool {
     // Paranoia check about alignment, since we're about to do a bunch of
     // unaligned loads. In practice this should be impossible barring a bug in
     // `align_offset` though.
-    debug_assert_eq!((word_ptr as usize) % mem::align_of::<usize>(), 0);
+    debug_assert_eq!(word_ptr.addr() % mem::align_of::<usize>(), 0);
 
     // Read subsequent words until the last aligned word, excluding the last
     // aligned word by itself to be done in tail check later, to ensure that
@@ -302,9 +302,9 @@ fn is_ascii(s: &[u8]) -> bool {
     while byte_pos < len - USIZE_SIZE {
         debug_assert!(
             // Sanity check that the read is in bounds
-            (word_ptr as usize + USIZE_SIZE) <= (start.wrapping_add(len) as usize) &&
+            (word_ptr.addr() + USIZE_SIZE) <= start.addr().wrapping_add(len) &&
             // And that our assumptions about `byte_pos` hold.
-            (word_ptr as usize) - (start as usize) == byte_pos
+            (word_ptr.addr() - start.addr()) == byte_pos
         );
 
         // SAFETY: We know `word_ptr` is properly aligned (because of
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index cf15756868e65..b74ab28fc092a 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -20,13 +20,13 @@ macro_rules! len {
             if size == 0 {
                 // This _cannot_ use `unchecked_sub` because we depend on wrapping
                 // to represent the length of long ZST slice iterators.
-                ($self.end as usize).wrapping_sub(start.as_ptr() as usize)
+                $self.end.addr().wrapping_sub(start.as_ptr().addr())
             } else {
                 // We know that `start <= end`, so can do better than `offset_from`,
                 // which needs to deal in signed. By setting appropriate flags here
                 // we can tell LLVM this, which helps it remove bounds checks.
                 // SAFETY: By the type invariant, `start <= end`
-                let diff = unsafe { unchecked_sub($self.end as usize, start.as_ptr() as usize) };
+                let diff = unsafe { unchecked_sub($self.end.addr(), start.as_ptr().addr()) };
                 // By also telling LLVM that the pointers are apart by an exact
                 // multiple of the type size, it can optimize `len() == 0` down to
                 // `start == end` instead of `(end - start) < size`.
diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs
index 2ba0e5320d7b9..1f392a079718c 100644
--- a/library/core/src/slice/sort.rs
+++ b/library/core/src/slice/sort.rs
@@ -269,7 +269,9 @@ where
     // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
     fn width<T>(l: *mut T, r: *mut T) -> usize {
         assert!(mem::size_of::<T>() > 0);
-        (r as usize - l as usize) / mem::size_of::<T>()
+        // FIXME: this should *likely* use `offset_from`, but more
+        // investigation is needed (including running tests in miri).
+        (r.addr() - l.addr()) / mem::size_of::<T>()
     }
 
     loop {
diff --git a/library/std/src/backtrace.rs b/library/std/src/backtrace.rs
index 94e6070c0f794..05e9b2eb6bc3c 100644
--- a/library/std/src/backtrace.rs
+++ b/library/std/src/backtrace.rs
@@ -330,7 +330,7 @@ impl Backtrace {
                 frame: RawFrame::Actual(frame.clone()),
                 symbols: Vec::new(),
             });
-            if frame.symbol_address() as usize == ip && actual_start.is_none() {
+            if frame.symbol_address().addr() == ip && actual_start.is_none() {
                 actual_start = Some(frames.len());
             }
             true
@@ -493,7 +493,7 @@ impl RawFrame {
             match self {
                 RawFrame::Actual(frame) => frame.ip(),
                 #[cfg(test)]
-                RawFrame::Fake => 1 as *mut c_void,
+                RawFrame::Fake => crate::ptr::invalid_mut(1),
             }
         }
     }
diff --git a/library/std/src/io/error/repr_bitpacked.rs b/library/std/src/io/error/repr_bitpacked.rs
index 208d5a80c5a69..e80068b46abb9 100644
--- a/library/std/src/io/error/repr_bitpacked.rs
+++ b/library/std/src/io/error/repr_bitpacked.rs
@@ -106,7 +106,7 @@ use super::{Custom, ErrorData, ErrorKind, SimpleMessage};
 use alloc::boxed::Box;
 use core::marker::PhantomData;
 use core::mem::{align_of, size_of};
-use core::ptr::NonNull;
+use core::ptr::{self, NonNull};
 
 // The 2 least-significant bits are used as tag.
 const TAG_MASK: usize = 0b11;
@@ -136,7 +136,7 @@ impl Repr {
         let p = Box::into_raw(b).cast::<u8>();
         // Should only be possible if an allocator handed out a pointer with
         // wrong alignment.
-        debug_assert_eq!((p as usize & TAG_MASK), 0);
+        debug_assert_eq!(p.addr() & TAG_MASK, 0);
         // Note: We know `TAG_CUSTOM <= size_of::<Custom>()` (static_assert at
         // end of file), and both the start and end of the expression must be
         // valid without address space wraparound due to `Box`'s semantics.
@@ -166,7 +166,7 @@ impl Repr {
     pub(super) fn new_os(code: i32) -> Self {
         let utagged = ((code as usize) << 32) | TAG_OS;
         // Safety: `TAG_OS` is not zero, so the result of the `|` is not 0.
-        let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }, PhantomData);
+        let res = Self(unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) }, PhantomData);
         // quickly smoke-check we encoded the right thing (This generally will
         // only run in libstd's tests, unless the user uses -Zbuild-std)
         debug_assert!(
@@ -180,7 +180,7 @@ impl Repr {
     pub(super) fn new_simple(kind: ErrorKind) -> Self {
         let utagged = ((kind as usize) << 32) | TAG_SIMPLE;
         // Safety: `TAG_SIMPLE` is not zero, so the result of the `|` is not 0.
-        let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }, PhantomData);
+        let res = Self(unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) }, PhantomData);
         // quickly smoke-check we encoded the right thing (This generally will
         // only run in libstd's tests, unless the user uses -Zbuild-std)
         debug_assert!(
@@ -238,7 +238,7 @@ unsafe fn decode_repr<C, F>(ptr: NonNull<()>, make_custom: F) -> ErrorData<C>
 where
     F: FnOnce(*mut Custom) -> C,
 {
-    let bits = ptr.as_ptr() as usize;
+    let bits = ptr.as_ptr().addr();
     match bits & TAG_MASK {
         TAG_OS => {
             let code = ((bits as i64) >> 32) as i32;
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index a464f2d4c7431..133ced5f26cfb 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -275,6 +275,7 @@
 #![feature(extend_one)]
 #![feature(float_minimum_maximum)]
 #![feature(format_args_nl)]
+#![feature(strict_provenance)]
 #![feature(get_mut_unchecked)]
 #![feature(hashmap_internals)]
 #![feature(int_error_internals)]
diff --git a/library/std/src/os/unix/net/addr.rs b/library/std/src/os/unix/net/addr.rs
index a3ef4b2d92cc4..9aeae4b2cae69 100644
--- a/library/std/src/os/unix/net/addr.rs
+++ b/library/std/src/os/unix/net/addr.rs
@@ -17,8 +17,8 @@ mod libc {
 
 fn sun_path_offset(addr: &libc::sockaddr_un) -> usize {
     // Work with an actual instance of the type since using a null pointer is UB
-    let base = addr as *const _ as usize;
-    let path = &addr.sun_path as *const _ as usize;
+    let base = (addr as *const libc::sockaddr_un).addr();
+    let path = (&addr.sun_path as *const libc::c_char).addr();
     path - base
 }
diff --git a/library/std/src/os/windows/io/handle.rs b/library/std/src/os/windows/io/handle.rs
index 120af9f99dd9b..ee30cc8be6b57 100644
--- a/library/std/src/os/windows/io/handle.rs
+++ b/library/std/src/os/windows/io/handle.rs
@@ -9,6 +9,7 @@
 use crate::fs;
 use crate::io;
 use crate::marker::PhantomData;
 use crate::mem::forget;
+use crate::ptr;
 use crate::sys::c;
 use crate::sys::cvt;
 use crate::sys_common::{AsInner, FromInner, IntoInner};
@@ -182,7 +183,7 @@ impl OwnedHandle {
             return unsafe { Ok(Self::from_raw_handle(handle)) };
         }
 
-        let mut ret = 0 as c::HANDLE;
+        let mut ret = ptr::null_mut();
         cvt(unsafe {
             let cur_proc = c::GetCurrentProcess();
             c::DuplicateHandle(
diff --git a/library/std/src/os/windows/io/socket.rs b/library/std/src/os/windows/io/socket.rs
index a6b979cc22bd3..db93cd15d4ab7 100644
--- a/library/std/src/os/windows/io/socket.rs
+++ b/library/std/src/os/windows/io/socket.rs
@@ -129,6 +129,7 @@ impl OwnedSocket {
         }
     }
 
+    // FIXME(strict_provenance_magic): we defined RawSocket to be a u64 ;-;
     #[cfg(not(target_vendor = "uwp"))]
     pub(crate) fn set_no_inherit(&self) -> io::Result<()> {
         cvt(unsafe {
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
index bcf5c9328b79c..8ecea8ce07f6b 100644
--- a/library/std/src/path.rs
+++ b/library/std/src/path.rs
@@ -1449,8 +1449,8 @@ impl PathBuf {
         };
 
         // truncate until right after the file stem
-        let end_file_stem = file_stem[file_stem.len()..].as_ptr() as usize;
-        let start = os_str_as_u8_slice(&self.inner).as_ptr() as usize;
+        let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr();
+        let start = os_str_as_u8_slice(&self.inner).as_ptr().addr();
         let v = self.as_mut_vec();
         v.truncate(end_file_stem.wrapping_sub(start));
 
diff --git a/library/std/src/sync/once.rs b/library/std/src/sync/once.rs
index 511de863dc51b..d2dd4c075d2a9 100644
--- a/library/std/src/sync/once.rs
+++ b/library/std/src/sync/once.rs
@@ -91,9 +91,12 @@
 use crate::cell::Cell;
 use crate::fmt;
 use crate::marker;
 use crate::panic::{RefUnwindSafe, UnwindSafe};
-use crate::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use crate::ptr;
+use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
 use crate::thread::{self, Thread};
 
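+// The low two bits of `state_and_queue` hold the state (INCOMPLETE, POISONED,
+// RUNNING or COMPLETE); the remaining bits hold an optional pointer to the
+// `Waiter` queue. `Masked` is the nominal pointee for that tagged pointer:
+// using `AtomicPtr<Masked>` instead of `AtomicUsize` lets the queue pointer
+// keep its provenance, with `addr`/`with_addr` manipulating the tag bits.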
+type Masked = ();
+
 /// A synchronization primitive which can be used to run a one-time global
 /// initialization. Useful for one-time initialization for FFI or related
 /// functionality. This type can only be constructed with [`Once::new()`].
@@ -113,7 +116,7 @@ use crate::thread::{self, Thread};
 pub struct Once {
     // `state_and_queue` is actually a pointer to a `Waiter` with extra state
     // bits, so we add the `PhantomData` appropriately.
-    state_and_queue: AtomicUsize,
+    state_and_queue: AtomicPtr<Masked>,
     _marker: marker::PhantomData<*const Waiter>,
 }
 
@@ -136,7 +139,7 @@ impl RefUnwindSafe for Once {}
 #[derive(Debug)]
 pub struct OnceState {
     poisoned: bool,
-    set_state_on_drop_to: Cell<usize>,
+    set_state_on_drop_to: Cell<*mut Masked>,
 }
 
 /// Initialization value for static [`Once`] values.
@@ -184,8 +187,8 @@ struct Waiter {
 // Every node is a struct on the stack of a waiting thread.
 // Will wake up the waiters when it gets dropped, i.e. also on panic.
 struct WaiterQueue<'a> {
-    state_and_queue: &'a AtomicUsize,
-    set_state_on_drop_to: usize,
+    state_and_queue: &'a AtomicPtr<Masked>,
+    set_state_on_drop_to: *mut Masked,
 }
 
 impl Once {
@@ -195,7 +198,10 @@ impl Once {
     #[rustc_const_stable(feature = "const_once_new", since = "1.32.0")]
     #[must_use]
     pub const fn new() -> Once {
-        Once { state_and_queue: AtomicUsize::new(INCOMPLETE), _marker: marker::PhantomData }
+        Once {
+            state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)),
+            _marker: marker::PhantomData,
+        }
     }
 
     /// Performs an initialization routine once and only once. The given closure
@@ -376,7 +382,7 @@ impl Once {
         // operations visible to us, and, this being a fast path, weaker
         // ordering helps with performance. This `Acquire` synchronizes with
         // `Release` operations on the slow path.
-        self.state_and_queue.load(Ordering::Acquire) == COMPLETE
+        self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE
     }
 
     // This is a non-generic function to reduce the monomorphization cost of
@@ -395,7 +401,7 @@ impl Once {
     fn call_inner(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&OnceState)) {
         let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire);
         loop {
-            match state_and_queue {
+            match state_and_queue.addr() {
                 COMPLETE => break,
                 POISONED if !ignore_poisoning => {
                     // Panic to propagate the poison.
@@ -405,7 +411,7 @@ impl Once {
                     // Try to register this thread as the one RUNNING.
                     let exchange_result = self.state_and_queue.compare_exchange(
                         state_and_queue,
-                        RUNNING,
+                        ptr::invalid_mut(RUNNING),
                         Ordering::Acquire,
                         Ordering::Acquire,
                     );
@@ -417,13 +423,13 @@ impl Once {
                     // wake them up on drop.
                     let mut waiter_queue = WaiterQueue {
                         state_and_queue: &self.state_and_queue,
-                        set_state_on_drop_to: POISONED,
+                        set_state_on_drop_to: ptr::invalid_mut(POISONED),
                     };
                     // Run the initialization function, letting it know if we're
                     // poisoned or not.
                    let init_state = OnceState {
-                        poisoned: state_and_queue == POISONED,
-                        set_state_on_drop_to: Cell::new(COMPLETE),
+                        poisoned: state_and_queue.addr() == POISONED,
+                        set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)),
                     };
                     init(&init_state);
                     waiter_queue.set_state_on_drop_to = init_state.set_state_on_drop_to.get();
@@ -432,7 +438,7 @@ impl Once {
                 _ => {
                     // All other values must be RUNNING with possibly a
                     // pointer to the waiter queue in the more significant bits.
-                    assert!(state_and_queue & STATE_MASK == RUNNING);
+                    assert!(state_and_queue.addr() & STATE_MASK == RUNNING);
                     wait(&self.state_and_queue, state_and_queue);
                     state_and_queue = self.state_and_queue.load(Ordering::Acquire);
                 }
@@ -441,13 +447,13 @@ impl Once {
     }
 }
 
-fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) {
+fn wait(state_and_queue: &AtomicPtr<Masked>, mut current_state: *mut Masked) {
     // Note: the following code was carefully written to avoid creating a
     // mutable reference to `node` that gets aliased.
     loop {
         // Don't queue this thread if the status is no longer running,
         // otherwise we will not be woken up.
-        if current_state & STATE_MASK != RUNNING {
+        if current_state.addr() & STATE_MASK != RUNNING {
             return;
         }
 
@@ -455,15 +461,15 @@ fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) {
         let node = Waiter {
             thread: Cell::new(Some(thread::current())),
             signaled: AtomicBool::new(false),
-            next: (current_state & !STATE_MASK) as *const Waiter,
+            next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter,
         };
-        let me = &node as *const Waiter as usize;
+        let me = &node as *const Waiter as *const Masked as *mut Masked;
 
         // Try to slide in the node at the head of the linked list, making sure
         // that another thread didn't just replace the head of the linked list.
         let exchange_result = state_and_queue.compare_exchange(
             current_state,
-            me | RUNNING,
+            me.with_addr(me.addr() | RUNNING),
             Ordering::Release,
             Ordering::Relaxed,
         );
@@ -502,7 +508,7 @@ impl Drop for WaiterQueue<'_> {
             self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel);
 
         // We should only ever see an old state which was RUNNING.
-        assert_eq!(state_and_queue & STATE_MASK, RUNNING);
+        assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING);
 
         // Walk the entire linked list of waiters and wake them up (in lifo
        // order, last to register is first to wake up).
@@ -511,7 +517,8 @@ impl Drop for WaiterQueue<'_> {
             // free `node` if there happens to be a spurious wakeup.
             // So we have to take out the `thread` field and copy the pointer to
             // `next` first.
-            let mut queue = (state_and_queue & !STATE_MASK) as *const Waiter;
+            let mut queue =
+                state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter;
             while !queue.is_null() {
                 let next = (*queue).next;
                 let thread = (*queue).thread.take().unwrap();
@@ -568,6 +575,6 @@ impl OnceState {
     /// Poison the associated [`Once`] without explicitly panicking.
     // NOTE: This is currently only exposed for the `lazy` module
     pub(crate) fn poison(&self) {
-        self.set_state_on_drop_to.set(POISONED);
+        self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED));
     }
 }
diff --git a/library/std/src/sys/unix/memchr.rs b/library/std/src/sys/unix/memchr.rs
index a9273ea676cb3..73ba604eccba2 100644
--- a/library/std/src/sys/unix/memchr.rs
+++ b/library/std/src/sys/unix/memchr.rs
@@ -9,7 +9,7 @@ pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
             haystack.len(),
         )
     };
-    if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) }
+    if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) }
 }
 
 pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
@@ -26,7 +26,9 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
             haystack.len(),
         )
     };
-    if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) }
+    // FIXME: this should *likely* use `offset_from`, but more
+    // investigation is needed (including running tests in miri).
+    if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) }
 }
 
 #[cfg(not(target_os = "linux"))]
diff --git a/library/std/src/sys/unix/thread.rs b/library/std/src/sys/unix/thread.rs
index 2d5d306ed62bb..be70d00cb1ace 100644
--- a/library/std/src/sys/unix/thread.rs
+++ b/library/std/src/sys/unix/thread.rs
@@ -505,9 +505,8 @@ pub mod guard {
     #[cfg(target_os = "macos")]
     unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
         let th = libc::pthread_self();
-        let stackaddr =
-            libc::pthread_get_stackaddr_np(th) as usize - libc::pthread_get_stacksize_np(th);
-        Some(stackaddr as *mut libc::c_void)
+        let stackptr = libc::pthread_get_stackaddr_np(th);
+        Some(stackptr.map_addr(|addr| addr - libc::pthread_get_stacksize_np(th)))
     }
 
     #[cfg(target_os = "openbsd")]
@@ -515,14 +514,15 @@ pub mod guard {
         let mut current_stack: libc::stack_t = crate::mem::zeroed();
         assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), &mut current_stack), 0);
 
+        let stack_ptr = current_stack.ss_sp;
         let stackaddr = if libc::pthread_main_np() == 1 {
             // main thread
-            current_stack.ss_sp as usize - current_stack.ss_size + PAGE_SIZE.load(Ordering::Relaxed)
+            stack_ptr.addr() - current_stack.ss_size + PAGE_SIZE.load(Ordering::Relaxed)
         } else {
             // new thread
-            current_stack.ss_sp as usize - current_stack.ss_size
+            stack_ptr.addr() - current_stack.ss_size
         };
-        Some(stackaddr as *mut libc::c_void)
+        Some(stack_ptr.with_addr(stackaddr))
    }
 
    #[cfg(any(
@@ -557,7 +557,8 @@ pub mod guard {
     unsafe fn get_stack_start_aligned() -> Option<*mut libc::c_void> {
         let page_size = PAGE_SIZE.load(Ordering::Relaxed);
         assert!(page_size != 0);
-        let stackaddr = get_stack_start()?;
+        let stackptr = get_stack_start()?;
+        let stackaddr = stackptr.addr();
 
         // Ensure stackaddr is page aligned! A parent process might
         // have reset RLIMIT_STACK to be non-page aligned. The
@@ -565,11 +566,11 @@ pub mod guard {
         // stackaddr < stackaddr + stacksize, so if stackaddr is not
         // page-aligned, calculate the fix such that stackaddr <
         // new_page_aligned_stackaddr < stackaddr + stacksize
-        let remainder = (stackaddr as usize) % page_size;
+        let remainder = stackaddr % page_size;
         Some(if remainder == 0 {
-            stackaddr
+            stackptr
         } else {
-            ((stackaddr as usize) + page_size - remainder) as *mut libc::c_void
+            stackptr.with_addr(stackaddr + page_size - remainder)
         })
     }
 
@@ -588,8 +589,8 @@ pub mod guard {
             // Instead, we'll just note where we expect rlimit to start
             // faulting, so our handler can report "stack overflow", and
             // trust that the kernel's own stack guard will work.
-            let stackaddr = get_stack_start_aligned()?;
-            let stackaddr = stackaddr as usize;
+            let stackptr = get_stack_start_aligned()?;
+            let stackaddr = stackptr.addr();
             Some(stackaddr - page_size..stackaddr)
         } else if cfg!(all(target_os = "linux", target_env = "musl")) {
             // For the main thread, the musl's pthread_attr_getstack
@@ -602,8 +603,8 @@ pub mod guard {
             // at the bottom. If we try to remap the bottom of the stack
             // ourselves, FreeBSD's guard page moves upwards. So we'll just use
             // the builtin guard page.
-            let stackaddr = get_stack_start_aligned()?;
-            let guardaddr = stackaddr as usize;
+            let stackptr = get_stack_start_aligned()?;
+            let guardaddr = stackptr.addr();
             // Technically the number of guard pages is tunable and controlled
             // by the security.bsd.stack_guard_page sysctl, but there are
             // few reasons to change it from the default. The default value has
@@ -620,25 +621,25 @@ pub mod guard {
             // than the initial mmap() used, so we mmap() here with
             // read/write permissions and only then mprotect() it to
             // no permissions at all. See issue #50313.
-            let stackaddr = get_stack_start_aligned()?;
+            let stackptr = get_stack_start_aligned()?;
             let result = mmap(
-                stackaddr,
+                stackptr,
                 page_size,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANON | MAP_FIXED,
                 -1,
                 0,
             );
-            if result != stackaddr || result == MAP_FAILED {
+            if result != stackptr || result == MAP_FAILED {
                 panic!("failed to allocate a guard page: {}", io::Error::last_os_error());
             }
 
-            let result = mprotect(stackaddr, page_size, PROT_NONE);
+            let result = mprotect(stackptr, page_size, PROT_NONE);
             if result != 0 {
                 panic!("failed to protect the guard page: {}", io::Error::last_os_error());
             }
 
-            let guardaddr = stackaddr as usize;
+            let guardaddr = stackptr.addr();
 
             Some(guardaddr..guardaddr + page_size)
         }
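The `get_stack_start_aligned` rework above is the provenance-preserving version of "round this pointer up to the next page boundary". A self-contained sketch of the same arithmetic, assuming nightly `strict_provenance`; the 4096 literal stands in for the runtime page size, and the bounds of the sketch's buffer guarantee the rounded address stays inside the allocation.

#![feature(strict_provenance)]

// Round a pointer's address up to a multiple of `page_size`, keeping
// the original provenance via `with_addr` instead of casting through
// usize and back.
fn round_up_to_page(ptr: *mut u8, page_size: usize) -> *mut u8 {
    let addr = ptr.addr();
    let remainder = addr % page_size;
    if remainder == 0 { ptr } else { ptr.with_addr(addr + page_size - remainder) }
}

fn main() {
    let mut buf = [0u8; 8192]; // large enough that a 4096 boundary falls inside
    let p = buf.as_mut_ptr();
    let aligned = round_up_to_page(p, 4096);
    assert_eq!(aligned.addr() % 4096, 0);
    assert!(aligned.addr() >= p.addr());
}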
@@ -646,7 +647,8 @@ pub mod guard {
 
     #[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "solaris"))]
     pub unsafe fn current() -> Option<Guard> {
-        let stackaddr = get_stack_start()? as usize;
+        let stackptr = get_stack_start()?;
+        let stackaddr = stackptr.addr();
         Some(stackaddr - PAGE_SIZE.load(Ordering::Relaxed)..stackaddr)
     }
 
@@ -679,11 +681,11 @@ pub mod guard {
                 panic!("there is no guard page");
             }
         }
-        let mut stackaddr = crate::ptr::null_mut();
+        let mut stackptr = crate::ptr::null_mut::<libc::c_void>();
         let mut size = 0;
-        assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackaddr, &mut size), 0);
+        assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackptr, &mut size), 0);
 
-        let stackaddr = stackaddr as usize;
+        let stackaddr = stackptr.addr();
         ret = if cfg!(any(target_os = "freebsd", target_os = "netbsd")) {
             Some(stackaddr - guardsize..stackaddr)
         } else if cfg!(all(target_os = "linux", target_env = "musl")) {
diff --git a/library/std/src/sys/windows/alloc.rs b/library/std/src/sys/windows/alloc.rs
index 2fe71f9f28d5c..fdc81cdea7dec 100644
--- a/library/std/src/sys/windows/alloc.rs
+++ b/library/std/src/sys/windows/alloc.rs
@@ -159,7 +159,7 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 {
 
         // Create a correctly aligned pointer offset from the start of the allocated block,
         // and write a header before it.
-        let offset = layout.align() - (ptr as usize & (layout.align() - 1));
+        let offset = layout.align() - (ptr.addr() & (layout.align() - 1));
         // SAFETY: `MIN_ALIGN` <= `offset` <= `layout.align()` and the size of the allocated
         // block is `layout.align() + layout.size()`. `aligned` will thus be a correctly aligned
         // pointer inside the allocated block with at least `layout.size()` bytes after it and at
diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs
index 9b61b2476d5bb..0edf43e5d9dd5 100644
--- a/library/std/src/sys/windows/c.rs
+++ b/library/std/src/sys/windows/c.rs
@@ -173,7 +173,7 @@ pub const PROGRESS_CONTINUE: DWORD = 0;
 
 pub const E_NOTIMPL: HRESULT = 0x80004001u32 as HRESULT;
 
-pub const INVALID_HANDLE_VALUE: HANDLE = !0 as HANDLE;
+pub const INVALID_HANDLE_VALUE: HANDLE = ptr::invalid_mut(!0);
 
 pub const FACILITY_NT_BIT: DWORD = 0x1000_0000;
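`INVALID_HANDLE_VALUE` above is the canonical use of `ptr::invalid_mut`: a sentinel that carries an address but deliberately no provenance, because it is only ever compared, never dereferenced. A sketch of that pattern, assuming nightly `strict_provenance`; `Handle` here is a stand-in alias, not the real `c::HANDLE`.

#![feature(strict_provenance)]

use core::ptr;

// A stand-in for an OS handle type (the real one is `*mut c_void`).
type Handle = *mut core::ffi::c_void;

// All-ones address, no provenance: fine, since nothing ever reads through it.
const INVALID_HANDLE: Handle = ptr::invalid_mut(!0);

fn is_invalid(h: Handle) -> bool {
    // Address comparison only; no dereference, so no provenance is needed.
    h.addr() == INVALID_HANDLE.addr()
}

fn main() {
    assert!(is_invalid(INVALID_HANDLE));
    let mut x = 0i32;
    assert!(!is_invalid(&mut x as *mut i32 as Handle));
}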
diff --git a/library/std/src/sys/windows/compat.rs b/library/std/src/sys/windows/compat.rs
index cbd3366b189ed..a914a3bcc120b 100644
--- a/library/std/src/sys/windows/compat.rs
+++ b/library/std/src/sys/windows/compat.rs
@@ -88,7 +88,7 @@ macro_rules! compat_fn {
                 let symbol_name: *const u8 = concat!(stringify!($symbol), "\0").as_ptr();
                 let module_handle = $crate::sys::c::GetModuleHandleA(module_name as *const i8);
                 if !module_handle.is_null() {
-                    match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8) as usize {
+                    match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8).addr() {
                         0 => {}
                         n => {
                             PTR = Some(mem::transmute::<usize, F>(n));
                         }
diff --git a/library/std/src/sys/windows/fs.rs b/library/std/src/sys/windows/fs.rs
index d6c40a15329a9..95903899297b6 100644
--- a/library/std/src/sys/windows/fs.rs
+++ b/library/std/src/sys/windows/fs.rs
@@ -57,6 +57,9 @@ pub struct DirEntry {
     data: c::WIN32_FIND_DATAW,
 }
 
+unsafe impl Send for OpenOptions {}
+unsafe impl Sync for OpenOptions {}
+
 #[derive(Clone, Debug)]
 pub struct OpenOptions {
     // generic
@@ -72,7 +75,7 @@ pub struct OpenOptions {
     attributes: c::DWORD,
     share_mode: c::DWORD,
     security_qos_flags: c::DWORD,
-    security_attributes: usize, // FIXME: should be a reference
+    security_attributes: c::LPSECURITY_ATTRIBUTES,
 }
 
 #[derive(Clone, PartialEq, Eq, Debug)]
@@ -187,7 +190,7 @@ impl OpenOptions {
             share_mode: c::FILE_SHARE_READ | c::FILE_SHARE_WRITE | c::FILE_SHARE_DELETE,
             attributes: 0,
             security_qos_flags: 0,
-            security_attributes: 0,
+            security_attributes: ptr::null_mut(),
         }
     }
 
@@ -228,7 +231,7 @@ impl OpenOptions {
         self.security_qos_flags = flags | c::SECURITY_SQOS_PRESENT;
     }
     pub fn security_attributes(&mut self, attrs: c::LPSECURITY_ATTRIBUTES) {
-        self.security_attributes = attrs as usize;
+        self.security_attributes = attrs;
     }
 
     fn get_access_mode(&self) -> io::Result<c::DWORD> {
@@ -289,7 +292,7 @@ impl File {
                 path.as_ptr(),
                 opts.get_access_mode()?,
                 opts.share_mode,
-                opts.security_attributes as *mut _,
+                opts.security_attributes,
                 opts.get_creation_mode()?,
                 opts.get_flags_and_attributes(),
                 ptr::null_mut(),
diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs
index 62814eaaa5667..87e3fec6353f9 100644
--- a/library/std/src/sys/windows/mod.rs
+++ b/library/std/src/sys/windows/mod.rs
@@ -136,7 +136,7 @@ pub fn unrolled_find_u16s(needle: u16, haystack: &[u16]) -> Option<usize> {
         ($($n:literal,)+) => {
             $(
                 if start[$n] == needle {
-                    return Some((&start[$n] as *const u16 as usize - ptr as usize) / 2);
+                    return Some(((&start[$n] as *const u16).addr() - ptr.addr()) / 2);
                 }
             )+
         }
@@ -149,7 +149,7 @@ pub fn unrolled_find_u16s(needle: u16, haystack: &[u16]) -> Option<usize> {
 
     for c in start {
         if *c == needle {
-            return Some((c as *const u16 as usize - ptr as usize) / 2);
+            return Some(((c as *const u16).addr() - ptr.addr()) / 2);
         }
     }
     None
diff --git a/library/std/src/sys/windows/os.rs b/library/std/src/sys/windows/os.rs
index 450bceae00081..bcac996c024ec 100644
--- a/library/std/src/sys/windows/os.rs
+++ b/library/std/src/sys/windows/os.rs
@@ -134,7 +134,7 @@ impl Drop for Env {
 pub fn env() -> Env {
     unsafe {
         let ch = c::GetEnvironmentStringsW();
-        if ch as usize == 0 {
+        if ch.is_null() {
             panic!("failure getting env string from OS: {}", io::Error::last_os_error());
         }
         Env { base: ch, cur: ch }
diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs
index 5888ee8e34bfb..3497da51deeda 100644
--- a/library/std/src/sys/windows/thread_parker.rs
+++ b/library/std/src/sys/windows/thread_parker.rs
@@ -60,7 +60,7 @@ use crate::convert::TryFrom;
 use crate::ptr;
 use crate::sync::atomic::{
-    AtomicI8, AtomicUsize,
+    AtomicI8, AtomicPtr,
     Ordering::{Acquire, Relaxed, Release},
 };
 use crate::sys::{c, dur2timeout};
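The import swap above (`AtomicUsize` out, `AtomicPtr` in) sets up the `keyed_event_handle` rewrite that follows: a lazily created pointer cached in a global atomic, with no pointer-to-integer round trips. The same race-tolerant publish pattern can be written entirely with stable APIs; `make_resource` below is a hypothetical initializer standing in for the real NT call.

use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering};

static CACHED: AtomicPtr<u32> = AtomicPtr::new(ptr::null_mut());

// Hypothetical expensive initializer; the real code creates an OS handle.
fn make_resource() -> *mut u32 {
    Box::into_raw(Box::new(42))
}

fn get_resource() -> *mut u32 {
    let p = CACHED.load(Ordering::Acquire);
    if !p.is_null() {
        return p;
    }
    let new = make_resource();
    match CACHED.compare_exchange(ptr::null_mut(), new, Ordering::AcqRel, Ordering::Acquire) {
        Ok(_) => new,
        Err(existing) => {
            // Lost the race: free ours and use the winner's pointer,
            // mirroring the CloseHandle call in the hunk below.
            unsafe { drop(Box::from_raw(new)) };
            existing
        }
    }
}

fn main() {
    let a = get_resource();
    let b = get_resource();
    assert_eq!(a, b);
    assert_eq!(unsafe { *a }, 42);
}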
@@ -217,8 +217,8 @@ impl Parker {
 }
 
 fn keyed_event_handle() -> c::HANDLE {
-    const INVALID: usize = !0;
-    static HANDLE: AtomicUsize = AtomicUsize::new(INVALID);
+    const INVALID: c::HANDLE = ptr::invalid_mut(!0);
+    static HANDLE: AtomicPtr<crate::ffi::c_void> = AtomicPtr::new(INVALID);
     match HANDLE.load(Relaxed) {
         INVALID => {
             let mut handle = c::INVALID_HANDLE_VALUE;
@@ -233,7 +233,7 @@ fn keyed_event_handle() -> c::HANDLE {
                     r => panic!("Unable to create keyed event handle: error {r}"),
                 }
             }
-            match HANDLE.compare_exchange(INVALID, handle as usize, Relaxed, Relaxed) {
+            match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) {
                 Ok(_) => handle,
                 Err(h) => {
                     // Lost the race to another thread initializing HANDLE before we did.
@@ -241,10 +241,10 @@ fn keyed_event_handle() -> c::HANDLE {
                     unsafe {
                         c::CloseHandle(handle);
                     }
-                    h as c::HANDLE
+                    h
                 }
             }
         }
-        handle => handle as c::HANDLE,
+        handle => handle,
     }
 }
diff --git a/library/std/src/sys_common/condvar/check.rs b/library/std/src/sys_common/condvar/check.rs
index 47aff060d6f79..7671850ac55b8 100644
--- a/library/std/src/sys_common/condvar/check.rs
+++ b/library/std/src/sys_common/condvar/check.rs
@@ -1,4 +1,5 @@
-use crate::sync::atomic::{AtomicUsize, Ordering};
+use crate::ptr;
+use crate::sync::atomic::{AtomicPtr, Ordering};
 use crate::sys::locks as imp;
 use crate::sys_common::mutex::MovableMutex;
 
@@ -13,17 +14,18 @@ impl CondvarCheck for Box<imp::Mutex> {
 }
 
 pub struct SameMutexCheck {
-    addr: AtomicUsize,
+    addr: AtomicPtr<()>,
 }
 
 #[allow(dead_code)]
 impl SameMutexCheck {
     pub const fn new() -> Self {
-        Self { addr: AtomicUsize::new(0) }
+        Self { addr: AtomicPtr::new(ptr::null_mut()) }
     }
     pub fn verify(&self, mutex: &MovableMutex) {
-        let addr = mutex.raw() as *const imp::Mutex as usize;
-        match self.addr.compare_exchange(0, addr, Ordering::SeqCst, Ordering::SeqCst) {
+        let addr = mutex.raw() as *const imp::Mutex as *const () as *mut _;
+        match self.addr.compare_exchange(ptr::null_mut(), addr, Ordering::SeqCst, Ordering::SeqCst)
+        {
             Ok(_) => {}               // Stored the address
             Err(n) if n == addr => {} // Lost a race to store the same address
             _ => panic!("attempted to use a condition variable with two mutexes"),
diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs
index a100444f04968..ca29261b1c98d 100644
--- a/library/std/src/thread/local.rs
+++ b/library/std/src/thread/local.rs
@@ -1071,7 +1071,7 @@ pub mod os {
         pub unsafe fn get(&'static self, init: impl FnOnce() -> T) -> Option<&'static T> {
             // SAFETY: See the documentation for this method.
             let ptr = unsafe { self.os.get() as *mut Value<T> };
-            if ptr as usize > 1 {
+            if ptr.addr() > 1 {
                 // SAFETY: the check ensured the pointer is safe (its destructor
                 // is not running) + it is coming from a trusted source (self).
                 if let Some(ref value) = unsafe { (*ptr).inner.get() } {
@@ -1090,7 +1090,7 @@ pub mod os {
             // SAFETY: No mutable references are ever handed out meaning getting
             // the value is ok.
             let ptr = unsafe { self.os.get() as *mut Value<T> };
-            if ptr as usize == 1 {
+            if ptr.addr() == 1 {
                 // destructor is running
                 return None;
             }
@@ -1130,7 +1130,7 @@ pub mod os {
         unsafe {
             let ptr = Box::from_raw(ptr as *mut Value<T>);
             let key = ptr.key;
-            key.os.set(1 as *mut u8);
+            key.os.set(ptr::invalid_mut(1));
             drop(ptr);
             key.os.set(ptr::null_mut());
         }
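The `thread/local.rs` hunks above encode a tiny state machine in the slot pointer itself: null means uninitialized, address 1 means the destructor is running, and anything else is a live value. A minimal sketch of that sentinel scheme, assuming nightly `strict_provenance`; `SlotState` and `classify` are illustrative names, not the stdlib's.

#![feature(strict_provenance)]

use core::ptr;

enum SlotState<'a> {
    Uninit,
    DestructorRunning,
    Live(&'a u32),
}

// The sentinel at address 1 is built with `ptr::invalid_mut` because it
// is only ever compared by address, never dereferenced.
// SAFETY contract (sketch): any non-sentinel `slot` must point to a live u32.
unsafe fn classify<'a>(slot: *mut u32) -> SlotState<'a> {
    if slot.is_null() {
        SlotState::Uninit
    } else if slot.addr() == 1 {
        SlotState::DestructorRunning
    } else {
        SlotState::Live(unsafe { &*slot })
    }
}

fn main() {
    let mut v = 5u32;
    unsafe {
        assert!(matches!(classify(ptr::null_mut()), SlotState::Uninit));
        assert!(matches!(classify(ptr::invalid_mut(1)), SlotState::DestructorRunning));
        assert!(matches!(classify(&mut v), SlotState::Live(&5)));
    }
}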