From e632e3f9a501daefde13a264e1d0aff54a417510 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2024 13:05:13 +0100 Subject: [PATCH] miri: do not apply aliasing restrictions to Box with custom allocator --- .../src/interpret/validity.rs | 8 +- .../rustc_const_eval/src/interpret/visitor.rs | 10 +- .../rustc_hir_analysis/src/check/intrinsic.rs | 4 + compiler/rustc_middle/src/ty/sty.rs | 7 +- compiler/rustc_span/src/symbol.rs | 1 + library/alloc/src/boxed.rs | 24 ++-- library/core/src/intrinsics.rs | 13 ++ src/tools/miri/src/borrow_tracker/mod.rs | 15 ++- .../src/borrow_tracker/stacked_borrows/mod.rs | 34 ++++- .../src/borrow_tracker/tree_borrows/mod.rs | 33 +++-- src/tools/miri/src/shims/intrinsics/mod.rs | 13 ++ .../tests/pass/box-custom-alloc-aliasing.rs | 123 ++++++++++++++++++ 12 files changed, 250 insertions(+), 35 deletions(-) create mode 100644 src/tools/miri/tests/pass/box-custom-alloc-aliasing.rs diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs index ff1cb43db864e..c568f9acfd38b 100644 --- a/compiler/rustc_const_eval/src/interpret/validity.rs +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -17,8 +17,8 @@ use rustc_middle::mir::interpret::{ ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance, ValidationErrorInfo, ValidationErrorKind, ValidationErrorKind::*, }; -use rustc_middle::ty; use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_middle::ty::{self, Ty}; use rustc_span::symbol::{sym, Symbol}; use rustc_target::abi::{ Abi, FieldIdx, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange, @@ -783,7 +783,11 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M> } #[inline] - fn visit_box(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + fn visit_box( + &mut self, + _box_ty: Ty<'tcx>, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { self.check_safe_pointer(op, PointerKind::Box)?; Ok(()) } diff --git a/compiler/rustc_const_eval/src/interpret/visitor.rs b/compiler/rustc_const_eval/src/interpret/visitor.rs index b200ecbf73af5..0e824f3f592d4 100644 --- a/compiler/rustc_const_eval/src/interpret/visitor.rs +++ b/compiler/rustc_const_eval/src/interpret/visitor.rs @@ -3,7 +3,7 @@ use rustc_index::IndexVec; use rustc_middle::mir::interpret::InterpResult; -use rustc_middle::ty; +use rustc_middle::ty::{self, Ty}; use rustc_target::abi::FieldIdx; use rustc_target::abi::{FieldsShape, VariantIdx, Variants}; @@ -47,10 +47,10 @@ pub trait ValueVisitor<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>>: Sized { Ok(()) } /// Visits the given value as the pointer of a `Box`. There is nothing to recurse into. - /// The type of `v` will be a raw pointer, but this is a field of `Box` and the - /// pointee type is the actual `T`. + /// The type of `v` will be a raw pointer to `T`, but this is a field of `Box` and the + /// pointee type is the actual `T`. `box_ty` provides the full type of the `Box` itself. #[inline(always)] - fn visit_box(&mut self, _v: &Self::V) -> InterpResult<'tcx> { + fn visit_box(&mut self, _box_ty: Ty<'tcx>, _v: &Self::V) -> InterpResult<'tcx> { Ok(()) } @@ -144,7 +144,7 @@ pub trait ValueVisitor<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>>: Sized { assert_eq!(nonnull_ptr.layout().fields.count(), 1); let raw_ptr = self.ecx().project_field(&nonnull_ptr, 0)?; // the actual raw ptr // ... whose only field finally is a raw ptr we can dereference. - self.visit_box(&raw_ptr)?; + self.visit_box(ty, &raw_ptr)?; // The second `Box` field is the allocator, which we recursively check for validity // like in regular structs. diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index 95c51cc0486fc..5a74bdc8a84cb 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -657,6 +657,10 @@ pub fn check_intrinsic_type( sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)), sym::simd_shuffle_generic => (2, 1, vec![param(0), param(0)], param(1)), + sym::retag_box_to_raw => { + (2, 0, vec![Ty::new_mut_ptr(tcx, param(0))], Ty::new_mut_ptr(tcx, param(0))) + } + other => { tcx.dcx().emit_err(UnrecognizedIntrinsicFunction { span, name: other }); return; diff --git a/compiler/rustc_middle/src/ty/sty.rs b/compiler/rustc_middle/src/ty/sty.rs index 427c0f04bd11e..39b5d3b6ea763 100644 --- a/compiler/rustc_middle/src/ty/sty.rs +++ b/compiler/rustc_middle/src/ty/sty.rs @@ -2008,13 +2008,10 @@ impl<'tcx> Ty<'tcx> { // Single-argument Box is always global. (for "minicore" tests) return true; }; - if let Some(alloc_adt) = alloc.expect_ty().ty_adt_def() { + alloc.expect_ty().ty_adt_def().is_some_and(|alloc_adt| { let global_alloc = tcx.require_lang_item(LangItem::GlobalAlloc, None); alloc_adt.did() == global_alloc - } else { - // Allocator is not an ADT... - false - } + }) } _ => false, } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 9dadd47124771..283c685eaa305 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1450,6 +1450,7 @@ symbols! { residual, result, resume, + retag_box_to_raw, return_position_impl_trait_in_trait, return_type_notation, rhs, diff --git a/library/alloc/src/boxed.rs b/library/alloc/src/boxed.rs index 2736e5ee6c588..304f607000b01 100644 --- a/library/alloc/src/boxed.rs +++ b/library/alloc/src/boxed.rs @@ -155,6 +155,7 @@ use core::error::Error; use core::fmt; use core::future::Future; use core::hash::{Hash, Hasher}; +use core::intrinsics::retag_box_to_raw; use core::iter::FusedIterator; use core::marker::Tuple; use core::marker::Unsize; @@ -1110,8 +1111,16 @@ impl Box { #[unstable(feature = "allocator_api", issue = "32838")] #[inline] pub fn into_raw_with_allocator(b: Self) -> (*mut T, A) { - let (leaked, alloc) = Box::into_unique(b); - (leaked.as_ptr(), alloc) + // This is the transition point from `Box` to raw pointers. For Stacked Borrows, these casts + // are relevant -- if this is a global allocator Box and we just get the pointer from `b.0`, + // it will have `Unique` permission, which is not what we want from a raw pointer. We could + // fix that by going through `&mut`, but then if this is *not* a global allocator Box, we'd + // be adding uniqueness assertions that we do not want. So for Miri's sake we pass this + // pointer through an intrinsic for box-to-raw casts, which can do the right thing wrt the + // aliasing model. + let b = mem::ManuallyDrop::new(b); + let alloc = unsafe { ptr::read(&b.1) }; + (unsafe { retag_box_to_raw::(b.0.as_ptr()) }, alloc) } #[unstable( @@ -1122,13 +1131,8 @@ impl Box { #[inline] #[doc(hidden)] pub fn into_unique(b: Self) -> (Unique, A) { - // Box is recognized as a "unique pointer" by Stacked Borrows, but internally it is a - // raw pointer for the type system. Turning it directly into a raw pointer would not be - // recognized as "releasing" the unique pointer to permit aliased raw accesses, - // so all raw pointer methods have to go through `Box::leak`. Turning *that* to a raw pointer - // behaves correctly. - let alloc = unsafe { ptr::read(&b.1) }; - (Unique::from(Box::leak(b)), alloc) + let (ptr, alloc) = Box::into_raw_with_allocator(b); + unsafe { (Unique::from(&mut *ptr), alloc) } } /// Returns a reference to the underlying allocator. @@ -1184,7 +1188,7 @@ impl Box { where A: 'a, { - unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() } + unsafe { &mut *Box::into_raw(b) } } /// Converts a `Box` into a `Pin>`. If `T` does not implement [`Unpin`], then diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index aff1c589e628a..f19f5f322aa50 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -2695,6 +2695,19 @@ pub unsafe fn vtable_size(_ptr: *const ()) -> usize { unreachable!() } +/// Retag a box pointer as part of casting it to a raw pointer. This is the `Box` equivalent of +/// `(x: &mut T) as *mut T`. The input pointer must be the pointer of a `Box` (passed as raw pointer +/// to avoid all questions around move semantics and custom allocators), and `A` must be the `Box`'s +/// allocator. +#[unstable(feature = "core_intrinsics", issue = "none")] +#[rustc_nounwind] +#[cfg_attr(not(bootstrap), rustc_intrinsic)] +#[cfg_attr(bootstrap, inline)] +pub unsafe fn retag_box_to_raw(ptr: *mut T) -> *mut T { + // Miri needs to adjust the provenance, but for regular codegen this is not needed + ptr +} + // Some functions are defined here because they accidentally got made // available in this module on stable. See . // (`transmute` also falls into this category, but it cannot be wrapped due to the diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs index 262f7c449d275..884b8a3b9bc68 100644 --- a/src/tools/miri/src/borrow_tracker/mod.rs +++ b/src/tools/miri/src/borrow_tracker/mod.rs @@ -5,7 +5,7 @@ use std::num::NonZero; use smallvec::SmallVec; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_middle::mir::RetagKind; +use rustc_middle::{mir::RetagKind, ty::Ty}; use rustc_target::abi::Size; use crate::*; @@ -291,6 +291,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } } + fn retag_box_to_raw( + &mut self, + val: &ImmTy<'tcx, Provenance>, + alloc_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, ImmTy<'tcx, Provenance>> { + let this = self.eval_context_mut(); + let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; + match method { + BorrowTrackerMethod::StackedBorrows => this.sb_retag_box_to_raw(val, alloc_ty), + BorrowTrackerMethod::TreeBorrows => this.tb_retag_box_to_raw(val, alloc_ty), + } + } + fn retag_place_contents( &mut self, kind: RetagKind, diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs index 86d22229714d5..6eed62d7edc6a 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs @@ -865,6 +865,24 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.sb_retag_reference(val, new_perm, RetagInfo { cause, in_field: false }) } + fn sb_retag_box_to_raw( + &mut self, + val: &ImmTy<'tcx, Provenance>, + alloc_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, ImmTy<'tcx, Provenance>> { + let this = self.eval_context_mut(); + let is_global_alloc = alloc_ty.ty_adt_def().is_some_and(|adt| { + let global_alloc = this.tcx.require_lang_item(rustc_hir::LangItem::GlobalAlloc, None); + adt.did() == global_alloc + }); + if is_global_alloc { + // Retag this as-if it was a mutable reference. + this.sb_retag_ptr_value(RetagKind::Raw, val) + } else { + Ok(val.clone()) + } + } + fn sb_retag_place_contents( &mut self, kind: RetagKind, @@ -916,10 +934,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { self.ecx } - fn visit_box(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> { - // Boxes get a weak protectors, since they may be deallocated. - let new_perm = NewPermission::from_box_ty(place.layout.ty, self.kind, self.ecx); - self.retag_ptr_inplace(place, new_perm) + fn visit_box( + &mut self, + box_ty: Ty<'tcx>, + place: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx> { + // Only boxes for the global allocator get any special treatment. + if box_ty.is_box_global(*self.ecx.tcx) { + // Boxes get a weak protectors, since they may be deallocated. + let new_perm = NewPermission::from_box_ty(place.layout.ty, self.kind, self.ecx); + self.retag_ptr_inplace(place, new_perm)?; + } + Ok(()) } fn visit_value(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> { diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index 4b944ea88f503..ae38ce6e75346 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -392,6 +392,15 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } } + fn tb_retag_box_to_raw( + &mut self, + val: &ImmTy<'tcx, Provenance>, + _alloc_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, ImmTy<'tcx, Provenance>> { + // Casts to raw pointers are NOPs in Tree Borrows. + Ok(val.clone()) + } + /// Retag all pointers that are stored in this place. fn tb_retag_place_contents( &mut self, @@ -441,14 +450,22 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { /// Regardless of how `Unique` is handled, Boxes are always reborrowed. /// When `Unique` is also reborrowed, then it behaves exactly like `Box` /// except for the fact that `Box` has a non-zero-sized reborrow. - fn visit_box(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> { - let new_perm = NewPermission::from_unique_ty( - place.layout.ty, - self.kind, - self.ecx, - /* zero_size */ false, - ); - self.retag_ptr_inplace(place, new_perm) + fn visit_box( + &mut self, + box_ty: Ty<'tcx>, + place: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx> { + // Only boxes for the global allocator get any special treatment. + if box_ty.is_box_global(*self.ecx.tcx) { + let new_perm = NewPermission::from_unique_ty( + place.layout.ty, + self.kind, + self.ecx, + /* zero_size */ false, + ); + self.retag_ptr_inplace(place, new_perm)?; + } + Ok(()) } fn visit_value(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> { diff --git a/src/tools/miri/src/shims/intrinsics/mod.rs b/src/tools/miri/src/shims/intrinsics/mod.rs index 46f0c771cb51c..075ce44fbceed 100644 --- a/src/tools/miri/src/shims/intrinsics/mod.rs +++ b/src/tools/miri/src/shims/intrinsics/mod.rs @@ -129,6 +129,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.write_bytes_ptr(ptr, iter::repeat(val_byte).take(byte_count.bytes_usize()))?; } + // Memory model / provenance manipulation "ptr_mask" => { let [ptr, mask] = check_arg_count(args)?; @@ -139,6 +140,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.write_pointer(Pointer::new(ptr.provenance, masked_addr), dest)?; } + "retag_box_to_raw" => { + let [ptr] = check_arg_count(args)?; + let alloc_ty = generic_args[1].expect_ty(); + + let val = this.read_immediate(ptr)?; + let new_val = if this.machine.borrow_tracker.is_some() { + this.retag_box_to_raw(&val, alloc_ty)? + } else { + val + }; + this.write_immediate(*new_val, dest)?; + } // We want to return either `true` or `false` at random, or else something like // ``` diff --git a/src/tools/miri/tests/pass/box-custom-alloc-aliasing.rs b/src/tools/miri/tests/pass/box-custom-alloc-aliasing.rs new file mode 100644 index 0000000000000..541e5f244db08 --- /dev/null +++ b/src/tools/miri/tests/pass/box-custom-alloc-aliasing.rs @@ -0,0 +1,123 @@ +//! Regression test for : +//! If `Box` has a local allocator, then it can't be `noalias` as the allocator +//! may want to access allocator state based on the data pointer. + +//@revisions: stack tree +//@[tree]compile-flags: -Zmiri-tree-borrows +#![feature(allocator_api)] +#![feature(strict_provenance)] + +use std::{ + alloc::{AllocError, Allocator, Layout}, + cell::{Cell, UnsafeCell}, + ptr::{self, addr_of, NonNull}, + thread::{self, ThreadId}, + mem, +}; + +const BIN_SIZE: usize = 8; + +// A bin represents a collection of blocks of a specific layout. +#[repr(align(128))] +struct MyBin { + top: Cell, + thread_id: ThreadId, + memory: UnsafeCell<[usize; BIN_SIZE]>, +} + +impl MyBin { + fn pop(&self) -> Option> { + let top = self.top.get(); + if top == BIN_SIZE { + return None; + } + // Cast the *entire* thing to a raw pointer to not restrict its provenance. + let bin = self as *const MyBin; + let base_ptr = UnsafeCell::raw_get(unsafe{ addr_of!((*bin).memory )}).cast::(); + let ptr = unsafe { NonNull::new_unchecked(base_ptr.add(top)) }; + self.top.set(top + 1); + Some(ptr.cast()) + } + + // Pretends to not be a throwaway allocation method like this. A more realistic + // substitute is using intrusive linked lists, which requires access to the + // metadata of this bin as well. + unsafe fn push(&self, ptr: NonNull) { + // For now just check that this really is in this bin. + let start = self.memory.get().addr(); + let end = start + BIN_SIZE * mem::size_of::(); + let addr = ptr.addr().get(); + assert!((start..end).contains(&addr)); + } +} + +// A collection of bins. +struct MyAllocator { + thread_id: ThreadId, + // Pretends to be some complex collection of bins, such as an array of linked lists. + bins: Box<[MyBin; 1]>, +} + +impl MyAllocator { + fn new() -> Self { + let thread_id = thread::current().id(); + MyAllocator { + thread_id, + bins: Box::new( + [MyBin { + top: Cell::new(0), + thread_id, + memory: UnsafeCell::default(), + }; 1], + ), + } + } + + // Pretends to be expensive finding a suitable bin for the layout. + fn find_bin(&self, layout: Layout) -> Option<&MyBin> { + if layout == Layout::new::() { + Some(&self.bins[0]) + } else { + None + } + } +} + +unsafe impl Allocator for MyAllocator { + fn allocate(&self, layout: Layout) -> Result, AllocError> { + // Expensive bin search. + let bin = self.find_bin(layout).ok_or(AllocError)?; + let ptr = bin.pop().ok_or(AllocError)?; + Ok(NonNull::slice_from_raw_parts(ptr, layout.size())) + } + + unsafe fn deallocate(&self, ptr: NonNull, _layout: Layout) { + // Since manually finding the corresponding bin of `ptr` is very expensive, + // doing pointer arithmetics is preferred. + // But this means we access `top` via `ptr` rather than `self`! + // That is fundamentally the source of the aliasing trouble in this example. + let their_bin = ptr.as_ptr().map_addr(|addr| addr & !127).cast::(); + let thread_id = ptr::read(ptr::addr_of!((*their_bin).thread_id)); + if self.thread_id == thread_id { + unsafe { (*their_bin).push(ptr) }; + } else { + todo!("Deallocating from another thread") + } + } +} + +// Make sure to involve `Box` in allocating these, +// as that's where `noalias` may come from. +fn v(t: T, a: A) -> Vec { + (Box::new_in([t], a) as Box<[T], A>).into_vec() +} + +fn main() { + assert!(mem::size_of::() <= 128); // if it grows bigger, the trick to access the "header" no longer works + let my_alloc = MyAllocator::new(); + let a = v(1usize, &my_alloc); + let b = v(2usize, &my_alloc); + assert_eq!(a[0] + 1, b[0]); + assert_eq!(addr_of!(a[0]).wrapping_add(1), addr_of!(b[0])); + drop((a, b)); +}