From e1a62b9600490afa403070d9e97a7a3326d86d24 Mon Sep 17 00:00:00 2001 From: carbotaniuman <41451839+carbotaniuman@users.noreply.github.com> Date: Sun, 22 May 2022 15:22:05 -0500 Subject: [PATCH] Initial work on permissive provenance --- src/bin/miri.rs | 8 +- src/eval.rs | 7 +- src/helpers.rs | 4 +- src/intptrcast.rs | 123 +++++++++++++++--- src/lib.rs | 5 +- src/machine.rs | 70 +++++++--- src/stacked_borrows.rs | 11 +- tests/compile-fail/ptr_int_unexposed.rs | 12 ++ tests/compile-fail/ptr_legacy_provenance.rs | 21 +++ .../run-pass/ptr_int_permissive_provenance.rs | 62 +++++++++ 10 files changed, 275 insertions(+), 48 deletions(-) create mode 100644 tests/compile-fail/ptr_int_unexposed.rs create mode 100644 tests/compile-fail/ptr_legacy_provenance.rs create mode 100644 tests/run-pass/ptr_int_permissive_provenance.rs diff --git a/src/bin/miri.rs b/src/bin/miri.rs index 0aa8755573..784f0da8a3 100644 --- a/src/bin/miri.rs +++ b/src/bin/miri.rs @@ -30,7 +30,7 @@ use rustc_middle::{ }; use rustc_session::{config::ErrorOutputType, search_paths::PathKind, CtfeBacktrace}; -use miri::BacktraceStyle; +use miri::{BacktraceStyle, ProvenanceMode}; struct MiriCompilerCalls { miri_config: miri::MiriConfig, @@ -384,10 +384,14 @@ fn main() { miri_config.tag_raw = true; } "-Zmiri-strict-provenance" => { - miri_config.strict_provenance = true; + miri_config.provenance_mode = ProvenanceMode::Strict; miri_config.tag_raw = true; miri_config.check_number_validity = true; } + "-Zmiri-permissive-provenance" => { + miri_config.provenance_mode = ProvenanceMode::Permissive; + miri_config.tag_raw = true; + } "-Zmiri-mute-stdout-stderr" => { miri_config.mute_stdout_stderr = true; } diff --git a/src/eval.rs b/src/eval.rs index 5f6348fe0b..3096437c12 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -113,9 +113,8 @@ pub struct MiriConfig { pub panic_on_unsupported: bool, /// Which style to use for printing backtraces. pub backtrace_style: BacktraceStyle, - /// Whether to enforce "strict provenance" rules. Enabling this means int2ptr casts return - /// pointers with an invalid provenance, i.e., not valid for any memory access. - pub strict_provenance: bool, + /// Which provenance to use for int2ptr casts + pub provenance_mode: ProvenanceMode, /// Whether to ignore any output by the program. This is helpful when debugging miri /// as its messages don't get intermingled with the program messages. pub mute_stdout_stderr: bool, @@ -144,7 +143,7 @@ impl Default for MiriConfig { measureme_out: None, panic_on_unsupported: false, backtrace_style: BacktraceStyle::Short, - strict_provenance: false, + provenance_mode: ProvenanceMode::Legacy, mute_stdout_stderr: false, } } diff --git a/src/helpers.rs b/src/helpers.rs index 6beb3f8c3b..8956845d6c 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -787,8 +787,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn mark_immutable(&mut self, mplace: &MemPlace) { let this = self.eval_context_mut(); // This got just allocated, so there definitely is a pointer here. - this.alloc_mark_immutable(mplace.ptr.into_pointer_or_addr().unwrap().provenance.alloc_id) - .unwrap(); + let provenance = mplace.ptr.into_pointer_or_addr().unwrap().provenance; + this.alloc_mark_immutable(provenance.get_alloc_id().unwrap()).unwrap(); } fn item_link_name(&self, def_id: DefId) -> Symbol { diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 895241bcc3..8395cdc273 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -4,11 +4,25 @@ use std::collections::hash_map::Entry; use log::trace; use rand::Rng; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_target::abi::{HasDataLayout, Size}; use crate::*; +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ProvenanceMode { + /// Int2ptr casts return pointers with "wildcard" provenance + /// that basically matches that of all exposed pointers + /// (and SB tags, if enabled). + Permissive, + /// Int2ptr casts return pointers with an invalid provenance, + /// i.e., not valid for any memory access. + Strict, + /// Int2ptr casts determine the allocation they point to at cast time. + /// All allocations are considered exposed. + Legacy, +} + pub type GlobalState = RefCell; #[derive(Clone, Debug)] @@ -21,12 +35,14 @@ pub struct GlobalStateInner { /// they do not have an `AllocExtra`. /// This is the inverse of `int_to_ptr_map`. base_addr: FxHashMap, + /// Whether an allocation has been exposed or not. This cannot be put + /// into `AllocExtra` for the same reason as `base_addr`. + exposed: FxHashSet, /// This is used as a memory address when a new pointer is casted to an integer. It /// is always larger than any address that was previously made part of a block. next_base_addr: u64, - /// Whether to enforce "strict provenance" rules. Enabling this means int2ptr casts return - /// pointers with an invalid provenance, i.e., not valid for any memory access. - strict_provenance: bool, + /// The provenance to use for int2ptr casts + provenance_mode: ProvenanceMode, } impl GlobalStateInner { @@ -34,22 +50,22 @@ impl GlobalStateInner { GlobalStateInner { int_to_ptr_map: Vec::default(), base_addr: FxHashMap::default(), + exposed: FxHashSet::default(), next_base_addr: STACK_ADDR, - strict_provenance: config.strict_provenance, + provenance_mode: config.provenance_mode, } } } impl<'mir, 'tcx> GlobalStateInner { - pub fn ptr_from_addr(addr: u64, ecx: &MiriEvalContext<'mir, 'tcx>) -> Pointer> { - trace!("Casting 0x{:x} to a pointer", addr); + // Returns the exposed `AllocId` that corresponds to the specified addr, + // or `None` if the addr is out of bounds + fn alloc_id_from_addr(ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64) -> Option { let global_state = ecx.machine.intptrcast.borrow(); - - if global_state.strict_provenance { - return Pointer::new(None, Size::from_bytes(addr)); - } + assert!(global_state.provenance_mode != ProvenanceMode::Strict); let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr); + let alloc_id = match pos { Ok(pos) => Some(global_state.int_to_ptr_map[pos].1), Err(0) => None, @@ -60,6 +76,7 @@ impl<'mir, 'tcx> GlobalStateInner { // This never overflows because `addr >= glb` let offset = addr - glb; // If the offset exceeds the size of the allocation, don't use this `alloc_id`. + if offset <= ecx .get_alloc_size_and_align(alloc_id, AllocCheck::MaybeDead) @@ -72,12 +89,65 @@ impl<'mir, 'tcx> GlobalStateInner { None } } - }; - // Pointers created from integers are untagged. - Pointer::new( - alloc_id.map(|alloc_id| Tag { alloc_id, sb: SbTag::Untagged }), - Size::from_bytes(addr), - ) + }?; + + // In legacy mode, we consider all allocations exposed. + if global_state.provenance_mode == ProvenanceMode::Legacy + || global_state.exposed.contains(&alloc_id) + { + Some(alloc_id) + } else { + None + } + } + + pub fn expose_addr(ecx: &MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId) { + trace!("Exposing allocation id {:?}", alloc_id); + + let mut global_state = ecx.machine.intptrcast.borrow_mut(); + if global_state.provenance_mode == ProvenanceMode::Permissive { + global_state.exposed.insert(alloc_id); + } + } + + pub fn ptr_from_addr_transmute( + ecx: &MiriEvalContext<'mir, 'tcx>, + addr: u64, + ) -> Pointer> { + trace!("Transmuting 0x{:x} to a pointer", addr); + + let global_state = ecx.machine.intptrcast.borrow(); + + // In legacy mode, we have to support int2ptr transmutes, + // so just pretend they do the same thing as a cast. + if global_state.provenance_mode == ProvenanceMode::Legacy { + Self::ptr_from_addr_cast(ecx, addr) + } else { + Pointer::new(None, Size::from_bytes(addr)) + } + } + + pub fn ptr_from_addr_cast( + ecx: &MiriEvalContext<'mir, 'tcx>, + addr: u64, + ) -> Pointer> { + trace!("Casting 0x{:x} to a pointer", addr); + + let global_state = ecx.machine.intptrcast.borrow(); + + if global_state.provenance_mode == ProvenanceMode::Strict { + Pointer::new(None, Size::from_bytes(addr)) + } else if global_state.provenance_mode == ProvenanceMode::Legacy { + let alloc_id = Self::alloc_id_from_addr(ecx, addr); + + Pointer::new( + alloc_id + .map(|alloc_id| Tag::Concrete(ConcreteTag { alloc_id, sb: SbTag::Untagged })), + Size::from_bytes(addr), + ) + } else { + Pointer::new(Some(Tag::Wildcard), Size::from_bytes(addr)) + } } fn alloc_base_addr(ecx: &MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId) -> u64 { @@ -136,14 +206,27 @@ impl<'mir, 'tcx> GlobalStateInner { dl.overflowing_offset(base_addr, offset.bytes()).0 } - pub fn abs_ptr_to_rel(ecx: &MiriEvalContext<'mir, 'tcx>, ptr: Pointer) -> Size { + pub fn abs_ptr_to_rel( + ecx: &MiriEvalContext<'mir, 'tcx>, + ptr: Pointer, + ) -> Option<(AllocId, Size)> { let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance) - let base_addr = GlobalStateInner::alloc_base_addr(ecx, tag.alloc_id); + + let alloc_id = if let Tag::Concrete(concrete) = tag { + concrete.alloc_id + } else { + GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())? + }; + + let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id); // Wrapping "addr - base_addr" let dl = ecx.data_layout(); let neg_base_addr = (base_addr as i64).wrapping_neg(); - Size::from_bytes(dl.overflowing_signed_offset(addr.bytes(), neg_base_addr).0) + Some(( + alloc_id, + Size::from_bytes(dl.overflowing_signed_offset(addr.bytes(), neg_base_addr).0), + )) } /// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple diff --git a/src/lib.rs b/src/lib.rs index ee7a3bcdc5..6338f7c843 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,9 +78,10 @@ pub use crate::eval::{ create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith, }; pub use crate::helpers::EvalContextExt as HelpersEvalContextExt; +pub use crate::intptrcast::ProvenanceMode; pub use crate::machine::{ - AllocExtra, Evaluator, FrameData, MiriEvalContext, MiriEvalContextExt, MiriMemoryKind, Tag, - NUM_CPUS, PAGE_SIZE, STACK_ADDR, STACK_SIZE, + AllocExtra, ConcreteTag, Evaluator, FrameData, MiriEvalContext, MiriEvalContextExt, + MiriMemoryKind, Tag, NUM_CPUS, PAGE_SIZE, STACK_ADDR, STACK_SIZE, }; pub use crate::mono_hash_map::MonoHashMap; pub use crate::operator::EvalContextExt as OperatorEvalContextExt; diff --git a/src/machine.rs b/src/machine.rs index 7ab15b9f9c..5f3532dc6d 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -126,7 +126,13 @@ impl fmt::Display for MiriMemoryKind { /// Pointer provenance (tag). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Tag { +pub enum Tag { + Concrete(ConcreteTag), + Wildcard, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ConcreteTag { pub alloc_id: AllocId, /// Stacked Borrows tag. pub sb: SbTag, @@ -134,8 +140,8 @@ pub struct Tag { #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] static_assert_size!(Pointer, 24); -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -static_assert_size!(Pointer>, 24); +// #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +// static_assert_size!(Pointer>, 24); #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] static_assert_size!(ScalarMaybeUninit, 32); @@ -149,18 +155,31 @@ impl Provenance for Tag { fn fmt(ptr: &Pointer, f: &mut fmt::Formatter<'_>) -> fmt::Result { let (tag, addr) = ptr.into_parts(); // address is absolute write!(f, "0x{:x}", addr.bytes())?; - // Forward `alternate` flag to `alloc_id` printing. - if f.alternate() { - write!(f, "[{:#?}]", tag.alloc_id)?; - } else { - write!(f, "[{:?}]", tag.alloc_id)?; + + match tag { + Tag::Concrete(tag) => { + // Forward `alternate` flag to `alloc_id` printing. + if f.alternate() { + write!(f, "[{:#?}]", tag.alloc_id)?; + } else { + write!(f, "[{:?}]", tag.alloc_id)?; + } + // Print Stacked Borrows tag. + write!(f, "{:?}", tag.sb)?; + } + Tag::Wildcard => { + write!(f, "[Wildcard]")?; + } } - // Print Stacked Borrows tag. - write!(f, "{:?}", tag.sb) + + Ok(()) } fn get_alloc_id(self) -> Option { - Some(self.alloc_id) + match self { + Tag::Concrete(concrete) => Some(concrete.alloc_id), + Tag::Wildcard => None, + } } } @@ -609,7 +628,10 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { } else { SbTag::Untagged }; - Pointer::new(Tag { alloc_id: ptr.provenance, sb: sb_tag }, Size::from_bytes(absolute_addr)) + Pointer::new( + Tag::Concrete(ConcreteTag { alloc_id: ptr.provenance, sb: sb_tag }), + Size::from_bytes(absolute_addr), + ) } #[inline(always)] @@ -617,7 +639,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64, ) -> Pointer> { - intptrcast::GlobalStateInner::ptr_from_addr(addr, ecx) + intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr) } #[inline(always)] @@ -625,14 +647,22 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64, ) -> Pointer> { - Self::ptr_from_addr_cast(ecx, addr) + intptrcast::GlobalStateInner::ptr_from_addr_transmute(ecx, addr) } #[inline(always)] fn expose_ptr( - _ecx: &mut InterpCx<'mir, 'tcx, Self>, - _ptr: Pointer, + ecx: &mut InterpCx<'mir, 'tcx, Self>, + ptr: Pointer, ) -> InterpResult<'tcx> { + let tag = ptr.provenance; + + if let Tag::Concrete(concrete) = tag { + intptrcast::GlobalStateInner::expose_addr(ecx, concrete.alloc_id); + } + + // No need to do anything for wildcard pointers as + // their provenances have already been previously exposed. Ok(()) } @@ -643,7 +673,13 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { ptr: Pointer, ) -> Option<(AllocId, Size, Self::TagExtra)> { let rel = intptrcast::GlobalStateInner::abs_ptr_to_rel(ecx, ptr); - Some((ptr.provenance.alloc_id, rel, ptr.provenance.sb)) + + let sb = match ptr.provenance { + Tag::Concrete(ConcreteTag { sb, .. }) => sb, + Tag::Wildcard => SbTag::Untagged, + }; + + rel.map(|(alloc_id, size)| (alloc_id, size, sb)) } #[inline(always)] diff --git a/src/stacked_borrows.rs b/src/stacked_borrows.rs index d7c2139323..0f4489ad30 100644 --- a/src/stacked_borrows.rs +++ b/src/stacked_borrows.rs @@ -870,7 +870,16 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.reborrow(&place, size, kind, new_tag, protect)?; // Adjust pointer. - let new_place = place.map_provenance(|p| p.map(|t| Tag { sb: new_tag, ..t })); + let new_place = place.map_provenance(|p| { + p.map(|t| { + // TODO: Fix this eventually + if let Tag::Concrete(t) = t { + Tag::Concrete(ConcreteTag { sb: new_tag, ..t }) + } else { + t + } + }) + }); // Return new pointer. Ok(ImmTy::from_immediate(new_place.to_ref(this), val.layout)) diff --git a/tests/compile-fail/ptr_int_unexposed.rs b/tests/compile-fail/ptr_int_unexposed.rs new file mode 100644 index 0000000000..2aecb68b8b --- /dev/null +++ b/tests/compile-fail/ptr_int_unexposed.rs @@ -0,0 +1,12 @@ +// compile-flags: -Zmiri-permissive-provenance -Zmiri-disable-stacked-borrows + +fn main() { + let x: i32 = 3; + let x_ptr = &x as *const i32; + + // TODO: switch this to addr() once we intrinsify it + let x_usize: usize = unsafe { std::mem::transmute(x_ptr) }; + // Cast back a pointer that did *not* get exposed. + let ptr = x_usize as *const i32; + assert_eq!(unsafe { *ptr }, 3); //~ ERROR Undefined Behavior: dereferencing pointer failed +} diff --git a/tests/compile-fail/ptr_legacy_provenance.rs b/tests/compile-fail/ptr_legacy_provenance.rs new file mode 100644 index 0000000000..aecc1460e0 --- /dev/null +++ b/tests/compile-fail/ptr_legacy_provenance.rs @@ -0,0 +1,21 @@ +// compile-flags: -Zmiri-disable-stacked-borrows +#![feature(strict_provenance)] + +use std::ptr; + +// Make sure that with legacy provenance, the allocation id of +// a casted pointer is determined at cast-time +fn main() { + let x: i32 = 0; + let y: i32 = 1; + + let x_ptr = &x as *const i32; + let y_ptr = &y as *const i32; + + let x_usize = x_ptr.expose_addr(); + let y_usize = y_ptr.expose_addr(); + + let ptr = ptr::from_exposed_addr::(y_usize); + let ptr = ptr.with_addr(x_usize); + assert_eq!(unsafe { *ptr }, 0); //~ ERROR is out-of-bounds +} diff --git a/tests/run-pass/ptr_int_permissive_provenance.rs b/tests/run-pass/ptr_int_permissive_provenance.rs new file mode 100644 index 0000000000..e025cf9213 --- /dev/null +++ b/tests/run-pass/ptr_int_permissive_provenance.rs @@ -0,0 +1,62 @@ +// compile-flags: -Zmiri-permissive-provenance -Zmiri-disable-stacked-borrows +#![feature(strict_provenance)] + +use std::ptr; + +/// Ensure we can expose the address of a pointer that is out-of-bounds +fn ptr_roundtrip_out_of_bounds() { + let x: i32 = 3; + let x_ptr = &x as *const i32; + + let x_usize = x_ptr.wrapping_offset(128).expose_addr(); + + let ptr = ptr::from_exposed_addr::(x_usize).wrapping_offset(-128); + assert_eq!(unsafe { *ptr }, 3); +} + +/// Ensure that we can move between allocations after casting back to a ptr +fn ptr_roundtrip_confusion() { + let x: i32 = 0; + let y: i32 = 1; + + let x_ptr = &x as *const i32; + let y_ptr = &y as *const i32; + + let x_usize = x_ptr.expose_addr(); + let y_usize = y_ptr.expose_addr(); + + let ptr = ptr::from_exposed_addr::(y_usize); + let ptr = ptr.with_addr(x_usize); + assert_eq!(unsafe { *ptr }, 0); +} + +/// Ensure we can cast back a different integer than the one we got when exposing. +fn ptr_roundtrip_imperfect() { + let x: u8 = 3; + let x_ptr = &x as *const u8; + + let x_usize = x_ptr.expose_addr() + 128; + + let ptr = ptr::from_exposed_addr::(x_usize).wrapping_offset(-128); + assert_eq!(unsafe { *ptr }, 3); +} + +/// Ensure that we can roundtrip through a pointer with an address of 0 +fn ptr_roundtrip_null() { + let x = &42; + let x_ptr = x as *const i32; + let x_null_ptr = x_ptr.with_addr(0); // addr 0, but still the provenance of x + let null = x_null_ptr.expose_addr(); + assert_eq!(null, 0); + + let x_null_ptr_copy = ptr::from_exposed_addr::(null); // just a roundtrip, so has provenance of x (angelically) + let x_ptr_copy = x_null_ptr_copy.with_addr(x_ptr.addr()); // addr of x and provenance of x + assert_eq!(unsafe { *x_ptr_copy }, 42); +} + +fn main() { + ptr_roundtrip_out_of_bounds(); + ptr_roundtrip_confusion(); + ptr_roundtrip_imperfect(); + ptr_roundtrip_null(); +}