From f7bc441fd342783ad42b8a9fbb2a1595e80044ad Mon Sep 17 00:00:00 2001 From: carbotaniuman <41451839+carbotaniuman@users.noreply.github.com> Date: Sun, 22 May 2022 15:22:05 -0500 Subject: [PATCH 1/2] Initial work on permissive provenance --- src/bin/miri.rs | 8 +- src/eval.rs | 7 +- src/helpers.rs | 4 +- src/intptrcast.rs | 123 +++++++++++++++--- src/lib.rs | 5 +- src/machine.rs | 70 +++++++--- src/stacked_borrows.rs | 11 +- tests/compile-fail/ptr_int_unexposed.rs | 12 ++ tests/compile-fail/ptr_legacy_provenance.rs | 21 +++ .../run-pass/ptr_int_permissive_provenance.rs | 62 +++++++++ 10 files changed, 275 insertions(+), 48 deletions(-) create mode 100644 tests/compile-fail/ptr_int_unexposed.rs create mode 100644 tests/compile-fail/ptr_legacy_provenance.rs create mode 100644 tests/run-pass/ptr_int_permissive_provenance.rs diff --git a/src/bin/miri.rs b/src/bin/miri.rs index 0aa8755573..784f0da8a3 100644 --- a/src/bin/miri.rs +++ b/src/bin/miri.rs @@ -30,7 +30,7 @@ use rustc_middle::{ }; use rustc_session::{config::ErrorOutputType, search_paths::PathKind, CtfeBacktrace}; -use miri::BacktraceStyle; +use miri::{BacktraceStyle, ProvenanceMode}; struct MiriCompilerCalls { miri_config: miri::MiriConfig, @@ -384,10 +384,14 @@ fn main() { miri_config.tag_raw = true; } "-Zmiri-strict-provenance" => { - miri_config.strict_provenance = true; + miri_config.provenance_mode = ProvenanceMode::Strict; miri_config.tag_raw = true; miri_config.check_number_validity = true; } + "-Zmiri-permissive-provenance" => { + miri_config.provenance_mode = ProvenanceMode::Permissive; + miri_config.tag_raw = true; + } "-Zmiri-mute-stdout-stderr" => { miri_config.mute_stdout_stderr = true; } diff --git a/src/eval.rs b/src/eval.rs index 13dff22ea3..430ff06cf1 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -113,9 +113,8 @@ pub struct MiriConfig { pub panic_on_unsupported: bool, /// Which style to use for printing backtraces. pub backtrace_style: BacktraceStyle, - /// Whether to enforce "strict provenance" rules. Enabling this means int2ptr casts return - /// pointers with an invalid provenance, i.e., not valid for any memory access. - pub strict_provenance: bool, + /// Which provenance to use for int2ptr casts + pub provenance_mode: ProvenanceMode, /// Whether to ignore any output by the program. This is helpful when debugging miri /// as its messages don't get intermingled with the program messages. pub mute_stdout_stderr: bool, @@ -144,7 +143,7 @@ impl Default for MiriConfig { measureme_out: None, panic_on_unsupported: false, backtrace_style: BacktraceStyle::Short, - strict_provenance: false, + provenance_mode: ProvenanceMode::Legacy, mute_stdout_stderr: false, } } diff --git a/src/helpers.rs b/src/helpers.rs index 2d1fffc6a1..08b2fa98a2 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -786,8 +786,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn mark_immutable(&mut self, mplace: &MemPlace) { let this = self.eval_context_mut(); // This got just allocated, so there definitely is a pointer here. - this.alloc_mark_immutable(mplace.ptr.into_pointer_or_addr().unwrap().provenance.alloc_id) - .unwrap(); + let provenance = mplace.ptr.into_pointer_or_addr().unwrap().provenance; + this.alloc_mark_immutable(provenance.get_alloc_id().unwrap()).unwrap(); } fn item_link_name(&self, def_id: DefId) -> Symbol { diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 895241bcc3..8395cdc273 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -4,11 +4,25 @@ use std::collections::hash_map::Entry; use log::trace; use rand::Rng; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_target::abi::{HasDataLayout, Size}; use crate::*; +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ProvenanceMode { + /// Int2ptr casts return pointers with "wildcard" provenance + /// that basically matches that of all exposed pointers + /// (and SB tags, if enabled). + Permissive, + /// Int2ptr casts return pointers with an invalid provenance, + /// i.e., not valid for any memory access. + Strict, + /// Int2ptr casts determine the allocation they point to at cast time. + /// All allocations are considered exposed. + Legacy, +} + pub type GlobalState = RefCell; #[derive(Clone, Debug)] @@ -21,12 +35,14 @@ pub struct GlobalStateInner { /// they do not have an `AllocExtra`. /// This is the inverse of `int_to_ptr_map`. base_addr: FxHashMap, + /// Whether an allocation has been exposed or not. This cannot be put + /// into `AllocExtra` for the same reason as `base_addr`. + exposed: FxHashSet, /// This is used as a memory address when a new pointer is casted to an integer. It /// is always larger than any address that was previously made part of a block. next_base_addr: u64, - /// Whether to enforce "strict provenance" rules. Enabling this means int2ptr casts return - /// pointers with an invalid provenance, i.e., not valid for any memory access. - strict_provenance: bool, + /// The provenance to use for int2ptr casts + provenance_mode: ProvenanceMode, } impl GlobalStateInner { @@ -34,22 +50,22 @@ impl GlobalStateInner { GlobalStateInner { int_to_ptr_map: Vec::default(), base_addr: FxHashMap::default(), + exposed: FxHashSet::default(), next_base_addr: STACK_ADDR, - strict_provenance: config.strict_provenance, + provenance_mode: config.provenance_mode, } } } impl<'mir, 'tcx> GlobalStateInner { - pub fn ptr_from_addr(addr: u64, ecx: &MiriEvalContext<'mir, 'tcx>) -> Pointer> { - trace!("Casting 0x{:x} to a pointer", addr); + // Returns the exposed `AllocId` that corresponds to the specified addr, + // or `None` if the addr is out of bounds + fn alloc_id_from_addr(ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64) -> Option { let global_state = ecx.machine.intptrcast.borrow(); - - if global_state.strict_provenance { - return Pointer::new(None, Size::from_bytes(addr)); - } + assert!(global_state.provenance_mode != ProvenanceMode::Strict); let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr); + let alloc_id = match pos { Ok(pos) => Some(global_state.int_to_ptr_map[pos].1), Err(0) => None, @@ -60,6 +76,7 @@ impl<'mir, 'tcx> GlobalStateInner { // This never overflows because `addr >= glb` let offset = addr - glb; // If the offset exceeds the size of the allocation, don't use this `alloc_id`. + if offset <= ecx .get_alloc_size_and_align(alloc_id, AllocCheck::MaybeDead) @@ -72,12 +89,65 @@ impl<'mir, 'tcx> GlobalStateInner { None } } - }; - // Pointers created from integers are untagged. - Pointer::new( - alloc_id.map(|alloc_id| Tag { alloc_id, sb: SbTag::Untagged }), - Size::from_bytes(addr), - ) + }?; + + // In legacy mode, we consider all allocations exposed. + if global_state.provenance_mode == ProvenanceMode::Legacy + || global_state.exposed.contains(&alloc_id) + { + Some(alloc_id) + } else { + None + } + } + + pub fn expose_addr(ecx: &MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId) { + trace!("Exposing allocation id {:?}", alloc_id); + + let mut global_state = ecx.machine.intptrcast.borrow_mut(); + if global_state.provenance_mode == ProvenanceMode::Permissive { + global_state.exposed.insert(alloc_id); + } + } + + pub fn ptr_from_addr_transmute( + ecx: &MiriEvalContext<'mir, 'tcx>, + addr: u64, + ) -> Pointer> { + trace!("Transmuting 0x{:x} to a pointer", addr); + + let global_state = ecx.machine.intptrcast.borrow(); + + // In legacy mode, we have to support int2ptr transmutes, + // so just pretend they do the same thing as a cast. + if global_state.provenance_mode == ProvenanceMode::Legacy { + Self::ptr_from_addr_cast(ecx, addr) + } else { + Pointer::new(None, Size::from_bytes(addr)) + } + } + + pub fn ptr_from_addr_cast( + ecx: &MiriEvalContext<'mir, 'tcx>, + addr: u64, + ) -> Pointer> { + trace!("Casting 0x{:x} to a pointer", addr); + + let global_state = ecx.machine.intptrcast.borrow(); + + if global_state.provenance_mode == ProvenanceMode::Strict { + Pointer::new(None, Size::from_bytes(addr)) + } else if global_state.provenance_mode == ProvenanceMode::Legacy { + let alloc_id = Self::alloc_id_from_addr(ecx, addr); + + Pointer::new( + alloc_id + .map(|alloc_id| Tag::Concrete(ConcreteTag { alloc_id, sb: SbTag::Untagged })), + Size::from_bytes(addr), + ) + } else { + Pointer::new(Some(Tag::Wildcard), Size::from_bytes(addr)) + } } fn alloc_base_addr(ecx: &MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId) -> u64 { @@ -136,14 +206,27 @@ impl<'mir, 'tcx> GlobalStateInner { dl.overflowing_offset(base_addr, offset.bytes()).0 } - pub fn abs_ptr_to_rel(ecx: &MiriEvalContext<'mir, 'tcx>, ptr: Pointer) -> Size { + pub fn abs_ptr_to_rel( + ecx: &MiriEvalContext<'mir, 'tcx>, + ptr: Pointer, + ) -> Option<(AllocId, Size)> { let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance) - let base_addr = GlobalStateInner::alloc_base_addr(ecx, tag.alloc_id); + + let alloc_id = if let Tag::Concrete(concrete) = tag { + concrete.alloc_id + } else { + GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())? + }; + + let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id); // Wrapping "addr - base_addr" let dl = ecx.data_layout(); let neg_base_addr = (base_addr as i64).wrapping_neg(); - Size::from_bytes(dl.overflowing_signed_offset(addr.bytes(), neg_base_addr).0) + Some(( + alloc_id, + Size::from_bytes(dl.overflowing_signed_offset(addr.bytes(), neg_base_addr).0), + )) } /// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple diff --git a/src/lib.rs b/src/lib.rs index 9fa2c61fd8..e571c8a001 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,9 +78,10 @@ pub use crate::eval::{ create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith, }; pub use crate::helpers::{CurrentSpan, EvalContextExt as HelpersEvalContextExt}; +pub use crate::intptrcast::ProvenanceMode; pub use crate::machine::{ - AllocExtra, Evaluator, FrameData, MiriEvalContext, MiriEvalContextExt, MiriMemoryKind, Tag, - NUM_CPUS, PAGE_SIZE, STACK_ADDR, STACK_SIZE, + AllocExtra, ConcreteTag, Evaluator, FrameData, MiriEvalContext, MiriEvalContextExt, + MiriMemoryKind, Tag, NUM_CPUS, PAGE_SIZE, STACK_ADDR, STACK_SIZE, }; pub use crate::mono_hash_map::MonoHashMap; pub use crate::operator::EvalContextExt as OperatorEvalContextExt; diff --git a/src/machine.rs b/src/machine.rs index d78b0135e9..12a32d81b5 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -125,7 +125,13 @@ impl fmt::Display for MiriMemoryKind { /// Pointer provenance (tag). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Tag { +pub enum Tag { + Concrete(ConcreteTag), + Wildcard, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ConcreteTag { pub alloc_id: AllocId, /// Stacked Borrows tag. pub sb: SbTag, @@ -133,8 +139,8 @@ pub struct Tag { #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] static_assert_size!(Pointer, 24); -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -static_assert_size!(Pointer>, 24); +// #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +// static_assert_size!(Pointer>, 24); #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] static_assert_size!(ScalarMaybeUninit, 32); @@ -148,18 +154,31 @@ impl Provenance for Tag { fn fmt(ptr: &Pointer, f: &mut fmt::Formatter<'_>) -> fmt::Result { let (tag, addr) = ptr.into_parts(); // address is absolute write!(f, "0x{:x}", addr.bytes())?; - // Forward `alternate` flag to `alloc_id` printing. - if f.alternate() { - write!(f, "[{:#?}]", tag.alloc_id)?; - } else { - write!(f, "[{:?}]", tag.alloc_id)?; + + match tag { + Tag::Concrete(tag) => { + // Forward `alternate` flag to `alloc_id` printing. + if f.alternate() { + write!(f, "[{:#?}]", tag.alloc_id)?; + } else { + write!(f, "[{:?}]", tag.alloc_id)?; + } + // Print Stacked Borrows tag. + write!(f, "{:?}", tag.sb)?; + } + Tag::Wildcard => { + write!(f, "[Wildcard]")?; + } } - // Print Stacked Borrows tag. - write!(f, "{:?}", tag.sb) + + Ok(()) } fn get_alloc_id(self) -> Option { - Some(self.alloc_id) + match self { + Tag::Concrete(concrete) => Some(concrete.alloc_id), + Tag::Wildcard => None, + } } } @@ -611,7 +630,10 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { } else { SbTag::Untagged }; - Pointer::new(Tag { alloc_id: ptr.provenance, sb: sb_tag }, Size::from_bytes(absolute_addr)) + Pointer::new( + Tag::Concrete(ConcreteTag { alloc_id: ptr.provenance, sb: sb_tag }), + Size::from_bytes(absolute_addr), + ) } #[inline(always)] @@ -619,7 +641,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64, ) -> Pointer> { - intptrcast::GlobalStateInner::ptr_from_addr(addr, ecx) + intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr) } #[inline(always)] @@ -627,14 +649,22 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64, ) -> Pointer> { - Self::ptr_from_addr_cast(ecx, addr) + intptrcast::GlobalStateInner::ptr_from_addr_transmute(ecx, addr) } #[inline(always)] fn expose_ptr( - _ecx: &mut InterpCx<'mir, 'tcx, Self>, - _ptr: Pointer, + ecx: &mut InterpCx<'mir, 'tcx, Self>, + ptr: Pointer, ) -> InterpResult<'tcx> { + let tag = ptr.provenance; + + if let Tag::Concrete(concrete) = tag { + intptrcast::GlobalStateInner::expose_addr(ecx, concrete.alloc_id); + } + + // No need to do anything for wildcard pointers as + // their provenances have already been previously exposed. Ok(()) } @@ -645,7 +675,13 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { ptr: Pointer, ) -> Option<(AllocId, Size, Self::TagExtra)> { let rel = intptrcast::GlobalStateInner::abs_ptr_to_rel(ecx, ptr); - Some((ptr.provenance.alloc_id, rel, ptr.provenance.sb)) + + let sb = match ptr.provenance { + Tag::Concrete(ConcreteTag { sb, .. }) => sb, + Tag::Wildcard => SbTag::Untagged, + }; + + rel.map(|(alloc_id, size)| (alloc_id, size, sb)) } #[inline(always)] diff --git a/src/stacked_borrows.rs b/src/stacked_borrows.rs index 625ffb2c5d..f137b86134 100644 --- a/src/stacked_borrows.rs +++ b/src/stacked_borrows.rs @@ -867,7 +867,16 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.reborrow(&place, size, kind, new_tag, protect)?; // Adjust pointer. - let new_place = place.map_provenance(|p| p.map(|t| Tag { sb: new_tag, ..t })); + let new_place = place.map_provenance(|p| { + p.map(|t| { + // TODO: Fix this eventually + if let Tag::Concrete(t) = t { + Tag::Concrete(ConcreteTag { sb: new_tag, ..t }) + } else { + t + } + }) + }); // Return new pointer. Ok(ImmTy::from_immediate(new_place.to_ref(this), val.layout)) diff --git a/tests/compile-fail/ptr_int_unexposed.rs b/tests/compile-fail/ptr_int_unexposed.rs new file mode 100644 index 0000000000..2aecb68b8b --- /dev/null +++ b/tests/compile-fail/ptr_int_unexposed.rs @@ -0,0 +1,12 @@ +// compile-flags: -Zmiri-permissive-provenance -Zmiri-disable-stacked-borrows + +fn main() { + let x: i32 = 3; + let x_ptr = &x as *const i32; + + // TODO: switch this to addr() once we intrinsify it + let x_usize: usize = unsafe { std::mem::transmute(x_ptr) }; + // Cast back a pointer that did *not* get exposed. + let ptr = x_usize as *const i32; + assert_eq!(unsafe { *ptr }, 3); //~ ERROR Undefined Behavior: dereferencing pointer failed +} diff --git a/tests/compile-fail/ptr_legacy_provenance.rs b/tests/compile-fail/ptr_legacy_provenance.rs new file mode 100644 index 0000000000..aecc1460e0 --- /dev/null +++ b/tests/compile-fail/ptr_legacy_provenance.rs @@ -0,0 +1,21 @@ +// compile-flags: -Zmiri-disable-stacked-borrows +#![feature(strict_provenance)] + +use std::ptr; + +// Make sure that with legacy provenance, the allocation id of +// a casted pointer is determined at cast-time +fn main() { + let x: i32 = 0; + let y: i32 = 1; + + let x_ptr = &x as *const i32; + let y_ptr = &y as *const i32; + + let x_usize = x_ptr.expose_addr(); + let y_usize = y_ptr.expose_addr(); + + let ptr = ptr::from_exposed_addr::(y_usize); + let ptr = ptr.with_addr(x_usize); + assert_eq!(unsafe { *ptr }, 0); //~ ERROR is out-of-bounds +} diff --git a/tests/run-pass/ptr_int_permissive_provenance.rs b/tests/run-pass/ptr_int_permissive_provenance.rs new file mode 100644 index 0000000000..e025cf9213 --- /dev/null +++ b/tests/run-pass/ptr_int_permissive_provenance.rs @@ -0,0 +1,62 @@ +// compile-flags: -Zmiri-permissive-provenance -Zmiri-disable-stacked-borrows +#![feature(strict_provenance)] + +use std::ptr; + +/// Ensure we can expose the address of a pointer that is out-of-bounds +fn ptr_roundtrip_out_of_bounds() { + let x: i32 = 3; + let x_ptr = &x as *const i32; + + let x_usize = x_ptr.wrapping_offset(128).expose_addr(); + + let ptr = ptr::from_exposed_addr::(x_usize).wrapping_offset(-128); + assert_eq!(unsafe { *ptr }, 3); +} + +/// Ensure that we can move between allocations after casting back to a ptr +fn ptr_roundtrip_confusion() { + let x: i32 = 0; + let y: i32 = 1; + + let x_ptr = &x as *const i32; + let y_ptr = &y as *const i32; + + let x_usize = x_ptr.expose_addr(); + let y_usize = y_ptr.expose_addr(); + + let ptr = ptr::from_exposed_addr::(y_usize); + let ptr = ptr.with_addr(x_usize); + assert_eq!(unsafe { *ptr }, 0); +} + +/// Ensure we can cast back a different integer than the one we got when exposing. +fn ptr_roundtrip_imperfect() { + let x: u8 = 3; + let x_ptr = &x as *const u8; + + let x_usize = x_ptr.expose_addr() + 128; + + let ptr = ptr::from_exposed_addr::(x_usize).wrapping_offset(-128); + assert_eq!(unsafe { *ptr }, 3); +} + +/// Ensure that we can roundtrip through a pointer with an address of 0 +fn ptr_roundtrip_null() { + let x = &42; + let x_ptr = x as *const i32; + let x_null_ptr = x_ptr.with_addr(0); // addr 0, but still the provenance of x + let null = x_null_ptr.expose_addr(); + assert_eq!(null, 0); + + let x_null_ptr_copy = ptr::from_exposed_addr::(null); // just a roundtrip, so has provenance of x (angelically) + let x_ptr_copy = x_null_ptr_copy.with_addr(x_ptr.addr()); // addr of x and provenance of x + assert_eq!(unsafe { *x_ptr_copy }, 42); +} + +fn main() { + ptr_roundtrip_out_of_bounds(); + ptr_roundtrip_confusion(); + ptr_roundtrip_imperfect(); + ptr_roundtrip_null(); +} From f8f2255a91f316da6c25906c7f1f9edb353b66ba Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 23 May 2022 09:03:06 +0200 Subject: [PATCH 2/2] readme: document permissive-provenance flag --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index abcfd3e6d0..235b81173b 100644 --- a/README.md +++ b/README.md @@ -318,6 +318,17 @@ to Miri failing to detect cases of undefined behavior in a program. application instead of raising an error within the context of Miri (and halting execution). Note that code might not expect these operations to ever panic, so this flag can lead to strange (mis)behavior. +* `-Zmiri-permissive-provenance` is **experimental**. This will make Miri do a + best-effort attempt to implement the semantics of + [`expose_addr`](https://doc.rust-lang.org/nightly/std/primitive.pointer.html#method.expose_addr) + and + [`ptr::from_exposed_addr`](https://doc.rust-lang.org/nightly/std/ptr/fn.from_exposed_addr.html) + for pointer-to-int and int-to-pointer casts, respectively. This will + necessarily miss some bugs as those semantics are not efficiently + implementable in a sanitizer, but it will only miss bugs that concerns + memory/pointers which is subject to these operations. Also note that this flag + is currently incompatible with Stacked Borrows, so you will have to also pass + `-Zmiri-disable-stacked-borrows` to use this. * `-Zmiri-seed=` configures the seed of the RNG that Miri uses to resolve non-determinism. This RNG is used to pick base addresses for allocations. When isolation is enabled (the default), this is also used to emulate system