Auto merge of #122240 - RalfJung:miri-addr-reuse, r=oli-obk

miri: add some chance to reuse addresses of previously freed allocations The hope is that this can help us find ABA issues. Unfortunately this needs rustc changes so I can't easily run the regular benchmark suite. I used `src/tools/miri/tests/pass/float_nan.rs` as a substitute: ``` Before: Benchmark 1: ./x.py run miri --stage 0 --args src/tools/miri/tests/pass/float_nan.rs --args --edition=2021 Time (mean ± σ): 9.570 s ± 0.013 s [User: 9.279 s, System: 0.290 s] Range (min … max): 9.561 s … 9.579 s 2 runs After: Benchmark 1: ./x.py run miri --stage 0 --args src/tools/miri/tests/pass/float_nan.rs --args --edition=2021 Time (mean ± σ): 9.698 s ± 0.046 s [User: 9.413 s, System: 0.279 s] Range (min … max): 9.666 s … 9.731 s 2 runs ``` That's a ~1.3% slowdown, which seems fine to me. I have seen a lot of noise in this style of benchmarking so I don't quite trust this anyway; we can make further experiments in the Miri repo after this migrated there. r? `@oli-obk`
rust-lang · Mar 13, 2024 · 9ce37dc · 9ce37dc
2 parents 762d317 + c016768
commit 9ce37dc
Show file tree

Hide file tree

Showing 14 changed files with 214 additions and 65 deletions.
diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs
@@ -443,7 +443,8 @@ pub trait Machine<'mir, 'tcx: 'mir>: Sized {
         _machine: &mut Self,
         _alloc_extra: &mut Self::AllocExtra,
         _prov: (AllocId, Self::ProvenanceExtra),
-        _range: AllocRange,
+        _size: Size,
+        _align: Align,
     ) -> InterpResult<'tcx> {
         Ok(())
     }

diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -353,7 +353,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             &mut self.machine,
             &mut alloc.extra,
             (alloc_id, prov),
-            alloc_range(Size::ZERO, size),
+            size,
+            alloc.align,
         )?;
 
         // Don't forget to remember size and align of this now-dead allocation

diff --git a/src/tools/miri/src/intptrcast.rs → src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/intptrcast.rs → src/tools/miri/src/alloc_addresses/mod.rs
@@ -1,3 +1,8 @@
+//! This module is responsible for managing the absolute addresses that allocations are located at,
+//! and for casting between pointers and integers based on those addresses.
+
+mod reuse_pool;
+
 use std::cell::RefCell;
 use std::cmp::max;
 use std::collections::hash_map::Entry;
@@ -6,9 +11,10 @@ use rand::Rng;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_span::Span;
-use rustc_target::abi::{HasDataLayout, Size};
+use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use crate::*;
+use reuse_pool::ReusePool;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ProvenanceMode {
@@ -23,7 +29,7 @@ pub enum ProvenanceMode {
 
 pub type GlobalState = RefCell<GlobalStateInner>;
 
-#[derive(Clone, Debug)]
+#[derive(Debug)]
 pub struct GlobalStateInner {
     /// This is used as a map between the address of each allocation and its `AllocId`. It is always
     /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
@@ -35,6 +41,8 @@ pub struct GlobalStateInner {
     /// they do not have an `AllocExtra`.
     /// This is the inverse of `int_to_ptr_map`.
     base_addr: FxHashMap<AllocId, u64>,
+    /// A pool of addresses we can reuse for future allocations.
+    reuse: ReusePool,
     /// Whether an allocation has been exposed or not. This cannot be put
     /// into `AllocExtra` for the same reason as `base_addr`.
     exposed: FxHashSet<AllocId>,
@@ -50,6 +58,7 @@ impl VisitProvenance for GlobalStateInner {
         let GlobalStateInner {
             int_to_ptr_map: _,
             base_addr: _,
+            reuse: _,
             exposed: _,
             next_base_addr: _,
             provenance_mode: _,
@@ -68,6 +77,7 @@ impl GlobalStateInner {
         GlobalStateInner {
             int_to_ptr_map: Vec::default(),
             base_addr: FxHashMap::default(),
+            reuse: ReusePool::new(),
             exposed: FxHashSet::default(),
             next_base_addr: stack_addr,
             provenance_mode: config.provenance_mode,
@@ -96,7 +106,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     // or `None` if the addr is out of bounds
     fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
         let ecx = self.eval_context_ref();
-        let global_state = ecx.machine.intptrcast.borrow();
+        let global_state = ecx.machine.alloc_addresses.borrow();
         assert!(global_state.provenance_mode != ProvenanceMode::Strict);
 
         let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
@@ -133,12 +143,13 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 
     fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
         let ecx = self.eval_context_ref();
-        let mut global_state = ecx.machine.intptrcast.borrow_mut();
+        let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
         let global_state = &mut *global_state;
 
         Ok(match global_state.base_addr.entry(alloc_id) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
+                let mut rng = ecx.machine.rng.borrow_mut();
                 let (size, align, kind) = ecx.get_alloc_info(alloc_id);
                 // This is either called immediately after allocation (and then cached), or when
                 // adjusting `tcx` pointers (which never get freed). So assert that we are looking
@@ -147,44 +158,63 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 // information was removed.
                 assert!(!matches!(kind, AllocKind::Dead));
 
-                // This allocation does not have a base address yet, pick one.
-                // Leave some space to the previous allocation, to give it some chance to be less aligned.
-                let slack = {
-                    let mut rng = ecx.machine.rng.borrow_mut();
-                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
-                    rng.gen_range(0..16)
+                // This allocation does not have a base address yet, pick or reuse one.
+                let base_addr = if let Some(reuse_addr) =
+                    global_state.reuse.take_addr(&mut *rng, size, align)
+                {
+                    reuse_addr
+                } else {
+                    // We have to pick a fresh address.
+                    // Leave some space to the previous allocation, to give it some chance to be less aligned.
+                    // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
+                    let slack = rng.gen_range(0..16);
+                    // From next_base_addr + slack, round up to adjust for alignment.
+                    let base_addr = global_state
+                        .next_base_addr
+                        .checked_add(slack)
+                        .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                    let base_addr = align_addr(base_addr, align.bytes());
+
+                    // Remember next base address.  If this allocation is zero-sized, leave a gap
+                    // of at least 1 to avoid two allocations having the same base address.
+                    // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
+                    // function/vtable pointers need to be distinguishable!)
+                    global_state.next_base_addr = base_addr
+                        .checked_add(max(size.bytes(), 1))
+                        .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                    // Even if `Size` didn't overflow, we might still have filled up the address space.
+                    if global_state.next_base_addr > ecx.target_usize_max() {
+                        throw_exhaust!(AddressSpaceFull);
+                    }
+
+                    base_addr
                 };
-                // From next_base_addr + slack, round up to adjust for alignment.
-                let base_addr = global_state
-                    .next_base_addr
-                    .checked_add(slack)
-                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                let base_addr = align_addr(base_addr, align.bytes());
-                entry.insert(base_addr);
                 trace!(
-                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
+                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
                     base_addr,
                     alloc_id,
                     size.bytes(),
                     align.bytes(),
-                    slack,
                 );
 
-                // Remember next base address.  If this allocation is zero-sized, leave a gap
-                // of at least 1 to avoid two allocations having the same base address.
-                // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
-                // function/vtable pointers need to be distinguishable!)
-                global_state.next_base_addr = base_addr
-                    .checked_add(max(size.bytes(), 1))
-                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                // Even if `Size` didn't overflow, we might still have filled up the address space.
-                if global_state.next_base_addr > ecx.target_usize_max() {
-                    throw_exhaust!(AddressSpaceFull);
-                }
-                // Also maintain the opposite mapping in `int_to_ptr_map`.
-                // Given that `next_base_addr` increases in each allocation, pushing the
-                // corresponding tuple keeps `int_to_ptr_map` sorted
-                global_state.int_to_ptr_map.push((base_addr, alloc_id));
+                // Store address in cache.
+                entry.insert(base_addr);
+
+                // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
+                // We have a fast-path for the common case that this address is bigger than all previous ones.
+                let pos = if global_state
+                    .int_to_ptr_map
+                    .last()
+                    .is_some_and(|(last_addr, _)| *last_addr < base_addr)
+                {
+                    global_state.int_to_ptr_map.len()
+                } else {
+                    global_state
+                        .int_to_ptr_map
+                        .binary_search_by_key(&base_addr, |(addr, _)| *addr)
+                        .unwrap_err()
+                };
+                global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
 
                 base_addr
             }
@@ -196,7 +226,7 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir,
 pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
         let ecx = self.eval_context_mut();
-        let global_state = ecx.machine.intptrcast.get_mut();
+        let global_state = ecx.machine.alloc_addresses.get_mut();
         // In strict mode, we don't need this, so we can save some cycles by not tracking it.
         if global_state.provenance_mode == ProvenanceMode::Strict {
             return Ok(());
@@ -207,7 +237,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
             return Ok(());
         }
         trace!("Exposing allocation id {alloc_id:?}");
-        let global_state = ecx.machine.intptrcast.get_mut();
+        let global_state = ecx.machine.alloc_addresses.get_mut();
         global_state.exposed.insert(alloc_id);
         if ecx.machine.borrow_tracker.is_some() {
             ecx.expose_tag(alloc_id, tag)?;
@@ -219,7 +249,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         trace!("Casting {:#x} to a pointer", addr);
 
         let ecx = self.eval_context_ref();
-        let global_state = ecx.machine.intptrcast.borrow();
+        let global_state = ecx.machine.alloc_addresses.borrow();
 
         // Potentially emit a warning.
         match global_state.provenance_mode {
@@ -299,7 +329,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 }
 
 impl GlobalStateInner {
-    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
+    pub fn free_alloc_id(
+        &mut self,
+        rng: &mut impl Rng,
+        dead_id: AllocId,
+        size: Size,
+        align: Align,
+    ) {
         // We can *not* remove this from `base_addr`, since the interpreter design requires that we
         // be able to retrieve an AllocId + offset for any memory access *before* we check if the
         // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -319,6 +355,8 @@ impl GlobalStateInner {
         // We can also remove it from `exposed`, since this allocation can anyway not be returned by
         // `alloc_id_from_addr` any more.
         self.exposed.remove(&dead_id);
+        // Also remember this address for future reuse.
+        self.reuse.add_addr(rng, addr, size, align)
     }
 }
 

diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs
@@ -0,0 +1,87 @@
+//! Manages a pool of addresses that can be reused.
+
+use rand::Rng;
+
+use rustc_target::abi::{Align, Size};
+
+const MAX_POOL_SIZE: usize = 64;
+
+// Just use fair coins, until we have evidence that other numbers are better.
+const ADDR_REMEMBER_CHANCE: f64 = 0.5;
+const ADDR_TAKE_CHANCE: f64 = 0.5;
+
+/// The pool strikes a balance between exploring more possible executions and making it more likely
+/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
+/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
+/// structure. Therefore we only reuse allocations when size and alignment match exactly.
+#[derive(Debug)]
+pub struct ReusePool {
+    /// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
+    /// allocations as address-size pairs, the list must be sorted by the size.
+    ///
+    /// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
+    /// less than 64 different possible value, that bounds the overall size of the pool.
+    pool: Vec<Vec<(u64, Size)>>,
+}
+
+impl ReusePool {
+    pub fn new() -> Self {
+        ReusePool { pool: vec![] }
+    }
+
+    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
+        let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
+        if self.pool.len() <= pool_idx {
+            self.pool.resize(pool_idx + 1, Vec::new());
+        }
+        &mut self.pool[pool_idx]
+    }
+
+    pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
+        // Let's see if we even want to remember this address.
+        if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
+            return;
+        }
+        // Determine the pool to add this to, and where in the pool to put it.
+        let subpool = self.subpool(align);
+        let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        // Make sure the pool does not grow too big.
+        if subpool.len() >= MAX_POOL_SIZE {
+            // Pool full. Replace existing element, or last one if this would be even bigger.
+            let clamped_pos = pos.min(subpool.len() - 1);
+            subpool[clamped_pos] = (addr, size);
+            return;
+        }
+        // Add address to pool, at the right position.
+        subpool.insert(pos, (addr, size));
+    }
+
+    pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
+        // Determine whether we'll even attempt a reuse.
+        if !rng.gen_bool(ADDR_TAKE_CHANCE) {
+            return None;
+        }
+        // Determine the pool to take this from.
+        let subpool = self.subpool(align);
+        // Let's see if we can find something of the right size. We want to find the full range of
+        // such items, beginning with the first, so we can't use `binary_search_by_key`.
+        let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let mut end = begin;
+        while let Some((_addr, other_size)) = subpool.get(end) {
+            if *other_size != size {
+                break;
+            }
+            end += 1;
+        }
+        if end == begin {
+            // Could not find any item of the right size.
+            return None;
+        }
+        // Pick a random element with the desired size.
+        let idx = rng.gen_range(begin..end);
+        // Remove it from the pool and return.
+        let (chosen_addr, chosen_size) = subpool.remove(idx);
+        debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
+        Some(chosen_addr)
+    }
+}
diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs
@@ -485,14 +485,14 @@ impl AllocState {
         &mut self,
         alloc_id: AllocId,
         prov_extra: ProvenanceExtra,
-        range: AllocRange,
+        size: Size,
         machine: &MiriMachine<'_, 'tcx>,
     ) -> InterpResult<'tcx> {
         match self {
             AllocState::StackedBorrows(sb) =>
-                sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine),
+                sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
             AllocState::TreeBorrows(tb) =>
-                tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine),
+                tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
         }
     }
 

diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
@@ -574,13 +574,13 @@ impl Stacks {
         &mut self,
         alloc_id: AllocId,
         tag: ProvenanceExtra,
-        range: AllocRange,
+        size: Size,
         machine: &MiriMachine<'_, 'tcx>,
     ) -> InterpResult<'tcx> {
-        trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, range.size.bytes());
+        trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, size.bytes());
         let dcx = DiagnosticCxBuilder::dealloc(machine, tag);
         let state = machine.borrow_tracker.as_ref().unwrap().borrow();
-        self.for_each(range, dcx, |stack, dcx, exposed_tags| {
+        self.for_each(alloc_range(Size::ZERO, size), dcx, |stack, dcx, exposed_tags| {
             stack.dealloc(tag, &state, dcx, exposed_tags)
         })?;
         Ok(())

diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
@@ -80,7 +80,7 @@ impl<'tcx> Tree {
         &mut self,
         alloc_id: AllocId,
         prov: ProvenanceExtra,
-        range: AllocRange,
+        size: Size,
         machine: &MiriMachine<'_, 'tcx>,
     ) -> InterpResult<'tcx> {
         // TODO: for now we bail out on wildcard pointers. Eventually we should
@@ -91,7 +91,7 @@ impl<'tcx> Tree {
         };
         let global = machine.borrow_tracker.as_ref().unwrap();
         let span = machine.current_span();
-        self.dealloc(tag, range, global, alloc_id, span)
+        self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span)
     }
 
     pub fn expose_tag(&mut self, _tag: BorTag) {

diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
@@ -1071,10 +1071,10 @@ impl VClockAlloc {
     pub fn deallocate<'tcx>(
         &mut self,
         alloc_id: AllocId,
-        range: AllocRange,
+        size: Size,
         machine: &mut MiriMachine<'_, '_>,
     ) -> InterpResult<'tcx> {
-        self.unique_access(alloc_id, range, NaWriteType::Deallocate, machine)
+        self.unique_access(alloc_id, alloc_range(Size::ZERO, size), NaWriteType::Deallocate, machine)
     }
 }