Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 3 additions & 15 deletions src/shims/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,6 @@ impl<T: ?Sized> FileDescriptionRef<T> {
pub fn id(&self) -> FdId {
self.0.id
}

/// Returns the raw address of this file description. Useful for equality comparisons.
/// Use `id` instead if this can affect user-visible behavior!
pub fn addr(&self) -> usize {
Rc::as_ptr(&self.0).addr()
}
}

/// Holds a weak reference to the actual file description.
Expand All @@ -76,11 +70,6 @@ impl<T: ?Sized> WeakFileDescriptionRef<T> {
pub fn upgrade(&self) -> Option<FileDescriptionRef<T>> {
self.0.upgrade().map(FileDescriptionRef)
}

/// Returns the raw address of this file description. Useful for equality comparisons.
pub fn addr(&self) -> usize {
self.0.as_ptr().addr()
}
}

impl<T> VisitProvenance for WeakFileDescriptionRef<T> {
Expand Down Expand Up @@ -116,13 +105,12 @@ impl<T: FileDescription + 'static> FileDescriptionExt for T {
communicate_allowed: bool,
ecx: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, io::Result<()>> {
let addr = self.addr();
match Rc::into_inner(self.0) {
Some(fd) => {
// There might have been epolls interested in this FD. Remove that.
ecx.machine.epoll_interests.remove_epolls(fd.id);

fd.inner.destroy(addr, communicate_allowed, ecx)
fd.inner.destroy(fd.id, communicate_allowed, ecx)
}
None => {
// Not the last reference.
Expand Down Expand Up @@ -200,7 +188,7 @@ pub trait FileDescription: std::fmt::Debug + FileDescriptionExt {
/// `self_addr` is the address that this file description used to be stored at.
fn destroy<'tcx>(
self,
_self_addr: usize,
_self_id: FdId,
_communicate_allowed: bool,
_ecx: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, io::Result<()>>
Expand Down Expand Up @@ -379,7 +367,7 @@ impl FileDescription for FileHandle {

fn destroy<'tcx>(
self,
_self_addr: usize,
_self_id: FdId,
communicate_allowed: bool,
_ecx: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, io::Result<()>> {
Expand Down
120 changes: 72 additions & 48 deletions src/shims/unix/linux_like/epoll.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, btree_map};
use std::io;
use std::time::Duration;

Expand Down Expand Up @@ -44,7 +44,7 @@ fn range_for_id(id: FdId) -> std::ops::RangeInclusive<EpollEventKey> {
}

/// EpollEventInstance contains information that will be returned by epoll_wait.
#[derive(Debug)]
#[derive(Debug, Default)]
pub struct EpollEventInstance {
/// Bitmask of event types that happened to the file description.
events: u32,
Expand All @@ -54,12 +54,6 @@ pub struct EpollEventInstance {
clock: VClock,
}

impl EpollEventInstance {
pub fn new(events: u32, data: u64) -> EpollEventInstance {
EpollEventInstance { events, data, clock: Default::default() }
}
}

/// EpollEventInterest registers the file description information to an epoll
/// instance during a successful `epoll_ctl` call. It also stores additional
/// information needed to check and update readiness state for `epoll_wait`.
Expand All @@ -69,10 +63,12 @@ impl EpollEventInstance {
/// see the man page:
///
/// <https://man7.org/linux/man-pages/man2/epoll_ctl.2.html>
#[derive(Debug, Copy, Clone)]
#[derive(Debug)]
pub struct EpollEventInterest {
/// The events bitmask retrieved from `epoll_event`.
events: u32,
/// The way the events looked last time we checked (for edge trigger / ET detection).
prev_events: u32,
/// The data retrieved from `epoll_event`.
/// libc's data field in epoll_event can store integer or pointer,
/// but only u64 is supported for now.
Expand Down Expand Up @@ -146,15 +142,15 @@ impl FileDescription for Epoll {

fn destroy<'tcx>(
mut self,
self_addr: usize,
self_id: FdId,
_communicate_allowed: bool,
ecx: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, io::Result<()>> {
// If we were interested in some FDs, we can remove that now.
let mut ids = self.interest_list.get_mut().keys().map(|(id, _num)| *id).collect::<Vec<_>>();
ids.dedup(); // they come out of the map sorted
for id in ids {
ecx.machine.epoll_interests.remove(id, self_addr);
ecx.machine.epoll_interests.remove(id, self_id);
}
interp_ok(Ok(()))
}
Expand All @@ -168,36 +164,38 @@ impl UnixFileDescription for Epoll {}

/// The table of all EpollEventInterest.
/// This tracks, for each file description, which epoll instances have an interest in events
/// for this file description.
pub struct EpollInterestTable(BTreeMap<FdId, Vec<WeakFileDescriptionRef<Epoll>>>);
/// for this file description. The `FdId` is the ID of the epoll instance, so that we can recognize
/// it later when it is slated for removal. The vector is sorted by that ID.
pub struct EpollInterestTable(BTreeMap<FdId, Vec<(FdId, WeakFileDescriptionRef<Epoll>)>>);

impl EpollInterestTable {
pub(crate) fn new() -> Self {
EpollInterestTable(BTreeMap::new())
}

fn insert(&mut self, id: FdId, epoll: WeakFileDescriptionRef<Epoll>) {
fn insert(&mut self, id: FdId, epoll: &FileDescriptionRef<Epoll>) {
let epolls = self.0.entry(id).or_default();
epolls.push(epoll);
let idx = epolls
.binary_search_by_key(&epoll.id(), |&(id, _)| id)
.expect_err("trying to add an epoll that's already in the list");
epolls.insert(idx, (epoll.id(), FileDescriptionRef::downgrade(epoll)));
}

fn remove(&mut self, id: FdId, epoll_addr: usize) {
fn remove(&mut self, id: FdId, epoll_id: FdId) {
let epolls = self.0.entry(id).or_default();
// FIXME: linear scan. Keep the list sorted so we can do binary search?
let idx = epolls
.iter()
.position(|old_ref| old_ref.addr() == epoll_addr)
.binary_search_by_key(&epoll_id, |&(id, _)| id)
.expect("trying to remove an epoll that's not in the list");
epolls.remove(idx);
}

fn get_epolls(&self, id: FdId) -> Option<&Vec<WeakFileDescriptionRef<Epoll>>> {
self.0.get(&id)
fn get_epolls(&self, id: FdId) -> Option<impl Iterator<Item = &WeakFileDescriptionRef<Epoll>>> {
self.0.get(&id).map(|epolls| epolls.iter().map(|(_id, epoll)| epoll))
}

pub fn remove_epolls(&mut self, id: FdId) {
if let Some(epolls) = self.0.remove(&id) {
for epoll in epolls.iter().filter_map(|e| e.upgrade()) {
for epoll in epolls.iter().filter_map(|(_id, epoll)| epoll.upgrade()) {
// This is a still-live epoll with interest in this FD. Remove all
// relevent interests.
epoll
Expand Down Expand Up @@ -343,33 +341,40 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
);
}

// Add new interest to list.
// Add new interest to list. Experiments show that we need to reset all state
// on `EPOLL_CTL_MOD`, including the edge tracking.
let epoll_key = (id, fd);
let new_interest = EpollEventInterest { events, data };
if op == epoll_ctl_add {
let new_interest = EpollEventInterest { events, data, prev_events: 0 };
let new_interest = if op == epoll_ctl_add {
if interest_list.range(range_for_id(id)).next().is_none() {
// This is the first time this FD got added to this epoll.
// Remember that in the global list so we get notified about FD events.
this.machine.epoll_interests.insert(id, FileDescriptionRef::downgrade(&epfd));
this.machine.epoll_interests.insert(id, &epfd);
}
if interest_list.insert(epoll_key, new_interest).is_some() {
// We already had interest in this.
return this.set_last_error_and_return_i32(LibcError("EEXIST"));
match interest_list.entry(epoll_key) {
btree_map::Entry::Occupied(_) => {
// We already had interest in this.
return this.set_last_error_and_return_i32(LibcError("EEXIST"));
}
btree_map::Entry::Vacant(e) => e.insert(new_interest),
}
} else {
// Modify the existing interest.
let Some(interest) = interest_list.get_mut(&epoll_key) else {
return this.set_last_error_and_return_i32(LibcError("ENOENT"));
};
*interest = new_interest;
interest
};

// Deliver events for the new interest.
let force_edge = true; // makes no difference since we reset `prev_events`
send_ready_events_to_interests(
this,
&epfd,
fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this),
std::iter::once((&epoll_key, &new_interest)),
force_edge,
std::iter::once((&epoll_key, new_interest)),
)?;

interp_ok(Scalar::from_i32(0))
Expand All @@ -384,10 +389,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// If this was the last interest in this FD, remove us from the global list
// of who is interested in this FD.
if interest_list.range(range_for_id(id)).next().is_none() {
this.machine.epoll_interests.remove(id, epfd.addr());
this.machine.epoll_interests.remove(id, epfd.id());
}

// Remove related epoll_interest from ready list.
// Remove related event instance from ready list.
epfd.ready_list.borrow_mut().remove(&epoll_key);

interp_ok(Scalar::from_i32(0))
Expand Down Expand Up @@ -519,13 +524,16 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}

/// For a specific file description, get its ready events and send it to everyone who registered
/// interest in this FD. This function should be called whenever an event causes more bytes or
/// an EOF to become newly readable from an FD, and whenever more bytes can be written to an FD
/// or no more future writes are possible.
/// interest in this FD. This function should be called whenever the result of
/// `get_epoll_ready_events` would change.
///
/// This *will* report an event if anyone is subscribed to it, without any further filtering, so
/// do not call this function when an FD didn't have anything happen to it!
fn epoll_send_fd_ready_events(&mut self, fd_ref: DynFileDescriptionRef) -> InterpResult<'tcx> {
/// If `force_edge` is set, edge-triggered interests will be triggered even if the set of
/// ready events did not change. This can lead to spurious wakeups. Use with caution!
fn epoll_send_fd_ready_events(
&mut self,
fd_ref: DynFileDescriptionRef,
force_edge: bool,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let id = fd_ref.id();
// Figure out who is interested in this. We need to clone this list since we can't prove
Expand All @@ -534,7 +542,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
return interp_ok(());
};
let epolls = epolls
.iter()
.map(|weak| {
weak.upgrade()
.expect("someone forgot to remove the garbage from `machine.epoll_interests`")
Expand All @@ -546,7 +553,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this,
&epoll,
event_bitmask,
epoll.interest_list.borrow().range(range_for_id(id)),
force_edge,
epoll.interest_list.borrow_mut().range_mut(range_for_id(id)),
)?;
}

Expand All @@ -560,25 +568,41 @@ fn send_ready_events_to_interests<'tcx, 'a>(
ecx: &mut MiriInterpCx<'tcx>,
epoll: &Epoll,
event_bitmask: u32,
interests: impl Iterator<Item = (&'a EpollEventKey, &'a EpollEventInterest)>,
force_edge: bool,
interests: impl Iterator<Item = (&'a EpollEventKey, &'a mut EpollEventInterest)>,
) -> InterpResult<'tcx> {
let mut wakeup = false;
for (&event_key, interest) in interests {
let mut ready_list = epoll.ready_list.borrow_mut();
// This checks if any of the events specified in epoll_event_interest.events
// match those in ready_events.
let flags = interest.events & event_bitmask;
let prev = std::mem::replace(&mut interest.prev_events, flags);
if flags == 0 {
// Make sure we *remove* any previous item from the ready list, since this
// is not ready any more.
ready_list.remove(&event_key);
continue;
}
// Geenrate a new event instance, with the flags that this one is interested in.
let mut new_instance = EpollEventInstance::new(flags, interest.data);
// Generate new instance, or update existing one. It is crucial that whe we are done,
// if an interest exists in the ready list, then it matches the latest events and data!
let instance = match ready_list.entry(event_key) {
btree_map::Entry::Occupied(e) => e.into_mut(),
btree_map::Entry::Vacant(e) => {
if !force_edge && flags == prev & flags {
// Every bit in `flags` was already set in `prev`, and there's currently
// no entry in the ready list for this. So there is nothing new and no
// prior entry to update; just skip it.
continue;
}
e.insert(EpollEventInstance::default())
}
};
instance.events = flags;
instance.data = interest.data;
ecx.release_clock(|clock| {
new_instance.clock.clone_from(clock);
instance.clock.join(clock);
})?;
// Add event to ready list for this epoll instance.
// Tests confirm that we have to *overwrite* the old instance for the same key.
let mut ready_list = epoll.ready_list.borrow_mut();
ready_list.insert(event_key, new_instance);
wakeup = true;
}
if wakeup {
Expand Down
12 changes: 8 additions & 4 deletions src/shims/unix/linux_like/eventfd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::io;
use std::io::ErrorKind;

use crate::concurrency::VClock;
use crate::shims::files::{FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
use crate::shims::files::{FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
use crate::shims::unix::UnixFileDescription;
use crate::shims::unix::linux_like::epoll::{EpollReadyEvents, EvalContextExt as _};
use crate::*;
Expand Down Expand Up @@ -39,7 +39,7 @@ impl FileDescription for EventFd {

fn destroy<'tcx>(
self,
_self_addr: usize,
_self_id: FdId,
_communicate_allowed: bool,
_ecx: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, io::Result<()>> {
Expand Down Expand Up @@ -217,7 +217,10 @@ fn eventfd_write<'tcx>(

// The state changed; we check and update the status of all supported event
// types for current file description.
ecx.epoll_send_fd_ready_events(eventfd)?;
// Linux seems to cause spurious wakeups here, and Tokio seems to rely on that
// (see <https://github.com/rust-lang/miri/pull/4676#discussion_r2510528994>
// and also <https://www.illumos.org/issues/16700>).
ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ true)?;
Copy link
Member Author

@RalfJung RalfJung Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Darksonn this is the suspicious part... if I make this false, the Tokio test deadlocks....


// Return how many bytes we consumed from the user-provided buffer.
return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
Expand Down Expand Up @@ -312,7 +315,8 @@ fn eventfd_read<'tcx>(

// The state changed; we check and update the status of all supported event
// types for current file description.
ecx.epoll_send_fd_ready_events(eventfd)?;
// Linux seems to always emit do notifications here, even if we were already writable.
ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ true)?;

// Tell userspace how many bytes we put into the buffer.
return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
Expand Down
Loading