diff --git a/quinn-proto/src/connection/mtud.rs b/quinn-proto/src/connection/mtud.rs index c659bf286b..c0561598d3 100644 --- a/quinn-proto/src/connection/mtud.rs +++ b/quinn-proto/src/connection/mtud.rs @@ -1,5 +1,5 @@ use crate::{packet::SpaceId, MtuDiscoveryConfig, MAX_UDP_PAYLOAD}; -use std::time::Instant; +use std::{cmp, time::Instant}; use tracing::trace; /// Implements Datagram Packetization Layer Path Maximum Transmission Unit Discovery @@ -104,14 +104,12 @@ impl MtuDiscovery { self.current_mtu = new_mtu; trace!(current_mtu = self.current_mtu, "new MTU detected"); - self.black_hole_detector.on_probe_acked(); + self.black_hole_detector + .on_probe_acked(packet_number, packet_bytes); true } else { - self.black_hole_detector.on_non_probe_acked( - self.current_mtu, - packet_number, - packet_bytes, - ); + self.black_hole_detector + .on_non_probe_acked(packet_number, packet_bytes); false } } @@ -352,24 +350,24 @@ impl SearchState { #[derive(Clone)] struct BlackHoleDetector { - /// Counts suspicious packet loss bursts since a packet with size equal to the current MTU was - /// acknowledged (or since a black hole was detected) + /// Packet loss bursts currently considered suspicious /// /// A packet loss burst is a group of contiguous packets that are deemed lost at the same time /// (see usages of [`MtuDiscovery::on_non_probe_lost`] for details on how loss detection is /// implemented) /// - /// A packet loss burst is considered suspicious when it contains only suspicious packets and no - /// MTU-sized packet has been acknowledged since the group's packets were sent - suspicious_loss_bursts: u8, - /// Indicates whether the current loss burst has any non-suspicious packets - /// - /// Non-suspicious packets are non-probe packets of size <= `min_mtu` - loss_burst_has_non_suspicious_packets: bool, - /// The largest non-probe packet that was lost (used to keep track of loss bursts) - largest_non_probe_lost: Option, - /// The largest acked packet of size `current_mtu` - largest_acked_mtu_sized_packet: Option, + /// A lost packet is considered suspicious when it is larger than both the minimum MTU and the + /// largest acknowledged packet transmitted later than it. A packet loss burst is considered + /// suspicious when it contains only suspicious packets. + suspicious_loss_bursts: Vec, + /// Loss burst currently being aggregated, if any + current_loss_burst: Option, + /// Packet number of the biggest packet larger than `min_mtu` which we've received + /// acknowledgment of more recently than a suspicious loss burst, if any + largest_post_loss_packet: u64, + /// The maximum of `min_mtu` and the size of `largest_post_loss_packet`, or exactly `min_mtu` if + /// no larger packets have been received since the most recent loss burst. + safe_mtu: u16, /// The UDP payload size guaranteed to be supported by the network min_mtu: u16, } @@ -377,109 +375,149 @@ struct BlackHoleDetector { impl BlackHoleDetector { fn new(min_mtu: u16) -> Self { Self { - suspicious_loss_bursts: 0, - largest_non_probe_lost: None, - loss_burst_has_non_suspicious_packets: false, - largest_acked_mtu_sized_packet: None, + suspicious_loss_bursts: Vec::with_capacity(BLACK_HOLE_THRESHOLD + 1), + current_loss_burst: None, + largest_post_loss_packet: 0, + safe_mtu: min_mtu, min_mtu, } } - fn on_probe_acked(&mut self) { - // We know for sure the path supports the current MTU - self.suspicious_loss_bursts = 0; + fn on_probe_acked(&mut self, packet_number: u64, packet_bytes: u16) { + debug_assert!( + packet_number >= self.largest_post_loss_packet, + "ACKs are delivered in order" + ); + // MTU probes are always larger than the previous MTU, so no previous loss bursts are + // suspicious. + self.suspicious_loss_bursts.clear(); + self.safe_mtu = packet_bytes; + self.largest_post_loss_packet = packet_number; } - fn on_non_probe_acked(&mut self, current_mtu: u16, packet_number: u64, packet_bytes: u16) { - // Reset the black hole counter if a packet the size of the current MTU or larger - // has been acknowledged - if packet_bytes >= current_mtu - && self - .largest_acked_mtu_sized_packet - .map_or(true, |pn| packet_number > pn) - { - self.suspicious_loss_bursts = 0; - self.largest_acked_mtu_sized_packet = Some(packet_number); + fn on_non_probe_acked(&mut self, packet_number: u64, packet_bytes: u16) { + debug_assert!( + packet_number >= self.largest_post_loss_packet, + "ACKs are delivered in order" + ); + if packet_bytes <= self.safe_mtu { + // No impact. + return; } + self.safe_mtu = packet_bytes; + self.largest_post_loss_packet = packet_number; + // Loss bursts packets smaller than this are retroactively deemed non-suspicious. + self.suspicious_loss_bursts + .retain(|burst| burst.smallest_packet_size > packet_bytes); } fn on_non_probe_lost(&mut self, packet_number: u64, packet_bytes: u16) { // A loss burst is a group of consecutive packets that are declared lost, so a distance // greater than 1 indicates a new burst - let new_loss_burst = self - .largest_non_probe_lost - .map_or(true, |prev| packet_number - prev != 1); + let end_last_burst = self.current_loss_burst.as_ref().map_or(false, |current| { + packet_number - current.latest_non_probe != 1 + }); - if new_loss_burst { + if end_last_burst { self.finish_loss_burst(); } - if packet_bytes <= self.min_mtu { - self.loss_burst_has_non_suspicious_packets = true; + match self.current_loss_burst { + None => { + self.current_loss_burst = Some(CurrentLossBurst { + smallest_packet_size: packet_bytes, + latest_non_probe: packet_number, + }); + } + Some(ref mut loss_burst) => { + loss_burst.latest_non_probe = packet_number; + loss_burst.smallest_packet_size = + cmp::min(loss_burst.smallest_packet_size, packet_bytes); + } } - - self.largest_non_probe_lost = Some(packet_number); } fn black_hole_detected(&mut self) -> bool { self.finish_loss_burst(); - if self.suspicious_loss_bursts <= BLACK_HOLE_THRESHOLD { + if self.suspicious_loss_bursts.len() <= BLACK_HOLE_THRESHOLD { return false; } - self.suspicious_loss_bursts = 0; - self.largest_acked_mtu_sized_packet = None; + self.suspicious_loss_bursts.clear(); true } /// Marks the end of the current loss burst, checking whether it was suspicious fn finish_loss_burst(&mut self) { - if self.last_burst_was_suspicious() { - self.suspicious_loss_bursts = self.suspicious_loss_bursts.saturating_add(1); - } - - self.loss_burst_has_non_suspicious_packets = false; - self.largest_non_probe_lost = None; - } - - /// Returns true if the burst was suspicious and should count towards black hole detection - fn last_burst_was_suspicious(&self) -> bool { - // Ignore burst if it contains any non-suspicious packets, because in that case packet loss - // was likely caused by congestion (instead of a sudden decrease in the path's MTU) - if self.loss_burst_has_non_suspicious_packets { - return false; - } - - // Ignore burst if we have received an ACK for a more recent MTU-sized packet, because that - // proves the network still supports the current MTU - let largest_acked = self.largest_acked_mtu_sized_packet.unwrap_or(0); - if self - .largest_non_probe_lost - .map_or(true, |largest_lost| largest_lost < largest_acked) + let Some(burst) = self.current_loss_burst.take() else { + return; + }; + // If a loss burst contains a packet smaller than the minimum MTU or a more recently + // transmitted packet, it is not suspicious. + if burst.smallest_packet_size < self.min_mtu + || (burst.latest_non_probe < self.largest_post_loss_packet + && burst.smallest_packet_size < self.safe_mtu) { - return false; + return; + } + // The loss burst is now deemed suspicious. + + // A suspicious loss burst more recent than `largest_post_loss_packet` invalidates + // it. Ideally we'd reduce `safe_mtu` to the largest packet size received later the burst, + // but that would require significantly more state and complexity, and erring on the side of + // false positives is safe. + if burst.latest_non_probe > self.largest_post_loss_packet { + self.safe_mtu = self.min_mtu; + } + + let burst = LossBurst { + smallest_packet_size: burst.smallest_packet_size, + }; + + if self.suspicious_loss_bursts.len() > BLACK_HOLE_THRESHOLD { + // To limit memory use, only track the most suspicious loss bursts. + let smallest = self + .suspicious_loss_bursts + .iter_mut() + .min_by_key(|prev| prev.smallest_packet_size) + .filter(|prev| prev.smallest_packet_size < burst.smallest_packet_size); + if let Some(smallest) = smallest { + *smallest = burst; + } + return; } - true + self.suspicious_loss_bursts.push(burst); } #[cfg(test)] fn suspicious_loss_burst_count(&self) -> usize { - self.suspicious_loss_bursts as usize + self.suspicious_loss_bursts.len() } #[cfg(test)] fn largest_non_probe_lost(&self) -> Option { - self.largest_non_probe_lost + self.current_loss_burst.as_ref().map(|x| x.latest_non_probe) } } +#[derive(Clone)] +struct LossBurst { + smallest_packet_size: u16, +} + +#[derive(Clone)] +struct CurrentLossBurst { + smallest_packet_size: u16, + latest_non_probe: u64, +} + // Corresponds to the RFC's `MAX_PROBES` constant (see // https://www.rfc-editor.org/rfc/rfc8899#section-5.1.2) const MAX_PROBE_RETRANSMITS: usize = 3; -const BLACK_HOLE_THRESHOLD: u8 = 3; +const BLACK_HOLE_THRESHOLD: usize = 3; #[cfg(test)] mod tests {