From 16187882558a2e824a7e759e2a580cd322e5b831 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 9 Sep 2022 19:38:55 +0000 Subject: [PATCH] Fix ownership bug in OPTE port management - Make the PortTicket non-clonable, and _not_ owned by the Port itself. This causes deadlocks. - Dropping the port from the zone cleans up the resources, and the ports are removed from the manager (via the singleton tickets) in the `Instance::stop` method - Make sure to try to clean up all ports for an instance, even if early ones fail --- sled-agent/src/instance.rs | 16 +++++++---- sled-agent/src/opte/illumos/port.rs | 17 ++---------- sled-agent/src/opte/illumos/port_manager.rs | 27 +++++++++---------- sled-agent/src/opte/non_illumos/port.rs | 13 --------- .../src/opte/non_illumos/port_manager.rs | 27 +++++++++---------- 5 files changed, 37 insertions(+), 63 deletions(-) diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 036480f4fd3..ee5601df180 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -517,14 +517,14 @@ impl Instance { } else { (None, None) }; - let port = inner.port_manager.create_port( + let (port, port_ticket) = inner.port_manager.create_port( *inner.id(), nic, snat, external_ips, )?; - port_tickets.push(port.ticket()); opte_ports.push(port); + port_tickets.push(port_ticket); } // Create a zone for the propolis instance, using the previously @@ -711,13 +711,19 @@ impl Instance { running_state.instance_ticket.terminate(); // And remove the OPTE ports from the port manager + let mut result = Ok(()); if let Some(tickets) = running_state.port_tickets.as_mut() { for ticket in tickets.iter_mut() { - ticket.release()?; + // Release the port from the manager, and store any error. We + // don't return immediately so that we can try to clean up all + // ports, even if early ones fail. Return the last error, which + // is OK for now. + if let Err(e) = ticket.release() { + result = Err(e.into()); + } } } - - Ok(()) + result } // Monitors propolis until explicitly told to disconnect. diff --git a/sled-agent/src/opte/illumos/port.rs b/sled-agent/src/opte/illumos/port.rs index efc15984bc8..abe84b13930 100644 --- a/sled-agent/src/opte/illumos/port.rs +++ b/sled-agent/src/opte/illumos/port.rs @@ -7,7 +7,6 @@ use crate::illumos::dladm::Dladm; use crate::opte::BoundaryServices; use crate::opte::Gateway; -use crate::opte::PortTicket; use crate::opte::Vni; use crate::params::SourceNatConfig; use ipnetwork::IpNetwork; @@ -18,8 +17,6 @@ use std::sync::Arc; #[derive(Debug)] struct PortInner { - // Contains instance ID and a pointer to the parent manager - ticket: PortTicket, // Name of the port as identified by OPTE name: String, // IP address within the VPC Subnet @@ -36,7 +33,7 @@ struct PortInner { _underlay_ip: Ipv6Addr, // The external IP address and port range provided for this port, to allow // outbound network connectivity. - source_nat: Option, + _source_nat: Option, // The external IP addresses provided to this port, to allow _inbound_ // network connectivity. external_ips: Option>, @@ -105,7 +102,6 @@ pub struct Port { impl Port { #[allow(clippy::too_many_arguments)] pub fn new( - ticket: PortTicket, name: String, ip: IpAddr, subnet: IpNetwork, @@ -121,7 +117,6 @@ impl Port { ) -> Self { Self { inner: Arc::new(PortInner { - ticket, name, _ip: ip, _subnet: subnet, @@ -129,7 +124,7 @@ impl Port { slot, _vni: vni, _underlay_ip: underlay_ip, - source_nat, + _source_nat: source_nat, external_ips, _gateway: gateway, _boundary_services: boundary_services, @@ -138,10 +133,6 @@ impl Port { } } - pub fn source_nat(&self) -> &Option { - &self.inner.source_nat - } - pub fn external_ips(&self) -> &Option> { &self.inner.external_ips } @@ -157,8 +148,4 @@ impl Port { pub fn slot(&self) -> u8 { self.inner.slot } - - pub fn ticket(&self) -> PortTicket { - self.inner.ticket.clone() - } } diff --git a/sled-agent/src/opte/illumos/port_manager.rs b/sled-agent/src/opte/illumos/port_manager.rs index 23eae2cd96e..55c9c1cb1e9 100644 --- a/sled-agent/src/opte/illumos/port_manager.rs +++ b/sled-agent/src/opte/illumos/port_manager.rs @@ -173,7 +173,7 @@ impl PortManager { nic: &NetworkInterface, source_nat: Option, external_ips: Option>, - ) -> Result { + ) -> Result<(Port, PortTicket), Error> { // TODO-completess: Remove IPv4 restrictions once OPTE supports virtual // IPv6 networks. let private_ip = match nic.ip { @@ -335,7 +335,7 @@ impl PortManager { vnic_name }; - let port = { + let (port, ticket) = { let mut ports = self.inner.ports.lock().unwrap(); let ticket = PortTicket::new( instance_id, @@ -343,7 +343,6 @@ impl PortManager { self.inner.clone(), ); let port = Port::new( - ticket, port_name.clone(), nic.ip, subnet, @@ -366,7 +365,7 @@ impl PortManager { &port_name, ); self.inner.update_secondary_macs(&mut ports)?; - port + (port, ticket) }; // Add a router entry for this interface's subnet, directing traffic to the @@ -433,11 +432,10 @@ impl PortManager { "Created OPTE port for guest"; "port" => ?&port, ); - Ok(port) + Ok((port, ticket)) } } -#[derive(Clone)] pub struct PortTicket { id: Uuid, port_name: String, @@ -446,15 +444,14 @@ pub struct PortTicket { impl std::fmt::Debug for PortTicket { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - const SOME: &str = "Some(_)"; - const NONE: &str = "None"; - f.debug_struct("PortTicket") - .field("id", &self.id) - .field( - "manager", - if self.manager.is_some() { &SOME } else { &NONE }, - ) - .finish() + if self.manager.is_some() { + f.debug_struct("PortTicket") + .field("id", &self.id) + .field("manager", &"{ .. }") + .finish() + } else { + f.debug_struct("PortTicket").field("id", &self.id).finish() + } } } diff --git a/sled-agent/src/opte/non_illumos/port.rs b/sled-agent/src/opte/non_illumos/port.rs index 556f8380a73..b562d987dbf 100644 --- a/sled-agent/src/opte/non_illumos/port.rs +++ b/sled-agent/src/opte/non_illumos/port.rs @@ -6,7 +6,6 @@ use crate::opte::BoundaryServices; use crate::opte::Gateway; -use crate::opte::PortTicket; use crate::opte::Vni; use crate::params::SourceNatConfig; use ipnetwork::IpNetwork; @@ -18,8 +17,6 @@ use std::sync::Arc; #[derive(Debug)] #[allow(dead_code)] struct PortInner { - // Contains instance ID and a pointer to the parent manager - ticket: PortTicket, // Name of the port as identified by OPTE name: String, // IP address within the VPC Subnet @@ -70,7 +67,6 @@ pub struct Port { impl Port { #[allow(clippy::too_many_arguments)] pub fn new( - ticket: PortTicket, name: String, ip: IpAddr, subnet: IpNetwork, @@ -86,7 +82,6 @@ impl Port { ) -> Self { Self { inner: Arc::new(PortInner { - ticket, name, _ip: ip, _subnet: subnet, @@ -103,10 +98,6 @@ impl Port { } } - pub fn source_nat(&self) -> &Option { - &self.inner.source_nat - } - pub fn external_ips(&self) -> &Option> { &self.inner.external_ips } @@ -122,8 +113,4 @@ impl Port { pub fn slot(&self) -> u8 { self.inner.slot } - - pub fn ticket(&self) -> PortTicket { - self.inner.ticket.clone() - } } diff --git a/sled-agent/src/opte/non_illumos/port_manager.rs b/sled-agent/src/opte/non_illumos/port_manager.rs index e2b1da07926..2b0649473b4 100644 --- a/sled-agent/src/opte/non_illumos/port_manager.rs +++ b/sled-agent/src/opte/non_illumos/port_manager.rs @@ -111,7 +111,7 @@ impl PortManager { nic: &NetworkInterface, source_nat: Option, external_ips: Option>, - ) -> Result { + ) -> Result<(Port, PortTicket), Error> { // TODO-completess: Remove IPv4 restrictions once OPTE supports virtual // IPv6 networks. let _ = match nic.ip { @@ -142,7 +142,7 @@ impl PortManager { let boundary_services = BoundaryServices::default(); let port_name = self.inner.next_port_name(); let vnic = format!("v{}", port_name); - let port = { + let (port, ticket) = { let mut ports = self.inner.ports.lock().unwrap(); let ticket = PortTicket::new( instance_id, @@ -150,7 +150,6 @@ impl PortManager { self.inner.clone(), ); let port = Port::new( - ticket, port_name.clone(), nic.ip, subnet, @@ -172,7 +171,7 @@ impl PortManager { instance_id, &port_name, ); - port + (port, ticket) }; info!( @@ -180,11 +179,10 @@ impl PortManager { "Created OPTE port for guest"; "port" => ?&port, ); - Ok(port) + Ok((port, ticket)) } } -#[derive(Clone)] pub struct PortTicket { id: Uuid, port_name: String, @@ -193,15 +191,14 @@ pub struct PortTicket { impl std::fmt::Debug for PortTicket { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - const SOME: &str = "Some(_)"; - const NONE: &str = "None"; - f.debug_struct("PortTicket") - .field("id", &self.id) - .field( - "manager", - if self.manager.is_some() { &SOME } else { &NONE }, - ) - .finish() + if self.manager.is_some() { + f.debug_struct("PortTicket") + .field("id", &self.id) + .field("manager", &"{ .. }") + .finish() + } else { + f.debug_struct("PortTicket").field("id", &self.id).finish() + } } }