Changes from all commits
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -501,7 +501,7 @@ hyper = "1.6.0"
hyper-util = "0.1.16"
hyper-rustls = "0.27.7"
hyper-staticfile = "0.10.1"
iddqd = { version = "0.3.13", features = ["daft", "serde", "schemars08"] }
iddqd = { version = "0.3.16", features = ["daft", "serde", "schemars08"] }
id-map = { path = "id-map" }
illumos-utils = { path = "illumos-utils" }
iana-time-zone = "0.1.63"
1 change: 1 addition & 0 deletions trust-quorum/Cargo.toml
@@ -16,6 +16,7 @@ camino.workspace = true
chacha20poly1305.workspace = true
ciborium.workspace = true
daft.workspace = true
debug-ignore.workspace = true
derive_more.workspace = true
futures.workspace = true
gfss.workspace = true
24 changes: 22 additions & 2 deletions trust-quorum/protocol/src/node.rs
@@ -32,7 +32,9 @@ use crate::{
use daft::{Diffable, Leaf};
use gfss::shamir::Share;
use omicron_uuid_kinds::RackUuid;
use serde::{Deserialize, Serialize};
use slog::{Logger, error, info, o, warn};
use slog_error_chain::SlogInlineError;

/// An entity capable of participating in trust quorum
///
@@ -1063,7 +1065,16 @@ impl Node {
}
}

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[derive(
Debug,
Clone,
thiserror::Error,
PartialEq,
Eq,
SlogInlineError,
Serialize,
Deserialize,
)]
pub enum CommitError {
#[error("invalid rack id")]
InvalidRackId(
@@ -1077,7 +1088,16 @@
Expunged { epoch: Epoch, from: BaseboardId },
}

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[derive(
Debug,
Clone,
thiserror::Error,
PartialEq,
Eq,
SlogInlineError,
Serialize,
Deserialize,
)]
pub enum PrepareAndCommitError {
#[error("invalid rack id")]
InvalidRackId(
5 changes: 4 additions & 1 deletion trust-quorum/protocol/src/validators.rs
@@ -12,6 +12,7 @@ use crate::{
};
use daft::{BTreeSetDiff, Diffable, Leaf};
use omicron_uuid_kinds::RackUuid;
use serde::{Deserialize, Serialize};
use slog::{Logger, error, info, warn};
use std::collections::BTreeSet;

Expand Down Expand Up @@ -57,7 +58,9 @@ pub struct SledExpungedError {
last_prepared_epoch: Option<Epoch>,
}

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[derive(
Debug, Clone, thiserror::Error, PartialEq, Eq, Serialize, Deserialize,
)]
#[error("mismatched rack id: expected {expected:?}, got {got:?}")]
pub struct MismatchedRackIdError {
pub expected: RackUuid,
76 changes: 63 additions & 13 deletions trust-quorum/src/connection_manager.rs
@@ -5,13 +5,15 @@
//! A mechanism for maintaining a full mesh of trust quorum node connections

use crate::established_conn::EstablishedConn;
use crate::proxy;
use trust_quorum_protocol::{BaseboardId, Envelope, PeerMsg};

// TODO: Move to this crate
// https://github.com/oxidecomputer/omicron/issues/9311
use bootstore::schemes::v0::NetworkConfig;

use camino::Utf8PathBuf;
use derive_more::From;
Member:

woah 👀

use iddqd::{
BiHashItem, BiHashMap, TriHashItem, TriHashMap, bi_upcast, tri_upcast,
};
Expand Down Expand Up @@ -60,7 +62,7 @@ pub enum MainToConnMsg {
///
/// All `WireMsg`s sent between nodes is prefixed with a 4 byte size header used
/// for framing.
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, From)]
pub enum WireMsg {
/// Used for connection keep alive
Ping,
@@ -79,6 +81,12 @@ pub enum WireMsg {
/// of tiny information layered on top of trust quorum. You can still think
/// of it as a bootstore, although, we no longer use that name.
NetworkConfig(NetworkConfig),

/// Requests proxied to other nodes
ProxyRequest(proxy::WireRequest),

/// Responses to proxy requests
ProxyResponse(proxy::WireResponse),
}
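
As an illustration of the framing described in the doc comment above (not part of the diff): a minimal sketch of writing one length-prefixed message, assuming CBOR via ciborium and a big-endian u32 header; the helper name and error handling are invented and may not match the crate's actual implementation.

```rust
use serde::Serialize;
use std::io::{self, Write};

// Hypothetical helper: frame one serialized message with a 4-byte size header.
// The real crate's serializer, byte order, and error handling may differ.
fn write_framed<W: Write, T: Serialize>(mut w: W, msg: &T) -> io::Result<()> {
    let mut body = Vec::new();
    ciborium::into_writer(msg, &mut body)
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("{e:?}")))?;
    // 4-byte size header followed by the encoded message body.
    w.write_all(&(body.len() as u32).to_be_bytes())?;
    w.write_all(&body)?;
    Ok(())
}
```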

/// Messages sent from connection managing tasks to the main peer task
@@ -99,6 +107,8 @@ pub enum ConnToMainMsgInner {
Received { from: BaseboardId, msg: PeerMsg },
ReceivedNetworkConfig { from: BaseboardId, config: NetworkConfig },
Disconnected { peer_id: BaseboardId },
ProxyRequestReceived { from: BaseboardId, req: proxy::WireRequest },
ProxyResponseReceived { from: BaseboardId, rsp: proxy::WireResponse },
}

pub struct TaskHandle {
@@ -120,15 +130,11 @@ impl TaskHandle {
self.abort_handle.abort()
}

pub async fn send(&self, msg: PeerMsg) {
let _ = self.tx.send(MainToConnMsg::Msg(WireMsg::Tq(msg))).await;
}

pub async fn send_network_config(&self, config: NetworkConfig) {
let _ = self
.tx
.send(MainToConnMsg::Msg(WireMsg::NetworkConfig(config)))
.await;
pub async fn send<T>(&self, msg: T)
where
T: Into<WireMsg>,
{
let _ = self.tx.send(MainToConnMsg::Msg(msg.into())).await;
}
}

@@ -172,7 +178,10 @@ impl EstablishedTaskHandle {
self.task_handle.abort();
}

pub async fn send(&self, msg: PeerMsg) {
pub async fn send<T>(&self, msg: T)
where
T: Into<WireMsg>,
{
let _ = self.task_handle.send(msg).await;
}
}
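
Not part of the diff, just an illustration of what the generic send enables: with `#[derive(From)]` on `WireMsg`, each single-field variant gets a `From` impl, so any of those payload types can be passed straight to `send` via `T: Into<WireMsg>`. The function below is an invented example.

```rust
// Illustrative only: broadcast a NetworkConfig using the new generic send.
// NetworkConfig converts to WireMsg::NetworkConfig through the derived From.
async fn broadcast_config(
    handles: &[EstablishedTaskHandle],
    config: &NetworkConfig,
) {
    for h in handles {
        h.send(config.clone()).await;
    }
}
```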
@@ -235,6 +244,12 @@ pub struct ConnMgrStatus {
pub total_tasks_spawned: u64,
}

/// The state of a proxy connection
pub enum ProxyConnState {
Connected,
Disconnected,
}

/// A structure to manage all sprockets connections to peer nodes
///
/// Each sprockets connection runs in its own task which communicates with the
@@ -399,7 +414,7 @@ impl ConnMgr {
"peer_id" => %h.baseboard_id,
"generation" => network_config.generation
);
h.task_handle.send_network_config(network_config.clone()).await;
h.send(network_config.clone()).await;
}
}

@@ -415,7 +430,42 @@
"peer_id" => %h.baseboard_id,
"generation" => network_config.generation
);
h.task_handle.send_network_config(network_config.clone()).await;
h.send(network_config.clone()).await;
}
}

/// Forward an API request to another node
///
/// Return the state of the connection at this point in time so that the
/// [`proxy::Tracker`] can manage the outstanding request on behalf of the
/// user.
pub async fn proxy_request(
&mut self,
destination: &BaseboardId,
req: proxy::WireRequest,
) -> ProxyConnState {
if let Some(h) = self.established.get1(destination) {
info!(self.log, "Sending {req:?}"; "peer_id" => %destination);
h.send(req).await;
ProxyConnState::Connected
Comment on lines +449 to +450

pietroalbini (Member):
In a previous PR I noted that the send() method ignores all failures and returns a success (well, it doesn't return a Result) even if the connection broke. For the trust quorum protocol that seemed fine, as the protocol is resilient to messages not being delivered.

The proxy APIs defined in another file expect either a response from the server or a Disconnected error before returning: if they receive nothing they will block forever. After spending probably way too much time thinking about failure cases related to this [1], if the message is sent to the established connection actor things will eventually be fine: when it returns, the caller will add the request to the tracker, and a disconnection detected by the established connection actor will eventually be relayed to the tracker.

The failure case I still see is when the channel is busy (with 10 pending requests) and the send method silently discards the message. In that case, we return a ProxyConnState::Connected and the request will be added to the tracker, but we will never get a response unless the connection breaks due to another unrelated request failing (since this request never got to the actor).

The code as is could be fine if the caller of any proxy method takes care of adding timeouts everywhere, but this feels like a problem waiting to happen. I'd feel way more comfortable if this returned a ProxyConnState::Busy if sending a message to the channel failed.

Footnotes

  1. I lost count of how many times I rewrote this comment with other failure cases that it turns out couldn't happen.
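
One possible shape of that suggestion, sketched against the types in this PR; the `Busy` variant and the `try_send`-based handling are hypothetical and not part of the change:

```rust
// Hypothetical extension of ProxyConnState for the "channel full" case.
pub enum ProxyConnState {
    Connected,
    Disconnected,
    /// The per-connection channel was full; the request never reached the task.
    Busy,
}

// ConnMgr::proxy_request could then map a failed try_send, roughly:
//
//     match h.try_send(req) {
//         Ok(()) => ProxyConnState::Connected,
//         Err(TrySendError::Full(_)) => ProxyConnState::Busy,
//         Err(TrySendError::Closed(_)) => ProxyConnState::Disconnected,
//     }
```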

andrewjstone (Contributor Author) commented on Nov 14, 2025:
This is a great analysis @pietroalbini. Thank you so much for taking the time to go through this process and understand the code and design. I actually think that if this were a problem, it would also be somewhat of a problem for the normal trust quorum protocol, as some messages from peers are never re-sent on a timer.

For example: if a coordinator is sending a Prepare msg to a peer and the message got dropped before making it to the connection task, it would not be re-sent unless the channel got disconnected and reconnected for some other reason. Now, that being said, the volume of messages is small and this should not happen. And as you point out, there is some built-in resilience: if the commit occurred, Nexus would PrepareAndCommit this node, or it would get a CommitAdvance message on the next restart (perhaps after an update). But it could still end up as a problem if too many nodes did this and the coordinator couldn't complete the prepare phase. Nexus would try a new configuration, perhaps at a different coordinator, after some time without prepare completing, but the system may still be overloaded.

With all that being said, I don't actually think what you pointed out is entirely true, and therefore this isn't actually a problem here. However, this analysis is also non-trivial. It almost makes me question whether using connection state for retries instead of timers is the right move. So far, I think it continues to work and has the benefit of not re-sending messages already sent over a reliable stream. Ok, so back to the problem.

> The failure case I still see is when the channel is busy (with 10 pending requests) and the send method silently discards the message.

The channel being used in send is a bounded mpsc channel, and send blocks when it is full. The error that is discarded only occurs when the channel itself gets disconnected, presumably because the task exited. In that case the Disconnect callback will eventually fire and all is well.

To help ensure that the disconnect callback occurs when buffers start filling up, there is also a MSG_WRITE_QUEUE_CAPACITY for each established connection that will trigger a disconnect if too many messages are pulled off the channel and serialized before they can be sent. Somewhat importantly, this channel is sized smaller than the queue, so if the queue is full it means that the TCP connection (or serialization) is too slow to move things along. We get backpressure, and eventually a disconnection that should allow things to clear up on a reconnect. I should clean up the TODO suggestion there, as we actually can't drop messages or we will break things, as you point out.
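
For reference, a small standalone sketch of the semantics being relied on here, assuming the per-connection channel is a bounded tokio::sync::mpsc: a full channel makes `send` wait rather than drop, and `send` only errors once the receiver is gone.

```rust
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (tx, rx) = mpsc::channel::<u32>(2);

    // `send` awaits for capacity when the channel is full (backpressure)
    // instead of discarding the message.
    for i in 0..2 {
        tx.send(i).await.expect("receiver still alive");
    }

    // Only after the receiver is dropped does `send` return an error,
    // which is the case the Disconnected callback eventually covers.
    drop(rx);
    assert!(tx.send(99).await.is_err());
}
```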

andrewjstone (Contributor Author):
It appears there is actually one place where the ConnToMainMsgInner::Disconnected message doesn't get sent to the NodeTask task when an EstablishedConnection closes. That is when the EstablishedConnection task itself panics. I believe that we use panic = abort in production, and so this isn't actually a problem in practice.

However, this has me wondering if instead I should signal to NodeTask from the ConnectionManager::step method when an EstablishedConnection exits rather than sending a Disconnected message from the task itself. That would cover both the successful exit and panic cases for the EstablishedConnection task.

Unfortunately, I also realized that there is another race condition that may get worse if I do this. If a new connection is accepted for the same peer it will trigger an abort of the old connection. In this case the old disconnect will occur after the new connection is established. That could cause problems for the protocol, and I should gate the Node::on_disconnect call by checking whether the task_id matches the current established task ID, as is done for the connection manager's on_disconnected callback.

Another option to solve the latter problem is to always reject a new accepted connection for the same peer if one is already established. Eventually the old one will go away, and the remote peer will retry.

I need to think about this a bit more, but will likely add a few small cleanup patches.
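
A toy, self-contained illustration of that gating idea (types and names invented; the real ConnMgr/Node types differ): a disconnect is only honored when the exiting task is still the registered connection for the peer.

```rust
use std::collections::BTreeMap;

struct Conns {
    // peer -> task id of the currently established connection
    current: BTreeMap<String, u64>,
}

impl Conns {
    /// Returns true only if this exit should be treated as a disconnect.
    fn on_task_exit(&mut self, peer: &str, task_id: u64) -> bool {
        match self.current.get(peer) {
            // The exiting task is still the registered connection.
            Some(&cur) if cur == task_id => {
                self.current.remove(peer);
                true
            }
            // A newer accepted connection already replaced this task.
            _ => false,
        }
    }
}

fn main() {
    let mut conns =
        Conns { current: BTreeMap::from([("sled-a".to_string(), 2)]) };
    assert!(!conns.on_task_exit("sled-a", 1)); // stale task: ignored
    assert!(conns.on_task_exit("sled-a", 2)); // current task: real disconnect
}
```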

Member:
IIUC, part of Emily's concern is that the request tracker inserted into proxy_tracker here https://github.com/oxidecomputer/omicron/pull/9403/files#r2527260678 is leaked in the case where the channel is disconnected but this method returns ProxyConnState::Connected because the disconnect callback has not fired yet. I think we need to either remove the entry from proxy_tracker in that case or have send indicate whether the task is still there so that we could return ProxyConnState::Disconnected and not insert into proxy_tracker?

andrewjstone (Contributor Author):
> IIUC, part of Emily's concern is that the request tracker inserted into proxy_tracker here https://github.com/oxidecomputer/omicron/pull/9403/files#r2527260678 is leaked in the case where the channel is disconnected but this method returns ProxyConnState::Connected because the disconnect callback has not fired yet. I think we need to either remove the entry from proxy_tracker in that case or have send indicate whether the task is still there so that we could return ProxyConnState::Disconnected and not insert into proxy_tracker?

That behavior is intentional. There is an inherent TOCTTOU where the message can be put on the channel and then the socket can disconnect. In this case we return Connected and then get a Disconnected callback sometime later to clear the state. This is also exactly what would happen if the message was pulled off the channel, serialized, sent over the socket, and then the channel disconnected. The key invariant to uphold is: if at any time a message is lost, the disconnect callback must fire a short time afterward. No further messages should be able to be sent over the socket.

What makes this work is that the disconnect callback always fires after the tracked socket is recorded. We know it hasn't fired yet because the EstablishedTaskHandle is still in the main map, which is owned by the same task that is doing the send. Therefore any disconnect will come immediately after the send if the task is gone. If there is no handle in the map then we return Disconnected. Note that it's also possible that the connection has already been re-established but the main task hasn't learned of it yet, so we can discard even if we are actually connected already. TOCTTOUs all around.

} else {
ProxyConnState::Disconnected
}
}

/// Return a response to a proxied request to another node
///
/// There is no need to track whether this succeeds or fails. If the
/// connection goes away the client on the other side will notice it and
/// retry if needed.
pub async fn proxy_response(
&mut self,
destination: &BaseboardId,
rsp: proxy::WireResponse,
) {
if let Some(h) = self.established.get1(destination) {
info!(self.log, "Sending {rsp:?}"; "peer_id" => %destination);
h.send(rsp).await;
}
}

30 changes: 30 additions & 0 deletions trust-quorum/src/established_conn.rs
@@ -233,6 +233,36 @@ impl EstablishedConn {
panic!("Connection to main task channnel full");
}
}
WireMsg::ProxyRequest(req) => {
Member:
Nit: all variants of this enum except for WireMsg::Ping have almost the same body; I wonder whether we could reduce the code duplication?

let msg = match msg {
    WireMsg::Ping => continue,
    WireMsg::NetworkConfig => ConnToMainMsgInner::ReceivedNetworkConfig { ... },
};
if let Err(_) = ... {
    ...
}
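
Spelling the nit out a little further, a sketch against the variants in this diff; it assumes the surrounding receive loop (hence `continue` for Ping) and may not compile as-is against the real types:

```rust
// Build the ConnToMainMsgInner first, then forward through a single path.
let inner = match msg {
    WireMsg::Ping => continue,
    WireMsg::Tq(msg) => ConnToMainMsgInner::Received {
        from: self.peer_id.clone(),
        msg,
    },
    WireMsg::NetworkConfig(config) => {
        ConnToMainMsgInner::ReceivedNetworkConfig {
            from: self.peer_id.clone(),
            config,
        }
    }
    WireMsg::ProxyRequest(req) => ConnToMainMsgInner::ProxyRequestReceived {
        from: self.peer_id.clone(),
        req,
    },
    WireMsg::ProxyResponse(rsp) => ConnToMainMsgInner::ProxyResponseReceived {
        from: self.peer_id.clone(),
        rsp,
    },
};
if let Err(_) = self.main_tx.try_send(ConnToMainMsg {
    task_id: self.task_id,
    msg: inner,
}) {
    error!(self.log, "Failed to send received msg to the main task");
    panic!("Connection to main task channel full");
}
```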

if let Err(_) = self.main_tx.try_send(ConnToMainMsg {
task_id: self.task_id,
msg: ConnToMainMsgInner::ProxyRequestReceived {
from: self.peer_id.clone(),
req,
},
}) {
error!(
self.log,
"Failed to send received proxy msg to the main task"
);
panic!("Connection to main task channel full");
}
}
WireMsg::ProxyResponse(rsp) => {
if let Err(_) = self.main_tx.try_send(ConnToMainMsg {
task_id: self.task_id,
msg: ConnToMainMsgInner::ProxyResponseReceived {
from: self.peer_id.clone(),
rsp,
},
}) {
error!(
self.log,
"Failed to send received proxy msg to the main task"
);
panic!("Connection to main task channel full");
}
Comment on lines +244 to +264

Member:
Hmm, I think that the log line and panic messages here should probably include whether the error indicates that the channel is full or was disconnected because the main task exited. Assuming that try_send returning an error means the channel is full could probably confuse people while debugging; we shouldn't say "channel full" in the disconnected case.
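
A fragment showing one way to do that, assuming main_tx is a bounded tokio::sync::mpsc::Sender (which is what try_send suggests); it would slot into each of the forwarding arms above, and the binding name and exact wording are illustrative:

```rust
use tokio::sync::mpsc::error::TrySendError;

// `conn_to_main_msg` stands for the ConnToMainMsg built in the arm above.
match self.main_tx.try_send(conn_to_main_msg) {
    Ok(()) => {}
    Err(TrySendError::Full(_)) => {
        error!(self.log, "Channel to main task full");
        panic!("Connection to main task channel full");
    }
    Err(TrySendError::Closed(_)) => {
        error!(self.log, "Channel to main task closed; main task exited");
        panic!("Connection to main task channel closed");
    }
}
```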

}
}
}
}
2 changes: 1 addition & 1 deletion trust-quorum/src/ledgers.rs
@@ -17,7 +17,7 @@ use slog::{Logger, info};
use trust_quorum_protocol::PersistentState;

/// A wrapper type around [`PersistentState`] for use as a [`Ledger`]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistentStateLedger {
pub generation: u64,
pub state: PersistentState,
3 changes: 2 additions & 1 deletion trust-quorum/src/lib.rs
@@ -7,9 +7,10 @@
mod connection_manager;
pub(crate) mod established_conn;
mod ledgers;
mod proxy;
mod task;

pub(crate) use connection_manager::{
ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg,
};
pub use task::NodeTask;
pub use task::{CommitStatus, Config, NodeApiError, NodeTask, NodeTaskHandle};