Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 244 additions & 9 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions deploy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ omicron-sled-agent = { path = "../sled-agent" }
omicron-package = { path = "../package" }
serde = { version = "1.0", features = [ "derive" ] }
serde_derive = "1.0"
sp-sim = { path = "../sp-sim" }
structopt = "0.3"
thiserror = "1.0"
toml = "0.5.9"

# Disable doc builds by default for our binaries to work around issue
# rust-lang/cargo#8373. These docs would not be very useful anyway.
Expand Down
36 changes: 35 additions & 1 deletion deploy/src/bin/sled-agent-overlay-files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use omicron_sled_agent::bootstrap::trust_quorum::{
RackSecret, ShareDistribution,
};

use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use sp_sim::config::GimletConfig;
use sp_sim::config::SpCommonConfig;
use std::path::PathBuf;
use structopt::StructOpt;

Expand Down Expand Up @@ -60,8 +62,40 @@ fn overlay_secret_shares(
Ok(())
}

// Generate a config file for a simulated SP in each deployment server folder.
//
// A `config-sp.toml` file is written into every directory in `server_dirs`.
// Each server gets a distinct config: the first byte of both the serial
// number and the device ID cert seed is incremented before each file is
// written, so the first server sees `1`, the second `2`, and so on. The
// manufacturing root cert seed is left all-zero for every server.
//
// Errors: returns an error if the config cannot be serialized to TOML or if
// any config file cannot be written.
//
// Panics: if more than 255 directories are supplied (see the assertion
// below).
fn overlay_sp_configs(server_dirs: &[PathBuf]) -> Result<()> {
    // We will eventually need to flesh out more of this config; for now,
    // it's sufficient to only generate an SP that emulates a RoT.
    let mut config = GimletConfig {
        common: SpCommonConfig {
            multicast_addr: None,
            bind_addrs: None,
            serial_number: [0; 16],
            manufacturing_root_cert_seed: [0; 32],
            device_id_cert_seed: [0; 32],
        },
        components: Vec::new(),
    };

    // Our lazy device ID generation fails if we overflow a u8: the counters
    // below start at 0 and are bumped once per server, so 255 servers is the
    // most we can handle before the `+= 1` would overflow.
    assert!(server_dirs.len() <= 255, "expand simulated SP ID generation");

    for server_dir in server_dirs {
        config.common.serial_number[0] += 1;
        config.common.device_id_cert_seed[0] += 1;

        // Propagate serialization failures to the caller instead of
        // panicking; this function already reports I/O failures this way.
        let bytes = toml::ser::to_vec(&config)
            .context("failed to serialize simulated SP config")?;
        let path = server_dir.join("config-sp.toml");
        std::fs::write(&path, bytes)
            .with_context(|| format!("failed to write {}", path.display()))?;
    }

    Ok(())
}

// Entry point: parse the command-line arguments, then overlay the secret
// shares followed by the simulated SP configs onto the given directories.
// The first failure is returned to the caller.
fn main() -> Result<()> {
    // Convert an argument-parsing failure into our error type rather than
    // letting the parser handle it itself.
    let args = match Args::from_args_safe() {
        Ok(parsed) => parsed,
        Err(err) => return Err(anyhow!(err)),
    };

    overlay_secret_shares(args.threshold, &args.directories)?;
    overlay_sp_configs(&args.directories)
}
3 changes: 3 additions & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_deb
slog-dtrace = "0.2"
smf = "0.2"
spdm = { git = "https://github.com/oxidecomputer/spdm", rev = "9742f6e" }
sp-sim = { path = "../sp-sim" }
sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" }
sprockets-proxy = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" }
socket2 = { version = "0.4", features = [ "all" ] }
structopt = "0.3"
tar = "0.4"
Expand Down
48 changes: 47 additions & 1 deletion sled-agent/src/bin/sled-agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use omicron_sled_agent::bootstrap::{
};
use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig;
use omicron_sled_agent::{config::Config as SledConfig, server as sled_server};
use sp_sim::config::GimletConfig;
use std::net::SocketAddr;
use std::path::PathBuf;
use structopt::StructOpt;
Expand Down Expand Up @@ -108,6 +109,20 @@ async fn do_run() -> Result<(), CmdError> {
} else {
None
};
let sp_config_path = {
let mut sp_config_path = config_path.clone();
sp_config_path.pop();
sp_config_path.push("config-sp.toml");
sp_config_path
};
let sp_config = if sp_config_path.exists() {
Some(
GimletConfig::from_file(sp_config_path)
.map_err(|e| CmdError::Failure(e.to_string()))?,
)
} else {
None
};

// Derive the bootstrap address from the data link's MAC address.
let link = config
Expand All @@ -116,16 +131,47 @@ async fn do_run() -> Result<(), CmdError> {
let bootstrap_address = bootstrap_address(link)
.map_err(|e| CmdError::Failure(e.to_string()))?;

// Are we going to simulate a local SP? If so:
//
// 1. The bootstrap dropshot server listens on localhost
// 2. A sprockets proxy listens on `bootstrap_address` (and relays
// incoming connections to the localhost dropshot server)
//
// If we're not simulating a local SP, we can't establish sprockets
// sessions, so we'll have the bootstrap dropshot server listen on
// `bootstrap_address` (and no sprockets proxy).
//
// TODO-security: With this configuration, dropshot itself is
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should lift this to a Github issue as you mentioned in the PR description; I don't think we want to lose track of this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 I'll open an issue as a part of merging this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merged and opened #1161

// running plain HTTP and blindly trusting all connections from
// localhost. We have a similar sprockets proxy on the client side,
// where the proxy blindly trusts all connections from localhost
// (although the client-side proxy only runs while a request is being made,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo (missing word?): while is being made

// while our dropshot server is always listening). Can we secure
// these connections sufficiently? Other options include expanding
// dropshot/progenitor to allow a custom connection layer (supported
// by hyper, but not reqwest), keeping the sprockets proxy but using
// something other than TCP that we can lock down, or abandoning
// dropshot and using a bespoke protocol over a raw
// sprockets-encrypted TCP connection.
let (bootstrap_dropshot_addr, sprockets_proxy_bind_addr) =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: These two values seem really tightly coupled; I wonder if they should be a distinct type.

That being said, I understand passing things through the ConfigDropshot doesn't make that trivial, so we can punt on this.

if sp_config.is_some() {
("[::1]:0".parse().unwrap(), Some(bootstrap_address))
} else {
(SocketAddr::V6(bootstrap_address), None)
};

// Configure and run the Bootstrap server.
let bootstrap_config = BootstrapConfig {
id: config.id,
dropshot: ConfigDropshot {
bind_address: SocketAddr::V6(bootstrap_address),
bind_address: bootstrap_dropshot_addr,
request_body_max_bytes: 1024 * 1024,
..Default::default()
},
log: config.log.clone(),
rss_config,
sprockets_proxy_bind_addr,
sp_config,
};

// TODO: It's a little silly to pass the config this way - namely,
Expand Down
5 changes: 5 additions & 0 deletions sled-agent/src/bootstrap/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::config::Config as SledConfig;
use crate::illumos::dladm::{self, Dladm, PhysicalLink};
use crate::illumos::zone::Zones;
use crate::server::Server as SledServer;
use crate::sp::SpHandle;
use omicron_common::address::get_sled_address;
use omicron_common::api::external::{Error as ExternalError, MacAddr};
use omicron_common::backoff::{
Expand Down Expand Up @@ -93,6 +94,7 @@ pub(crate) struct Agent {
rss: Mutex<Option<RssHandle>>,
sled_agent: Mutex<Option<SledServer>>,
sled_config: SledConfig,
sp: Option<SpHandle>,
}

fn get_sled_agent_request_path() -> PathBuf {
Expand Down Expand Up @@ -132,6 +134,7 @@ impl Agent {
log: Logger,
sled_config: SledConfig,
address: Ipv6Addr,
sp: Option<SpHandle>,
) -> Result<Self, BootstrapError> {
let ba_log = log.new(o!(
"component" => "BootstrapAgent",
Expand Down Expand Up @@ -190,6 +193,7 @@ impl Agent {
rss: Mutex::new(None),
sled_agent: Mutex::new(None),
sled_config,
sp,
};

let request_path = get_sled_agent_request_path();
Expand Down Expand Up @@ -405,6 +409,7 @@ impl Agent {
&self.parent_log,
rss_config.clone(),
self.peer_monitor.observer().await,
self.sp.clone(),
);
self.rss.lock().await.replace(rss);
}
Expand Down
8 changes: 8 additions & 0 deletions sled-agent/src/bootstrap/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use dropshot::ConfigDropshot;
use dropshot::ConfigLogging;
use serde::Deserialize;
use serde::Serialize;
use sp_sim::config::GimletConfig;
use std::net::SocketAddrV6;
use uuid::Uuid;

pub const BOOTSTRAP_AGENT_PORT: u16 = 12346;
Expand All @@ -20,4 +22,10 @@ pub struct Config {
pub log: ConfigLogging,

pub rss_config: Option<crate::rack_setup::config::SetupServiceConfig>,

// If present, `dropshot` should bind to a localhost address, and we'll
// configure a sprockets-proxy pointed to it that listens on this
// (non-localhost) address.
pub sprockets_proxy_bind_addr: Option<SocketAddrV6>,
pub sp_config: Option<GimletConfig>,
}
77 changes: 71 additions & 6 deletions sled-agent/src/bootstrap/rss_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ use super::discovery::PeerMonitorObserver;
use super::params::SledAgentRequest;
use crate::rack_setup::config::SetupServiceConfig;
use crate::rack_setup::service::Service;
use crate::sp::SpHandle;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use omicron_common::backoff::internal_service_policy;
use omicron_common::backoff::retry_notify;
use omicron_common::backoff::BackoffError;
use slog::Logger;
use std::net::SocketAddr;
use std::net::SocketAddrV6;
use std::time::Duration;
use thiserror::Error;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
Expand Down Expand Up @@ -43,6 +46,7 @@ impl RssHandle {
log: &Logger,
config: SetupServiceConfig,
peer_monitor: PeerMonitorObserver,
sp: Option<SpHandle>,
) -> Self {
let (tx, rx) = rss_channel();

Expand All @@ -54,7 +58,7 @@ impl RssHandle {
);
let log = log.new(o!("component" => "BootstrapAgentRssHandler"));
let task = tokio::spawn(async move {
rx.initialize_sleds(&log).await;
rx.initialize_sleds(&log, &sp).await;
});
Self { _rss: rss, task }
}
Expand All @@ -65,6 +69,9 @@ enum InitializeSledAgentError {
#[error("Failed to construct an HTTP client: {0}")]
HttpClient(#[from] reqwest::Error),

#[error("Failed to start sprockets proxy: {0}")]
SprocketsProxy(#[from] sprockets_proxy::Error),

#[error("Error making HTTP request to Bootstrap Agent: {0}")]
BootstrapApi(
#[from]
Expand All @@ -76,6 +83,7 @@ async fn initialize_sled_agent(
log: &Logger,
bootstrap_addr: SocketAddrV6,
request: &SledAgentRequest,
sp: &Option<SpHandle>,
) -> Result<(), InitializeSledAgentError> {
let dur = std::time::Duration::from_secs(60);

Expand All @@ -84,8 +92,57 @@ async fn initialize_sled_agent(
.timeout(dur)
.build()?;

let url = format!("http://{}", bootstrap_addr);
info!(log, "Sending request to peer agent: {}", url);
let (url, _proxy_task) = if let Some(sp) = sp.as_ref() {
// We have an SP; spawn a sprockets proxy for this connection.
let proxy_config = sprockets_proxy::Config {
bind_address: "[::1]:0".parse().unwrap(),
target_address: SocketAddr::V6(bootstrap_addr),
role: sprockets_proxy::Role::Client,
};
// TODO-cleanup The `Duration` passed to `Proxy::new()` is the timeout
// for communicating with the RoT. Currently it can be set to anything
// at all (our simulated RoT always responds immediately). Should the
// value move to our config?
let proxy = sprockets_proxy::Proxy::new(
&proxy_config,
sp.manufacturing_public_key(),
sp.rot_handle(),
sp.rot_certs(),
Duration::from_secs(5),
log.new(o!("BootstrapAgentClientSprocketsProxy"
=> proxy_config.target_address)),
)
.await?;

let proxy_addr = proxy.local_addr();

let proxy_task = tokio::spawn(async move {
// TODO-robustness `proxy.run()` only fails if `accept()`ing on our
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Panicking seems totally reasonable to me.

// already-bound listening socket fails, which means something has
// gone very wrong. Do we have any recourse other than panicking?
// What does dropshot do if `accept()` fails?
proxy.run().await.expect("sprockets client proxy failed");
});

// Wrap `proxy_task` in `AbortOnDrop`, which will abort it (shutting
// down the proxy) when we return.
let proxy_task = AbortOnDrop(proxy_task);

info!(
log, "Sending request to peer agent via sprockets proxy";
"peer" => %bootstrap_addr,
"sprockets_proxy" => %proxy_addr,
);
(format!("http://{}", proxy_addr), Some(proxy_task))
} else {
// We have no SP; connect directly.
info!(
log, "Sending request to peer agent";
"peer" => %bootstrap_addr,
);
(format!("http://{}", bootstrap_addr), None)
};

let client = bootstrap_agent_client::Client::new_with_client(
&url,
client,
Expand Down Expand Up @@ -119,7 +176,7 @@ async fn initialize_sled_agent(
};
retry_notify(internal_service_policy(), sled_agent_initialize, log_failure)
.await?;
info!(log, "Peer agent at {} initialized", url);
info!(log, "Peer agent initialized"; "peer" => %bootstrap_addr);
Ok(())
}

Expand Down Expand Up @@ -178,7 +235,7 @@ struct BootstrapAgentHandleReceiver {
}

impl BootstrapAgentHandleReceiver {
async fn initialize_sleds(mut self, log: &Logger) {
async fn initialize_sleds(mut self, log: &Logger, sp: &Option<SpHandle>) {
let (requests, tx_response) = match self.inner.recv().await {
Some(requests) => requests,
None => {
Expand All @@ -201,7 +258,7 @@ impl BootstrapAgentHandleReceiver {
"target_sled" => %bootstrap_addr,
);

initialize_sled_agent(log, bootstrap_addr, &request)
initialize_sled_agent(log, bootstrap_addr, &request, sp)
.await
.map_err(|err| {
format!(
Expand Down Expand Up @@ -241,3 +298,11 @@ impl BootstrapAgentHandleReceiver {
tx_response.send(Ok(())).unwrap();
}
}

/// Guard around a spawned task's `JoinHandle` that aborts the task when the
/// guard goes out of scope.
struct AbortOnDrop<T>(JoinHandle<T>);

impl<T> Drop for AbortOnDrop<T> {
    /// Aborts the wrapped task.
    fn drop(&mut self) {
        let Self(handle) = self;
        handle.abort();
    }
}
Loading