From a4e5adbaab2c9d995934654ea36fd9772bb63bd7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 15 Jul 2023 00:57:44 -0700 Subject: [PATCH 1/4] [sled-agent] Sled Agent destroys crypt/zone filesystems if it hasn't parsed them --- Cargo.lock | 1 + illumos-utils/src/zfs.rs | 14 +++----- sled-hardware/Cargo.toml | 1 + sled-hardware/src/disk.rs | 73 +++++++++++++++++++++++++++++++-------- 4 files changed, 66 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e620a341b14..442960097ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7520,6 +7520,7 @@ dependencies = [ "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", + "rand 0.8.5", "schemars", "serde", "serial_test", diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index d533d5f9e39..76ec405422f 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -186,9 +186,6 @@ impl Zfs { /// Creates a new ZFS filesystem named `name`, unless one already exists. /// /// Applies an optional quota, provided _in bytes_. - /// - /// Returns "true" if the filesystem was mounted, when it previously was - /// not. pub fn ensure_filesystem( name: &str, mountpoint: Mountpoint, @@ -196,21 +193,20 @@ impl Zfs { do_format: bool, encryption_details: Option, quota: Option, - ) -> Result { + ) -> Result<(), EnsureFilesystemError> { let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; if exists { if encryption_details.is_none() { // If the dataset exists, we're done. Unencrypted datasets are // automatically mounted. - return Ok(false); + return Ok(()); } else { if mounted { // The dataset exists and is mounted - return Ok(false); + return Ok(()); } // We need to load the encryption key and mount the filesystem - Self::mount_encrypted_dataset(name, &mountpoint)?; - return Ok(true); + return Self::mount_encrypted_dataset(name, &mountpoint); } } @@ -262,7 +258,7 @@ impl Zfs { }); } } - Ok(true) + Ok(()) } fn mount_encrypted_dataset( diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 5550d8ac057..c6bc09f41ec 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -16,6 +16,7 @@ libc.workspace = true macaddr.workspace = true nexus-client.workspace = true omicron-common.workspace = true +rand.workspace = true schemars.workspace = true serde.workspace = true slog.workspace = true diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index f2c4c276b22..dba0ad80cd3 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,6 +4,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; +use illumos_utils::zfs; use illumos_utils::zfs::DestroyDatasetErrorVariant; use illumos_utils::zfs::EncryptionDetails; use illumos_utils::zfs::Keypath; @@ -14,8 +15,10 @@ use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; +use rand::distributions::{Alphanumeric, DistString}; use slog::Logger; use slog::{info, warn}; +use std::sync::OnceLock; use tokio::fs::{remove_file, File}; use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; use uuid::Uuid; @@ -64,6 +67,16 @@ pub enum DiskError { MissingStorageKeyRequester, #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] CannotParseEpochProperty(String), + #[error("Encrypted filesystem '{dataset}' cannot set 'oxide:agent' property: {err}")] + CannotSetAgentProperty { dataset: String, err: zfs::SetValueError }, + #[error( + "Encrypted dataset '{dataset}' missing 'oxide:agent' property: {err}" + )] + CannotParseAgentProperty { + dataset: String, + #[source] + err: zfs::GetValueError, + }, } /// A partition (or 'slice') of a disk. @@ -438,7 +451,7 @@ impl Disk { // Ensure the root encrypted filesystem exists // Datasets below this in the hierarchy will inherit encryption - let newly_mounted_crypt = if let Some(dataset) = root { + if let Some(dataset) = root { let Some(key_requester) = key_requester else { return Err(DiskError::MissingStorageKeyRequester); }; @@ -503,26 +516,50 @@ impl Disk { DiskError::IoError { path: keyfile.path().0.clone(), error } })?; - result? - } else { - false + result?; }; for dataset in datasets.into_iter() { let mountpoint = zpool_name.dataset_mountpoint(dataset.name); let name = &format!("{}/{}", zpool_name, dataset.name); - if dataset.wipe && newly_mounted_crypt { - info!(log, "Automatically destroying dataset {}", name); - Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it might - // not have been formatted yet. - if let DestroyDatasetErrorVariant::NotFound = err.err { - Ok(()) - } else { - Err(err) + // Use a value that's alive for the duration of this sled agent + // to answer the question: should we wipe this disk, or have + // we seen it before? + // + // If this zone comes from a prior iteration of the sled agent, + // we opt to remove it. + static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); + let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { + Alphanumeric.sample_string(&mut rand::thread_rng(), 12) + }); + + if dataset.wipe { + match Zfs::get_oxide_value(name, "agent") { + Ok(v) if &v == agent_local_value => { + info!( + log, + "Skipping automatic wipe for dataset: {}", name + ); + } + Ok(_) | Err(_) => { + info!( + log, + "Automatically destroying dataset: {}", name + ); + Zfs::destroy_dataset(name).or_else(|err| { + // If we can't find the dataset, that's fine -- it might + // not have been formatted yet. + if let DestroyDatasetErrorVariant::NotFound = + err.err + { + Ok(()) + } else { + Err(err) + } + })?; } - })?; + } } let encryption_details = None; @@ -534,6 +571,14 @@ impl Disk { encryption_details, dataset.quota, )?; + + if dataset.wipe { + Zfs::set_oxide_value(name, "agent", agent_local_value) + .map_err(|err| DiskError::CannotSetAgentProperty { + dataset: name.clone(), + err, + })?; + } } Ok(()) } From b75850ebe3d7c027cfab2d7b8df889adb558651c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 15 Jul 2023 01:06:01 -0700 Subject: [PATCH 2/4] touch-ups --- sled-hardware/src/disk.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index dba0ad80cd3..a43ed70e2df 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -67,8 +67,12 @@ pub enum DiskError { MissingStorageKeyRequester, #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] CannotParseEpochProperty(String), - #[error("Encrypted filesystem '{dataset}' cannot set 'oxide:agent' property: {err}")] - CannotSetAgentProperty { dataset: String, err: zfs::SetValueError }, + #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] + CannotSetAgentProperty { + dataset: String, + #[source] + err: zfs::SetValueError, + }, #[error( "Encrypted dataset '{dataset}' missing 'oxide:agent' property: {err}" )] @@ -527,8 +531,8 @@ impl Disk { // to answer the question: should we wipe this disk, or have // we seen it before? // - // If this zone comes from a prior iteration of the sled agent, - // we opt to remove it. + // If this value comes from a prior iteration of the sled agent, + // we opt to remove the corresponding dataset. static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { Alphanumeric.sample_string(&mut rand::thread_rng(), 12) From 517bc95e3c9e13c5221c88143afca8651287dab6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 15 Jul 2023 01:08:00 -0700 Subject: [PATCH 3/4] clippy --- sled-hardware/src/disk.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index a43ed70e2df..3001714d078 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -71,7 +71,7 @@ pub enum DiskError { CannotSetAgentProperty { dataset: String, #[source] - err: zfs::SetValueError, + err: Box, }, #[error( "Encrypted dataset '{dataset}' missing 'oxide:agent' property: {err}" @@ -580,7 +580,7 @@ impl Disk { Zfs::set_oxide_value(name, "agent", agent_local_value) .map_err(|err| DiskError::CannotSetAgentProperty { dataset: name.clone(), - err, + err: Box::new(err), })?; } } From acab83bfd624a8ab9ff639141a63e2215f7cfe33 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 16 Jul 2023 20:14:06 -0700 Subject: [PATCH 4/4] Review feedback --- sled-hardware/src/disk.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 8dccc2c0557..f19b0592fdc 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -73,14 +73,6 @@ pub enum DiskError { #[source] err: Box, }, - #[error( - "Encrypted dataset '{dataset}' missing 'oxide:agent' property: {err}" - )] - CannotParseAgentProperty { - dataset: String, - #[source] - err: zfs::GetValueError, - }, } /// A partition (or 'slice') of a disk. @@ -538,7 +530,7 @@ impl Disk { // we opt to remove the corresponding dataset. static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { - Alphanumeric.sample_string(&mut rand::thread_rng(), 12) + Alphanumeric.sample_string(&mut rand::thread_rng(), 20) }); if dataset.wipe {