From 4b50beb5117788d451c222fc9c6fa938b4c40fcb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 27 Mar 2025 15:54:25 -0700 Subject: [PATCH 1/5] [sled-agent] Ensure that datasets get mounted --- illumos-utils/src/zfs.rs | 89 ++++++++++++++++++----- sled-agent/src/backing_fs.rs | 5 +- sled-agent/src/bootstrap/pre_server.rs | 1 + sled-storage/src/dataset.rs | 2 + sled-storage/src/manager.rs | 99 +++++++++++++++++++++++++- 5 files changed, 177 insertions(+), 19 deletions(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 61b943b587f..41fde4bfe1b 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -218,6 +218,8 @@ pub struct DatasetProperties { pub id: Option, /// The full name of the dataset. pub name: String, + /// Identifies whether or not the dataset is mount. + pub mounted: bool, /// Remaining space in the dataset and descendants. pub avail: ByteCount, /// Space used by dataset and descendants. @@ -237,7 +239,7 @@ pub struct DatasetProperties { impl DatasetProperties { const ZFS_GET_PROPS: &'static str = - "oxide:uuid,name,avail,used,quota,reservation,compression"; + "oxide:uuid,name,mounted,avail,used,quota,reservation,compression"; } impl TryFrom<&DatasetProperties> for SharedDatasetConfig { @@ -307,6 +309,11 @@ impl DatasetProperties { }) .transpose()?; let name = dataset_name.to_string(); + let mounted = props + .get("mounted") + .map(|(prop, _source)| prop.to_string()) + .ok_or(anyhow!("Missing 'mounted'"))? + == "yes"; let avail = props .get("available") .map(|(prop, _source)| prop) @@ -353,6 +360,7 @@ impl DatasetProperties { Ok(DatasetProperties { id, name, + mounted, avail, used, quota, @@ -420,19 +428,37 @@ fn build_zfs_set_key_value_pairs( props } +/// Describes the ZFS "canmount" options. +pub enum CanMount { + On, + Off, + NoAuto, +} + /// Arguments to [Zfs::ensure_dataset]. pub struct DatasetEnsureArgs<'a> { /// The full path of the ZFS dataset. pub name: &'a str, /// The expected mountpoint of this filesystem. + /// /// If the filesystem already exists, and is not mounted here, an error is /// returned. + /// + /// If creating a dataset, this adds the "mountpoint=..." option. pub mountpoint: Mountpoint, - /// Identifies whether or not this filesystem should be - /// used in a zone. Only used when creating a new filesystem - ignored - /// if the filesystem already exists. + /// Identifies whether or not hte dataset should be mounted. + /// + /// If "On": The dataset is mounted (unless it is also zoned). + /// If "Off/NoAuto": The dataset is not mounted. + /// + /// If creating a dataset, this adds the "canmount=..." option. + pub can_mount: CanMount, + + /// Identifies whether or not this filesystem should be used in a zone. + /// + /// If creating a dataset, this add the "zoned=on" option. pub zoned: bool, /// Ensures a filesystem as an encryption root. @@ -574,6 +600,7 @@ impl Zfs { DatasetEnsureArgs { name, mountpoint, + can_mount, zoned, encryption_details, size_details, @@ -585,6 +612,9 @@ impl Zfs { let props = build_zfs_set_key_value_pairs(size_details, id); if exists { + // If the dataset already exists: Update properties which might + // have changed, and ensure it has been mounted if it needs + // to be mounted. Self::set_values(name, props.as_slice()).map_err(|err| { EnsureDatasetError { name: name.to_string(), @@ -592,18 +622,10 @@ impl Zfs { } })?; - if encryption_details.is_none() { - // If the dataset exists, we're done. Unencrypted datasets are - // automatically mounted. - return Ok(()); - } else { - if mounted { - // The dataset exists and is mounted - return Ok(()); - } - // We need to load the encryption key and mount the filesystem - return Self::mount_encrypted_dataset(name); + if !mounted && !zoned && matches!(can_mount, CanMount::On) { + Self::mount_dataset(name)?; } + return Ok(()); } // If it doesn't exist, make it. @@ -627,6 +649,12 @@ impl Zfs { ]); } + match can_mount { + CanMount::On => cmd.args(&["-o", "canmount=on"]), + CanMount::Off => cmd.args(&["-o", "canmount=off"]), + CanMount::NoAuto => cmd.args(&["-o", "canmount=noauto"]), + }; + if let Some(opts) = additional_options { for o in &opts { cmd.args(&["-o", &o]); @@ -660,7 +688,8 @@ impl Zfs { Ok(()) } - fn mount_encrypted_dataset(name: &str) -> Result<(), EnsureDatasetError> { + // Mounts a dataset, loading keys if necessary. + fn mount_dataset(name: &str) -> Result<(), EnsureDatasetError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-l", name]); execute(cmd).map_err(|err| EnsureDatasetError { @@ -989,6 +1018,7 @@ mod test { let input = "dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ dataset_name\tname\tI_AM_IGNORED\t-\n\ + dataset_name\tmounted\tyes\t-\n\ dataset_name\tcompression\toff\tinherited from parent"; let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); @@ -996,6 +1026,7 @@ mod test { assert_eq!(props[0].id, None); assert_eq!(props[0].name, "dataset_name"); + assert_eq!(props[0].mounted, true); assert_eq!(props[0].avail.to_bytes(), 1234); assert_eq!(props[0].used.to_bytes(), 5678); assert_eq!(props[0].quota, None); @@ -1008,6 +1039,7 @@ mod test { let input = "dataset_name\tavailable\t1234\t-\tEXTRA\n\ dataset_name\tused\t5678\t-\n\ dataset_name\tname\tI_AM_IGNORED\t-\n\ + dataset_name\tmounted\tyes\t-\n\ dataset_name\tcompression\toff\tinherited from parent"; let err = DatasetProperties::parse_many(&input) .expect_err("Should have parsed data"); @@ -1020,6 +1052,7 @@ mod test { #[test] fn parse_dataset_props_with_optionals() { let input = "dataset_name\toxide:uuid\td4e1e554-7b98-4413-809e-4a42561c3d0c\tlocal\n\ + dataset_name\tmounted\tyes\t-\n\ dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ dataset_name\tquota\t111\t-\n\ @@ -1033,6 +1066,7 @@ mod test { Some("d4e1e554-7b98-4413-809e-4a42561c3d0c".parse().unwrap()) ); assert_eq!(props[0].name, "dataset_name"); + assert_eq!(props[0].mounted, true); assert_eq!(props[0].avail.to_bytes(), 1234); assert_eq!(props[0].used.to_bytes(), 5678); assert_eq!(props[0].quota.map(|q| q.to_bytes()), Some(111)); @@ -1057,6 +1091,7 @@ mod test { #[test] fn parse_dataset_bad_avail() { let input = "dataset_name\tavailable\tBADAVAIL\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tused\t5678\t-"; let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); @@ -1069,6 +1104,7 @@ mod test { #[test] fn parse_dataset_bad_usage() { let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tused\tBADUSAGE\t-"; let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); @@ -1082,6 +1118,7 @@ mod test { fn parse_dataset_bad_quota() { let input = "dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tquota\tBADQUOTA\t-"; let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); @@ -1096,6 +1133,7 @@ mod test { let input = "dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ dataset_name\tquota\t111\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\treservation\tBADRES\t-"; let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); @@ -1118,6 +1156,7 @@ mod test { "dataset_name\tused\t5678\t-\n\ dataset_name\tquota\t111\t-\n\ dataset_name\treservation\t222\t-\n\ + dataset_name\tmounted\ttrue\t-\n\ dataset_name\tcompression\toff\tinherited", "'available'", ); @@ -1125,6 +1164,7 @@ mod test { "dataset_name\tavailable\t1234\t-\n\ dataset_name\tquota\t111\t-\n\ dataset_name\treservation\t222\t-\n\ + dataset_name\tmounted\ttrue\t-\n\ dataset_name\tcompression\toff\tinherited", "'used'", ); @@ -1132,9 +1172,17 @@ mod test { "dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ dataset_name\tquota\t111\t-\n\ + dataset_name\tmounted\ttrue\t-\n\ dataset_name\treservation\t222\t-", "'compression'", ); + expect_missing( + "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-", + "'mounted'", + ); } #[test] @@ -1142,6 +1190,7 @@ mod test { let input = "dataset_name\toxide:uuid\tb8698ede-60c2-4e16-b792-d28c165cfd12\tinherited from parent\n\ dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tcompression\toff\t-"; let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); @@ -1154,6 +1203,7 @@ mod test { let input = "dataset_name\toxide:uuid\t-\t-\n\ dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tcompression\toff\t-"; let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); @@ -1166,6 +1216,7 @@ mod test { let input = "dataset_name\tquota\t0\tdefault\n\ dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tcompression\toff\t-"; let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); @@ -1178,6 +1229,7 @@ mod test { let input = "dataset_name\treservation\t0\tdefault\n\ dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ + dataset_name\tmounted\t-\t-\n\ dataset_name\tcompression\toff\t-"; let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); @@ -1193,7 +1245,10 @@ mod test { foo\tavailable\t111\t-\n\ foo\tused\t111\t-\n\ foo\tcompression\toff\t-\n\ + foo\tmounted\t-\t-\n\ + foo\tmounted\t-\t-\n\ bar\tavailable\t222\t-\n\ + bar\tmounted\tyes\t-\n\ bar\tused\t222\t-\n\ bar\tcompression\toff\t-"; @@ -1202,7 +1257,9 @@ mod test { assert_eq!(props.len(), 2); assert_eq!(props[0].name, "bar"); assert_eq!(props[0].used, 222.into()); + assert_eq!(props[0].mounted, true); assert_eq!(props[1].name, "foo"); assert_eq!(props[1].used, 111.into()); + assert_eq!(props[1].mounted, false); } } diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index 1b04c7597bb..7c227e4c5ab 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -23,7 +23,7 @@ use camino::Utf8PathBuf; use illumos_utils::zfs::{ - DatasetEnsureArgs, EnsureDatasetError, GetValueError, Mountpoint, + CanMount, DatasetEnsureArgs, EnsureDatasetError, GetValueError, Mountpoint, SizeDetails, Zfs, }; use omicron_common::api::external::ByteCount; @@ -147,11 +147,12 @@ pub(crate) fn ensure_backing_fs( Zfs::ensure_dataset(DatasetEnsureArgs { name: &dataset, mountpoint: mountpoint.clone(), + can_mount: CanMount::NoAuto, zoned: false, encryption_details: None, size_details, id: None, - additional_options: Some(vec!["canmount=noauto".to_string()]), + additional_options: None, })?; // Check if a ZFS filesystem is already mounted on bfs.mountpoint by diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 327de6ce499..6bae21596c7 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -277,6 +277,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { mountpoint: zfs::Mountpoint::Path(Utf8PathBuf::from( zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT, )), + can_mount: zfs::CanMount::On, zoned: false, encryption_details: None, size_details: None, diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 123a9a8653e..8e441ad07e3 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -265,6 +265,7 @@ pub(crate) async fn ensure_zpool_has_datasets( let result = Zfs::ensure_dataset(zfs::DatasetEnsureArgs { name: &format!("{}/{}", zpool_name, dataset), mountpoint: Mountpoint::Path(mountpoint), + can_mount: zfs::CanMount::On, zoned, encryption_details: Some(encryption_details), size_details: None, @@ -327,6 +328,7 @@ pub(crate) async fn ensure_zpool_has_datasets( Zfs::ensure_dataset(zfs::DatasetEnsureArgs { name, mountpoint: Mountpoint::Path(mountpoint), + can_mount: zfs::CanMount::On, zoned, encryption_details, size_details, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 6b8d7555af1..b49a706727c 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -17,7 +17,8 @@ use futures::Stream; use futures::StreamExt; use futures::future::FutureExt; use illumos_utils::zfs::{ - DatasetEnsureArgs, DatasetProperties, Mountpoint, WhichDatasets, Zfs, + CanMount, DatasetEnsureArgs, DatasetProperties, Mountpoint, WhichDatasets, + Zfs, }; use illumos_utils::zpool::{ZPOOL_MOUNTPOINT_ROOT, ZpoolName}; use key_manager::StorageKeyRequester; @@ -1028,6 +1029,17 @@ impl StorageManager { ); return Ok(false); } + + if !config.name.kind().zoned() && !old_dataset.mounted { + info!( + log, + "Dataset might need to be mounted"; + "old_dataset" => ?old_dataset, + "requested_props" => ?config.inner, + ); + return Ok(false); + } + info!(log, "No changes necessary, returning early"); return Ok(true); } @@ -1463,6 +1475,7 @@ impl StorageManager { Zfs::ensure_dataset(DatasetEnsureArgs { name: &full_name, mountpoint: mountpoint.clone(), + can_mount: CanMount::On, zoned: *zoned, encryption_details, size_details, @@ -1498,6 +1511,7 @@ impl StorageManager { Zfs::ensure_dataset(DatasetEnsureArgs { name: fs_name, mountpoint: Mountpoint::Path(Utf8PathBuf::from("/data")), + can_mount: CanMount::On, zoned, encryption_details, size_details, @@ -2152,6 +2166,89 @@ mod tests { logctx.cleanup_successful(); } + async fn is_mounted(dataset: &DatasetName) -> bool { + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let cmd = + command.args(&["list", "-Hpo", "mounted", &dataset.full_name()]); + let output = cmd.output().await.unwrap(); + assert!(output.status.success(), "Failed to list dataset: {output:?}"); + String::from_utf8_lossy(&output.stdout).trim() == "yes" + } + + async fn unmount(dataset: &DatasetName) { + let mut command = tokio::process::Command::new(illumos_utils::PFEXEC); + let cmd = command.args(&[ + illumos_utils::zfs::ZFS, + "unmount", + "-f", + &dataset.full_name(), + ]); + let output = cmd.output().await.unwrap(); + assert!( + output.status.success(), + "Failed to unmount dataset: {output:?}" + ); + } + + #[tokio::test] + async fn ensure_datasets_get_mounted() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_datasets_get_mounted"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. + harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a dataset on the newly formatted U.2 + let id = DatasetUuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let name = DatasetName::new(zpool_name.clone(), DatasetKind::Debug); + let datasets = BTreeMap::from([( + id, + DatasetConfig { + id, + name: name.clone(), + inner: SharedDatasetConfig::default(), + }, + )]); + // "Generation = 1" is reserved as "no requests seen yet", so we jump + // past it. + let generation = Generation::new().next(); + let config = DatasetsConfig { generation, datasets }; + + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + // Creating the dataset should have mounted it + assert!(is_mounted(&name).await); + + // We can unmount the dataset manually + unmount(&name).await; + assert!(!is_mounted(&name).await); + + // We can re-apply the same config... + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + // ... and doing so mounts the dataset again. + assert!(is_mounted(&name).await); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn ensure_many_datasets() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); From 86272212e06d06402c3b5f92cb42cbb27ac788d3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 27 Mar 2025 16:11:53 -0700 Subject: [PATCH 2/5] docs --- illumos-utils/src/zfs.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 41fde4bfe1b..4f730b37a74 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -218,7 +218,7 @@ pub struct DatasetProperties { pub id: Option, /// The full name of the dataset. pub name: String, - /// Identifies whether or not the dataset is mount. + /// Identifies whether or not the dataset is mounted. pub mounted: bool, /// Remaining space in the dataset and descendants. pub avail: ByteCount, @@ -309,6 +309,10 @@ impl DatasetProperties { }) .transpose()?; let name = dataset_name.to_string(); + // Although the illumos man pages say the only valid options for + // "mounted" are "yes" and "no", "-" is also an observed output. + // We interpret that value, and anything other than "yes" + // explicitly as "not mounted". let mounted = props .get("mounted") .map(|(prop, _source)| prop.to_string()) @@ -448,7 +452,7 @@ pub struct DatasetEnsureArgs<'a> { /// If creating a dataset, this adds the "mountpoint=..." option. pub mountpoint: Mountpoint, - /// Identifies whether or not hte dataset should be mounted. + /// Identifies whether or not the dataset should be mounted. /// /// If "On": The dataset is mounted (unless it is also zoned). /// If "Off/NoAuto": The dataset is not mounted. From 5fb75076b1ad660688bcff91229898167e243038 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 27 Mar 2025 16:15:49 -0700 Subject: [PATCH 3/5] Update tests to less-confusing values --- illumos-utils/src/zfs.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 4f730b37a74..ad3139277d9 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -1160,7 +1160,7 @@ mod test { "dataset_name\tused\t5678\t-\n\ dataset_name\tquota\t111\t-\n\ dataset_name\treservation\t222\t-\n\ - dataset_name\tmounted\ttrue\t-\n\ + dataset_name\tmounted\tyes\t-\n\ dataset_name\tcompression\toff\tinherited", "'available'", ); @@ -1168,7 +1168,7 @@ mod test { "dataset_name\tavailable\t1234\t-\n\ dataset_name\tquota\t111\t-\n\ dataset_name\treservation\t222\t-\n\ - dataset_name\tmounted\ttrue\t-\n\ + dataset_name\tmounted\tyes\t-\n\ dataset_name\tcompression\toff\tinherited", "'used'", ); @@ -1176,7 +1176,7 @@ mod test { "dataset_name\tavailable\t1234\t-\n\ dataset_name\tused\t5678\t-\n\ dataset_name\tquota\t111\t-\n\ - dataset_name\tmounted\ttrue\t-\n\ + dataset_name\tmounted\tyes\t-\n\ dataset_name\treservation\t222\t-", "'compression'", ); From a3d83133dd3b01a65d8d08de4bee7924943d36c7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 27 Mar 2025 16:58:06 -0700 Subject: [PATCH 4/5] We dont have legacy mountpoints, but deal with them too --- illumos-utils/src/zfs.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index ad3139277d9..320f069a133 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -626,7 +626,11 @@ impl Zfs { } })?; - if !mounted && !zoned && matches!(can_mount, CanMount::On) { + let want_to_mount = !zoned + && matches!(can_mount, CanMount::On) + && matches!(mountpoint, Mountpoint::Path(_)); + + if !mounted && want_to_mount { Self::mount_dataset(name)?; } return Ok(()); From a3a7215c501c85ea5e1c84362f77ccb6808350a6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 31 Mar 2025 12:00:08 -0700 Subject: [PATCH 5/5] Disk::new failing can be an error --- sled-storage/src/resources.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index df12713e521..f4f715e555e 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -442,7 +442,7 @@ impl StorageResources { ) .await .map_err(|err| { - warn!(log, "Disk::new failed"; "err" => ?err); + error!(log, "Disk::new failed"; "err" => ?err); match err { // We pick this error out and identify it separately because // it may be transient, and should sometimes be handled with