Skip to content
12 changes: 11 additions & 1 deletion common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,16 @@ impl Display for ByteCount {
}

// TODO-cleanup This could use the experimental std::num::IntErrorKind.
#[derive(Debug, Eq, thiserror::Error, Ord, PartialEq, PartialOrd)]
#[derive(
Debug,
Eq,
thiserror::Error,
Ord,
PartialEq,
PartialOrd,
Serialize,
Deserialize,
)]
pub enum ByteCountRangeError {
#[error("value is too small for a byte count")]
TooSmall,
Expand Down Expand Up @@ -1425,6 +1434,7 @@ impl SimpleIdentityOrName for AntiAffinityGroupMember {
#[serde(rename_all = "snake_case")]
pub enum DiskType {
Crucible,
LocalStorage,
}

/// View of a Disk
Expand Down
1 change: 1 addition & 0 deletions dev-tools/dropshot-apis/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ fn all_apis() -> anyhow::Result<ManagedApis> {
let apis = ManagedApis::new(apis)
.context("error creating ManagedApis")?
.with_validation(validate);

Ok(apis)
}

Expand Down
143 changes: 141 additions & 2 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ use nexus_db_queries::db::datastore::CrucibleTargets;
use nexus_db_queries::db::datastore::Disk;
use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
use nexus_db_queries::db::datastore::InstanceStateComputer;
use nexus_db_queries::db::datastore::LocalStorageDisk;
use nexus_db_queries::db::datastore::SQL_BATCH_SIZE;
use nexus_db_queries::db::datastore::VolumeCookedResult;
use nexus_db_queries::db::datastore::read_only_resources_associated_with_volume;
Expand Down Expand Up @@ -152,6 +153,7 @@ use omicron_common::api::external::MacAddr;
use omicron_uuid_kinds::CollectionUuid;
use omicron_uuid_kinds::DatasetUuid;
use omicron_uuid_kinds::DownstairsRegionUuid;
use omicron_uuid_kinds::ExternalZpoolUuid;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::InstanceUuid;
use omicron_uuid_kinds::ParseError;
Expand Down Expand Up @@ -2216,8 +2218,7 @@ async fn crucible_disk_info(
}
}
} else {
// If the disk is not attached to anything, just print empty
// fields.
// If the disk is not attached to anything, just print empty fields.
UpstairsRow {
host_serial: "-".to_string(),
disk_name,
Expand Down Expand Up @@ -2277,6 +2278,141 @@ async fn crucible_disk_info(
Ok(())
}

async fn local_storage_disk_info(
opctx: &OpContext,
datastore: &DataStore,
disk: LocalStorageDisk,
) -> Result<(), anyhow::Error> {
#[derive(Tabled)]
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
struct GenericRow {
host_serial: String,
disk_name: String,
instance_name: String,
propolis_zone: String,
disk_state: String,
}

let conn = datastore.pool_connection_for_tests().await?;

let disk_name = disk.name().to_string();
let disk_state = disk.runtime().disk_state.to_string();

let row = if let Some(instance_uuid) = disk.runtime().attach_instance_id {
// Get the instance this disk is attached to
use nexus_db_schema::schema::instance::dsl as instance_dsl;
use nexus_db_schema::schema::vmm::dsl as vmm_dsl;
let instances: Vec<InstanceAndActiveVmm> = instance_dsl::instance
.filter(instance_dsl::id.eq(instance_uuid))
.left_join(
vmm_dsl::vmm.on(vmm_dsl::id
.nullable()
.eq(instance_dsl::active_propolis_id)
.and(vmm_dsl::time_deleted.is_null())),
)
.limit(1)
.select((Instance::as_select(), Option::<Vmm>::as_select()))
.load_async(&*conn)
.await
.context("loading requested instance")?
.into_iter()
.map(|i: (Instance, Option<Vmm>)| i.into())
.collect();

let Some(instance) = instances.into_iter().next() else {
bail!("no instance: {} found", instance_uuid);
};

let instance_name = instance.instance().name().to_string();

if instance.vmm().is_some() {
let propolis_id =
instance.instance().runtime().propolis_id.unwrap();
let my_sled_id = instance.sled_id().unwrap();

let (_, my_sled) = LookupPath::new(opctx, datastore)
.sled_id(my_sled_id)
.fetch()
.await
.context("failed to look up sled")?;
Comment on lines +2322 to +2337
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in the cases where the instance doesn't exist, or the sled doesn't exist, that's probably an invalid state. but since OMDB is a debugging tool, i think it would be nicer if we didn't bail out here and just printed that we couldn't find the instance/sled and kept going, so that we can still get partial output. OMDB ought to be usable in cases where we have put nonsensical state in the database, if possible.


GenericRow {
host_serial: my_sled.serial_number().to_string(),
disk_name,
instance_name,
propolis_zone: format!("oxz_propolis-server_{}", propolis_id),
disk_state,
}
} else {
GenericRow {
host_serial: NOT_ON_SLED_MSG.to_string(),
disk_name,
instance_name,
propolis_zone: NO_ACTIVE_PROPOLIS_MSG.to_string(),
disk_state,
}
}
} else {
// If the disk is not attached to anything, just print empty fields.
GenericRow {
host_serial: "-".to_string(),
disk_name,
instance_name: "-".to_string(),
propolis_zone: "-".to_string(),
disk_state,
}
};

let table = tabled::Table::new(vec![row])
.with(tabled::settings::Style::empty())
.with(tabled::settings::Padding::new(0, 1, 0, 0))
.to_string();

println!("{}", table);

#[derive(Tabled)]
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
struct Row {
disk_name: String,

time_created: DateTime<Utc>,
#[tabled(display_with = "display_option_blank")]
time_deleted: Option<DateTime<Utc>>,

dataset_id: DatasetUuid,
pool_id: ExternalZpoolUuid,
sled_id: SledUuid,

dataset_size: u64,
}

if let Some(allocation) = &disk.local_storage_dataset_allocation {
let rows = vec![Row {
disk_name: disk.name().to_string(),

time_created: allocation.time_created,
time_deleted: allocation.time_deleted,

dataset_id: allocation.local_storage_dataset_id(),
pool_id: allocation.pool_id(),
sled_id: allocation.sled_id(),

dataset_size: allocation.dataset_size.to_bytes(),
}];

let table = tabled::Table::new(rows)
.with(tabled::settings::Style::empty())
.with(tabled::settings::Padding::new(0, 1, 0, 0))
.to_string();

println!("{}", table);
} else {
println!("no allocation yet");
}

Ok(())
}

/// Run `omdb db disk info <UUID>`.
async fn cmd_db_disk_info(
opctx: &OpContext,
Expand All @@ -2287,6 +2423,9 @@ async fn cmd_db_disk_info(
Disk::Crucible(disk) => {
crucible_disk_info(opctx, datastore, disk).await
}
Disk::LocalStorage(disk) => {
local_storage_disk_info(opctx, datastore, disk).await
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion end-to-end-tests/src/bin/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ async fn run_test() -> Result<()> {
ctx.client
.disk_create()
.project(ctx.project_name.clone())
.body(DiskCreate {
.body(DiskCreate::Crucible {
name: disk_name.clone(),
description: String::new(),
disk_source: DiskSource::Blank {
Expand Down
13 changes: 7 additions & 6 deletions end-to-end-tests/src/instance_launch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ use async_trait::async_trait;
use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition};
use oxide_client::types::{
ByteCount, DiskCreate, DiskSource, ExternalIp, ExternalIpCreate,
InstanceCpuCount, InstanceCreate, InstanceDiskAttachment,
InstanceNetworkInterfaceAttachment, InstanceState, SshKeyCreate,
InstanceCpuCount, InstanceCreate, InstanceDiskAttach,
InstanceDiskAttachment, InstanceNetworkInterfaceAttachment, InstanceState,
SshKeyCreate,
};
use oxide_client::{ClientCurrentUserExt, ClientDisksExt, ClientInstancesExt};
use russh::{ChannelMsg, Disconnect};
Expand Down Expand Up @@ -42,7 +43,7 @@ async fn instance_launch() -> Result<()> {
.client
.disk_create()
.project(ctx.project_name.clone())
.body(DiskCreate {
.body(DiskCreate::Crucible {
name: disk_name.clone(),
description: String::new(),
disk_source: DiskSource::Image {
Expand All @@ -66,9 +67,9 @@ async fn instance_launch() -> Result<()> {
hostname: "localshark".parse().unwrap(), // 🦈
memory: ByteCount(1024 * 1024 * 1024),
ncpus: InstanceCpuCount(2),
boot_disk: Some(InstanceDiskAttachment::Attach {
name: disk_name.clone(),
}),
boot_disk: Some(InstanceDiskAttachment::Attach(
InstanceDiskAttach { name: disk_name.clone() },
)),
disks: Vec::new(),
network_interfaces: InstanceNetworkInterfaceAttachment::Default,
external_ips: vec![ExternalIpCreate::Ephemeral { pool: None }],
Expand Down
8 changes: 5 additions & 3 deletions nexus/db-model/src/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ impl_enum_type!(

// Enum values
Crucible => b"crucible"
LocalStorage => b"local_storage"
);

/// A Disk, where how the blocks are stored depend on the disk_type.
Expand Down Expand Up @@ -75,7 +76,8 @@ pub struct Disk {
/// (where rows are matched based on the disk_id field in that table) and
/// combined into a higher level `datastore::Disk` enum.
///
/// For `Crucible` disks, see the DiskTypeCrucible model.
/// For `Crucible` disks, see the DiskTypeCrucible model. For `LocalStorage`
/// disks, see the DiskTypeLocalStorage model.
pub disk_type: DiskType,
}

Expand All @@ -88,15 +90,15 @@ impl Disk {
runtime_initial: DiskRuntimeState,
disk_type: DiskType,
) -> Self {
let identity = DiskIdentity::new(disk_id, params.identity.clone());
let identity = DiskIdentity::new(disk_id, params.identity().clone());

Self {
identity,
rcgen: external::Generation::new().into(),
project_id,
runtime_state: runtime_initial,
slot: None,
size: params.size.into(),
size: params.size().into(),
block_size,
disk_type,
}
Expand Down
10 changes: 5 additions & 5 deletions nexus/db-model/src/disk_type_crucible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ impl DiskTypeCrucible {
pub fn new(
disk_id: Uuid,
volume_id: VolumeUuid,
params: &params::DiskCreate,
disk_source: &params::DiskSource,
) -> Self {
let create_snapshot_id = match params.disk_source {
params::DiskSource::Snapshot { snapshot_id } => Some(snapshot_id),
let create_snapshot_id = match disk_source {
params::DiskSource::Snapshot { snapshot_id } => Some(*snapshot_id),
_ => None,
};

// XXX further enum here for different image types?
let create_image_id = match params.disk_source {
params::DiskSource::Image { image_id } => Some(image_id),
let create_image_id = match disk_source {
params::DiskSource::Image { image_id } => Some(*image_id),
_ => None,
};

Expand Down
68 changes: 68 additions & 0 deletions nexus/db-model/src/disk_type_local_storage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use crate::ByteCount;
use crate::typed_uuid::DbTypedUuid;
use nexus_db_schema::schema::disk_type_local_storage;
use omicron_common::api::external;
use omicron_uuid_kinds::DatasetKind;
use omicron_uuid_kinds::DatasetUuid;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// A Disk can be backed using a zvol slice from the local storage dataset
/// present on each zpool of a sled.
///
/// NOTE: `Queryable` maps columns to fields positionally, so the field order
/// here must match the `disk_type_local_storage` table definition.
#[derive(
    Queryable, Insertable, Clone, Debug, Selectable, Serialize, Deserialize,
)]
#[diesel(table_name = disk_type_local_storage)]
pub struct DiskTypeLocalStorage {
    // ID of the corresponding row in the `disk` table.
    disk_id: Uuid,

    /// For zvols inside a parent dataset, there's an overhead that must be
    /// accounted for when setting a quota and reservation on that parent
    /// dataset. Record at model creation time how much overhead is required for
    /// the parent `local_storage` dataset slice in order to fit the child
    /// volume.
    required_dataset_overhead: ByteCount,

    // None until a local storage dataset allocation has been made for this
    // disk (`new` always starts this as None).
    local_storage_dataset_allocation_id: Option<DbTypedUuid<DatasetKind>>,
}

impl DiskTypeLocalStorage {
/// Creates a new `DiskTypeLocalStorage`. Returns Err if the computed
/// required dataset overhead does not fit in a `ByteCount`.
pub fn new(
disk_id: Uuid,
size: external::ByteCount,
) -> Result<DiskTypeLocalStorage, external::ByteCountRangeError> {
// For zvols, there's an overhead that must be accounted for, and it
// empirically seems to be about 65M per 1G for volblocksize=4096.
// Multiple the disk size by something a little over this value.

let one_gb = external::ByteCount::from_gibibytes_u32(1).to_bytes();
let gbs = size.to_bytes() / one_gb;
let overhead: u64 =
external::ByteCount::from_mebibytes_u32(70).to_bytes() * gbs;

// Don't unwrap this - the size of this disk is a parameter set by an
// API call, and we don't want to panic on out of range input.
let required_dataset_overhead =
external::ByteCount::try_from(overhead)?;
Comment on lines +49 to +52
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think we should probably add some kind of context to this error in the case that it bubbles up to the user. the TryFrom error will say "value is too small for a byte count" or "too large for a byte count", but we should tell the user that it's not the byte count they provided for the disk's size, it's something we computed based on that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so too, but there's currently no way that this would get out to a user from a saga (without querying for specific node's outputs, which would be brittle to changes in the saga itself). The way we've structured things is that (after some validation of the parameters) the disk create saga takes in the params::DiskCreate from the user and does all the work of creating a disk in the saga, and we only return a 500 if saga execution fails.

I put an additional step in validate_disk_create_params that tries to create this record outside the saga, and can return a better error if there's a failure in 135f6a9.


Ok(DiskTypeLocalStorage {
disk_id,
required_dataset_overhead: required_dataset_overhead.into(),
local_storage_dataset_allocation_id: None,
})
}

pub fn required_dataset_overhead(&self) -> external::ByteCount {
self.required_dataset_overhead.into()
}

pub fn local_storage_dataset_allocation_id(&self) -> Option<DatasetUuid> {
self.local_storage_dataset_allocation_id.map(Into::into)
}
}
Loading
Loading