Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions bin/propolis-server/src/lib/initializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ impl MachineInitializer<'_> {
..Default::default()
},
nworkers,
self.log.clone(),
)
.with_context(|| {
format!(
Expand Down
6 changes: 3 additions & 3 deletions bin/propolis-server/src/lib/stats/virtual_disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ impl VirtualDiskStats {
self.on_write_completion(result, len, duration)
}
Operation::Flush => self.on_flush_completion(result, duration),
Operation::Discard(..) => {
// Discard is not wired up in backends we care about for now, so
// it can safely be ignored.
Operation::Discard => {
// Discard is now wired up for local disks. We need to add support for it to the
// schema in Omicron before we can report stats for it. For now, just ignore it.
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion bin/propolis-standalone/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ pub fn block_backend(
}
None => NonZeroUsize::new(DEFAULT_WORKER_COUNT).unwrap(),
};
block::FileBackend::create(&parsed.path, opts, workers).unwrap()
block::FileBackend::create(&parsed.path, opts, workers, log.clone())
.unwrap()
}
"crucible" => create_crucible_backend(be, opts, log),
"crucible-mem" => create_crucible_mem_backend(be, opts, log),
Expand Down
1 change: 1 addition & 0 deletions lib/propolis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ async-trait.workspace = true
iddqd.workspace = true
nix.workspace = true
vm-attest.workspace = true
itertools.workspace = true

# falcon
libloading = { workspace = true, optional = true }
Expand Down
8 changes: 5 additions & 3 deletions lib/propolis/src/block/crucible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,11 @@ impl WorkerState {
let _ = block.flush(None).await?;
}
}
block::Operation::Discard(..) => {
// Crucible does not support discard operations for now
return Err(Error::Unsupported);
block::Operation::Discard => {
// Crucible does not support discard operations for now, so we implement this as
// a no-op (which technically is a valid implementation of discard, just one that
// doesn't actually free any space).
return Ok(());
}
}
Ok(())
Expand Down
33 changes: 28 additions & 5 deletions lib/propolis/src/block/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::sync::{Arc, Mutex};
use crate::block::{self, SyncWorkerCtx, WorkerId};
use crate::tasks::ThreadGroup;
use crate::vmm::{MappingExt, MemCtx};
use slog::warn;

use anyhow::Context;

Expand All @@ -31,6 +32,7 @@ struct SharedState {

info: block::DeviceInfo,
skip_flush: bool,
log: slog::Logger,
}
struct WceState {
initial: bool,
Expand All @@ -43,13 +45,15 @@ impl SharedState {
skip_flush: bool,
wce_state: Option<WceState>,
discard_mech: Option<dkioc::DiscardMech>,
log: slog::Logger,
) -> Arc<Self> {
let state = SharedState {
fp,
wce_state: Mutex::new(wce_state),
discard_mech,
skip_flush,
info,
log,
};

// Attempt to enable write caching if underlying resource supports it
Expand Down Expand Up @@ -113,12 +117,29 @@ impl SharedState {
self.fp.sync_data().map_err(|_| "io error")?;
}
}
block::Operation::Discard(off, len) => {
block::Operation::Discard => {
if let Some(mech) = self.discard_mech {
dkioc::do_discard(&self.fp, mech, off as u64, len as u64)
.map_err(|_| {
"io error while attempting to free block(s)"
})?;
for &(off, len) in &req.ranges {
Comment thread
ahrens marked this conversation as resolved.
// There might be some performance benefits to combining the ranges into
// one DKIOCFREE call, but ZFS will only issue one range to the
// underlying disk at a time, so we expect the benefit to be minimal in
// practice.
if let Err(e) = dkioc::do_discard(
&self.fp, mech, off as u64, len as u64,
) {
if e.kind() == ErrorKind::Unsupported {
// If the discard mechanism is unsupported, we should not have
// advertised support for discard in the first place. However, if
// this happens, it likely means we're running on older ZFS bits that
// don't support DKIOCFREE on raw zvols. Since this is not a supported
// configuration, but developer machines might be in this state, we
// swallow errors from the ioctl rather than failing the command.
warn!(self.log, "discard at offset {off} length {len} is unsupported; check ZFS version");
} else {
return Err("io error while attempting to free block(s)");
}
}
}
} else {
unreachable!("handled above in processing_loop()");
}
Expand Down Expand Up @@ -159,6 +180,7 @@ impl FileBackend {
path: impl AsRef<Path>,
opts: block::BackendOpts,
worker_count: NonZeroUsize,
log: slog::Logger,
) -> Result<Arc<Self>> {
let p: &Path = path.as_ref();

Expand Down Expand Up @@ -202,6 +224,7 @@ impl FileBackend {
skip_flush,
wce_state,
disk_info.discard_mech,
log,
),
block_attach,
worker_count,
Expand Down
2 changes: 1 addition & 1 deletion lib/propolis/src/block/in_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ impl SharedState {
block::Operation::Flush => {
// nothing to do
}
block::Operation::Discard(..) => {
block::Operation::Discard => {
unreachable!("handled in processing_loop()");
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/propolis/src/block/mem_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl SharedState {
block::Operation::Flush => {
// nothing to do
}
block::Operation::Discard(..) => {
block::Operation::Discard => {
unreachable!("handled in processing_loop()")
}
}
Expand Down
6 changes: 3 additions & 3 deletions lib/propolis/src/block/minder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,9 @@ impl QueueMinder {
Operation::Flush => {
probes::block_begin_flush!(|| { (devqid, id) });
}
Operation::Discard(off, len) => {
Operation::Discard => {
probes::block_begin_discard!(|| {
(devqid, id, off as u64, len as u64)
(devqid, id, req.ranges.len() as u64)
});
}
}
Expand Down Expand Up @@ -355,7 +355,7 @@ impl QueueMinder {
(devqid, id, rescode, ns_processed, ns_queued)
});
}
Operation::Discard(..) => {
Operation::Discard => {
probes::block_complete_discard!(|| {
(devqid, id, rescode, ns_processed, ns_queued)
});
Expand Down
26 changes: 15 additions & 11 deletions lib/propolis/src/block/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ mod probes {
fn block_begin_read(devq_id: u64, req_id: u64, offset: u64, len: u64) {}
fn block_begin_write(devq_id: u64, req_id: u64, offset: u64, len: u64) {}
fn block_begin_flush(devq_id: u64, req_id: u64) {}
fn block_begin_discard(devq_id: u64, req_id: u64, offset: u64, len: u64) {}
fn block_begin_discard(devq_id: u64, req_id: u64, nr: u64) {}

fn block_complete_read(
devq_id: u64,
Expand Down Expand Up @@ -106,8 +106,8 @@ pub enum Operation {
Write(ByteOffset, ByteLen),
/// Flush buffer(s)
Flush,
/// Discard/UNMAP/deallocate region
Discard(ByteOffset, ByteLen),
/// Discard/UNMAP/deallocate some ranges, which are specified in Request::ranges
Discard,
}
impl Operation {
pub const fn is_read(&self) -> bool {
Expand All @@ -120,7 +120,7 @@ impl Operation {
matches!(self, Operation::Flush)
}
pub const fn is_discard(&self) -> bool {
matches!(self, Operation::Discard(..))
matches!(self, Operation::Discard)
}
}

Expand Down Expand Up @@ -203,32 +203,36 @@ pub struct Request {
/// A list of regions of guest memory to read/write into as part of the I/O
/// request
pub regions: Vec<GuestRegion>,

/// A list of byte ranges to discard as part of the I/O request. This is only
/// relevant for discard operations, and is expected to be empty otherwise.
pub ranges: Vec<(ByteOffset, ByteLen)>,
}
impl Request {
pub fn new_read(
off: ByteOffset,
len: ByteLen,
regions: Vec<GuestRegion>,
) -> Self {
Self { op: Operation::Read(off, len), regions }
Self { op: Operation::Read(off, len), regions, ranges: Vec::new() }
Comment thread
ahrens marked this conversation as resolved.
}

pub fn new_write(
off: ByteOffset,
len: ByteLen,
regions: Vec<GuestRegion>,
) -> Self {
Self { op: Operation::Write(off, len), regions }
Self { op: Operation::Write(off, len), regions, ranges: Vec::new() }
}

pub fn new_flush() -> Self {
let op = Operation::Flush;
Self { op, regions: Vec::new() }
Self { op, regions: Vec::new(), ranges: Vec::new() }
}

pub fn new_discard(off: ByteOffset, len: ByteLen) -> Self {
let op = Operation::Discard(off, len);
Self { op, regions: Vec::new() }
pub fn new_discard(ranges: Vec<(ByteOffset, ByteLen)>) -> Self {
let op = Operation::Discard;
Self { op, regions: Vec::new(), ranges }
}

pub fn mappings<'a>(&self, mem: &'a MemCtx) -> Option<Vec<SubMapping<'a>>> {
Expand All @@ -239,7 +243,7 @@ impl Request {
Operation::Write(..) => {
self.regions.iter().map(|r| mem.readable_region(r)).collect()
}
Operation::Flush | Operation::Discard(..) => None,
Operation::Flush | Operation::Discard => None,
}
}
}
Expand Down
48 changes: 48 additions & 0 deletions lib/propolis/src/hw/nvme/bits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#![allow(dead_code)]

use crate::block::{ByteLen, ByteOffset};
use bitstruct::bitstruct;
use zerocopy::{FromBytes, IntoBytes};

Expand Down Expand Up @@ -176,6 +177,50 @@ impl CompletionQueueEntry {
}
}

/// A Dataset Management Range Definition as represented in memory.
///
/// See NVMe 1.0e Section 6.6 Figure 114: Dataset Management – Range Definition
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, FromBytes, IntoBytes)]
#[repr(C, packed(1))]
pub struct DatasetManagementRangeDefinition {
/// The context attributes specified for each range provides information about how the range
/// is intended to be used by host software. The use of this information is optional and the
/// controller is not required to perform any specific action.
pub context_attributes: u32,

pub number_logical_blocks: u32,

pub starting_lba: u64,
}
impl DatasetManagementRangeDefinition {
pub fn new(
context_attributes: u32,
number_logical_blocks: u32,
starting_lba: u64,
) -> Self {
Self { context_attributes, number_logical_blocks, starting_lba }
}

pub fn offset_len(
&self,
lba_data_size: u64,
) -> Result<(ByteOffset, ByteLen), &'static str> {
// Check for overflow in the byte offset calculation
let byte_offset = self.starting_lba.checked_mul(lba_data_size).ok_or(
"Starting LBA and LBA data size multiplication overflowed",
)?;
// Check for overflow in the byte length calculation
let byte_len = (u64::from(self.number_logical_blocks))
.checked_mul(lba_data_size)
.ok_or("Number of logical blocks and LBA data size multiplication overflowed")?;
// Check for overflow of offset + length
byte_offset
.checked_add(byte_len)
.ok_or("Byte offset and byte length addition overflowed")?;
Ok((byte_offset as ByteOffset, byte_len as ByteLen))
}
}

// Register bits

bitstruct! {
Expand Down Expand Up @@ -539,6 +584,8 @@ pub const NVM_OPC_FLUSH: u8 = 0x00;
pub const NVM_OPC_WRITE: u8 = 0x01;
/// Read Command Opcode
pub const NVM_OPC_READ: u8 = 0x02;
/// Dataset Mangement Command Opcode
pub const NVM_OPC_DATASET_MANAGEMENT: u8 = 0x09;

// Generic Command Status values
// See NVMe 1.0e Section 4.5.1.2.1, Figure 17 Status Code - Generic Command Status Values
Expand Down Expand Up @@ -1195,5 +1242,6 @@ mod test {
assert_eq!(size_of::<IdentifyController>(), 4096);
assert_eq!(size_of::<LbaFormat>(), 4);
assert_eq!(size_of::<IdentifyNamespace>(), 4096);
assert_eq!(size_of::<DatasetManagementRangeDefinition>(), 16);
}
}
Loading
Loading