From 3e38e0c7ed29e3a14e49d9f8dc9cc0bfdb5f13b6 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Tue, 11 Nov 2025 21:10:56 +0000 Subject: [PATCH 1/8] nvme_driver: format nvme errors to be more verbose --- .../disk_nvme/nvme_driver/src/queue_pair.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs index 70ba59d033..75d9dfb9af 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs @@ -480,14 +480,20 @@ impl std::error::Error for NvmeError {} impl std::fmt::Display for NvmeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.0.status_code_type() { - spec::StatusCodeType::GENERIC => write!(f, "general error {:#x?}", self.0), + spec::StatusCodeType::GENERIC => write!(f, "NVMe SCT general error, SC: {:#x?}", self.0), spec::StatusCodeType::COMMAND_SPECIFIC => { - write!(f, "command-specific error {:#x?}", self.0) + write!(f, "NVMe SCT command-specific error, SC: {:#x?}", self.0) } spec::StatusCodeType::MEDIA_ERROR => { - write!(f, "media error {:#x?}", self.0) + write!(f, "NVMe SCT media error, SC: {:#x?}", self.0) } - _ => write!(f, "{:#x?}", self.0), + spec::StatusCodeType::PATH_RELATED => { + write!(f, "NVMe SCT path-related error, SC: {:#x?}", self.0) + } + spec::StatusCodeType::VENDOR_SPECIFIC => { + write!(f, "NVMe SCT vendor-specific error, SC: {:#x?}", self.0) + } + _ => write!(f, "(other NVMe SCT), {:#x?}", self.0), } } } From 8febd937a7712b2353039cfee1961ba95bec816e Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Tue, 11 Nov 2025 23:54:00 +0000 Subject: [PATCH 2/8] openvmm_entry: cmdline to enable nvme keepalive when running servicing --- openvmm/openvmm_entry/src/lib.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index e86411387d..2ede58f382 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -1990,6 +1990,10 @@ enum InteractiveCommand { /// configured path. #[clap(long, conflicts_with("user_mode_only"))] igvm: Option, + /// Enable keepalive when servicing VTL2 devices. + /// Default is `true`. 
+ #[clap(long, short = 'n', default_missing_value = "true")] + nvme_keepalive: bool, }, /// Read guest memory @@ -2843,6 +2847,7 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an InteractiveCommand::ServiceVtl2 { user_mode_only, igvm, + nvme_keepalive, } => { let paravisor_diag = paravisor_diag.clone(); let vm_rpc = vm_rpc.clone(); @@ -2860,7 +2865,7 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an hvlite_helpers::underhill::save_underhill( &vm_rpc, ged_rpc.as_ref().context("no GED")?, - GuestServicingFlags::default(), + GuestServicingFlags { nvme_keepalive }, file.into(), ) .await?; From dbb1cf2b78683fe67098b3dfd5f9c5b6fc956529 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Wed, 12 Nov 2025 00:06:45 +0000 Subject: [PATCH 3/8] wip: turn on keepalive for the many devices servicing test --- openhcl/underhill_core/src/dispatch/mod.rs | 6 +++++- .../tests/tests/x86_64/openhcl_linux_direct.rs | 17 +++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 7aeeb6409f..b9ea423062 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -754,7 +754,11 @@ impl LoadedVm { // to enter a state where subsequent teardown operations will noop. There is a STRONG // correlation between save/restore and keepalive. n.save(vf_keepalive_flag) - .instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED)) + .instrument(tracing::info_span!( + "nvme_manager_save", + vf_keepalive_flag, + CVM_ALLOWED + )) .await .map(|s| NvmeSavedState { nvme_state: s }) } else { diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index afc8f328f9..0f349f6ac0 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -9,6 +9,7 @@ use hvlite_defs::config::Vtl2BaseAddressType; use petri::MemoryConfig; use petri::OpenHclServicingFlags; use petri::PetriVmBuilder; +use petri::ProcessorTopology; use petri::ResolvedArtifact; use petri::openvmm::OpenVmmPetriBackend; use petri::pipette::PipetteClient; @@ -105,7 +106,7 @@ async fn mana_nic_servicing( /// Test an OpenHCL Linux direct VM with many NVMe devices assigned to VTL2 and vmbus relay. #[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])] -async fn many_nvme_devices_servicing( +async fn many_nvme_devices_servicing_heavy( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> Result<(), anyhow::Error> { @@ -121,6 +122,18 @@ async fn many_nvme_devices_servicing( let (mut vm, agent) = config .with_vmbus_redirect(true) + .with_vtl2_base_address_type(Vtl2BaseAddressType::MemoryLayout { + size: Some(768 * 1024 * 1024), // 768MB + }) + .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=16384") // 64MB of private pool for VTL2 NVMe devices. 
+ .with_memory(MemoryConfig { + startup_bytes: 8 * 1024 * 1024, + ..Default::default() + }) // 8GB + .with_processor_topology(ProcessorTopology { + vp_count: 4, + ..Default::default() + }) .modify_backend(|b| { b.with_custom_config(|c| { let device_ids = (0..NUM_NVME_DEVICES) @@ -180,7 +193,7 @@ async fn many_nvme_devices_servicing( vm.restart_openhcl( igvm_file.clone(), OpenHclServicingFlags { - enable_nvme_keepalive: false, + enable_nvme_keepalive: true, ..Default::default() }, ) From 3f312e1c7f9dabaf19c51c3d0423f09c97a8212e Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Wed, 12 Nov 2025 17:57:17 +0000 Subject: [PATCH 4/8] minor self / copilot pr feedback --- .../disk_nvme/nvme_driver/src/queue_pair.rs | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs index 75d9dfb9af..ac5a11cdf1 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs @@ -480,20 +480,42 @@ impl std::error::Error for NvmeError {} impl std::fmt::Display for NvmeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.0.status_code_type() { - spec::StatusCodeType::GENERIC => write!(f, "NVMe SCT general error, SC: {:#x?}", self.0), + spec::StatusCodeType::GENERIC => write!( + f, + "NVMe SCT general error, SC: {:#x?}", + self.0.status_code() + ), spec::StatusCodeType::COMMAND_SPECIFIC => { - write!(f, "NVMe SCT command-specific error, SC: {:#x?}", self.0) + write!( + f, + "NVMe SCT command-specific error, SC: {:#x?}", + self.0.status_code() + ) } spec::StatusCodeType::MEDIA_ERROR => { - write!(f, "NVMe SCT media error, SC: {:#x?}", self.0) + write!(f, "NVMe SCT media error, SC: {:#x?}", self.0.status_code()) } spec::StatusCodeType::PATH_RELATED => { - write!(f, "NVMe SCT path-related error, SC: {:#x?}", self.0) + write!( + f, + "NVMe SCT path-related error, SC: {:#x?}", + self.0.status_code() + ) } spec::StatusCodeType::VENDOR_SPECIFIC => { - write!(f, "NVMe SCT vendor-specific error, SC: {:#x?}", self.0) + write!( + f, + "NVMe SCT vendor-specific error, SC: {:#x?}", + self.0.status_code() + ) } - _ => write!(f, "(other NVMe SCT), {:#x?}", self.0), + _ => write!( + f, + "NVMe SCT unknown ({:#x?}), SC: {:#x?} (raw: {:#x?})", + self.0.status_code_type(), + self.0.status_code(), + self.0 + ), } } } From 52d8d3b06e7da5532b989561b5a027779feff94a Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Wed, 12 Nov 2025 18:24:39 +0000 Subject: [PATCH 5/8] turn on keepalive by default --- petri/src/vm/mod.rs | 13 +++++++++++- .../tests/multiarch/openhcl_servicing.rs | 20 ++++--------------- .../tests/x86_64/openhcl_linux_direct.rs | 16 +++++---------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/petri/src/vm/mod.rs b/petri/src/vm/mod.rs index 5ecfbcd9b6..caaff8160a 100644 --- a/petri/src/vm/mod.rs +++ b/petri/src/vm/mod.rs @@ -1729,9 +1729,10 @@ pub enum IsolationType { } /// Flags controlling servicing behavior. -#[derive(Default, Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct OpenHclServicingFlags { /// Preserve DMA memory for NVMe devices if supported. + /// Defaults to `true`. pub enable_nvme_keepalive: bool, /// Skip any logic that the vmm may have to ignore servicing updates if the supplied igvm file version is not different than the one currently running. 
pub override_version_checks: bool, @@ -1739,6 +1740,16 @@ pub struct OpenHclServicingFlags { pub stop_timeout_hint_secs: Option, } +impl Default for OpenHclServicingFlags { + fn default() -> Self { + Self { + enable_nvme_keepalive: true, + override_version_checks: false, + stop_timeout_hint_secs: None, + } + } +} + /// Petri disk type #[derive(Debug, Clone)] pub enum PetriDiskType { diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index 50e5813cbd..eecb364b3b 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -125,10 +125,7 @@ async fn servicing_keepalive_no_device( config, "OPENHCL_ENABLE_VTL2_GPA_POOL=512", igvm_file, - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, + OpenHclServicingFlags::default(), DEFAULT_SERVICING_COUNT, ) .await @@ -145,10 +142,7 @@ async fn servicing_keepalive_with_device( config.with_vmbus_redirect(true), // Need this to attach the NVMe device "OPENHCL_ENABLE_VTL2_GPA_POOL=512", igvm_file, - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, + OpenHclServicingFlags::default(), 1, // Test is slow with NVMe device, so only do one loop to avoid timeout ) .await @@ -308,14 +302,8 @@ async fn servicing_keepalive_with_namespace_update( cmd!(sh, "ls /dev/sda").run().await?; fault_start_updater.set(true).await; - vm.save_openhcl( - igvm_file.clone(), - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, - ) - .await?; + vm.save_openhcl(igvm_file.clone(), OpenHclServicingFlags::default()) + .await?; ns_change_send .call(NamespaceChange::ChangeNotification, KEEPALIVE_VTL2_NSID) .await?; diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index 0f349f6ac0..53638d2f26 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -123,13 +123,13 @@ async fn many_nvme_devices_servicing_heavy( let (mut vm, agent) = config .with_vmbus_redirect(true) .with_vtl2_base_address_type(Vtl2BaseAddressType::MemoryLayout { - size: Some(768 * 1024 * 1024), // 768MB + size: Some((960 + 64) * 1024 * 1024), // 960MB as specified in manfiest, plus 64MB extra for private pool. }) .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=16384") // 64MB of private pool for VTL2 NVMe devices. .with_memory(MemoryConfig { - startup_bytes: 8 * 1024 * 1024, + startup_bytes: 8 * 1024 * 1024 * 1024, // 8GB ..Default::default() - }) // 8GB + }) .with_processor_topology(ProcessorTopology { vp_count: 4, ..Default::default() @@ -190,14 +190,8 @@ async fn many_nvme_devices_servicing_heavy( // Test that inspect serialization works with the old version. 
vm.test_inspect_openhcl().await?; - vm.restart_openhcl( - igvm_file.clone(), - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, - ) - .await?; + vm.restart_openhcl(igvm_file.clone(), OpenHclServicingFlags::default()) + .await?; agent.ping().await?; From 0d863a2bc2f734393d356e93a815881d9f294521 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Wed, 12 Nov 2025 13:40:36 -0800 Subject: [PATCH 6/8] move deciding on default flags to the backend, since its the backend that decides what is default and supported (or not) --- petri/src/vm/hyperv/mod.rs | 8 ++++ petri/src/vm/mod.rs | 16 +++++-- petri/src/vm/openvmm/mod.rs | 9 ++++ .../tests/multiarch/openhcl_servicing.rs | 48 ++++++++----------- .../tests/x86_64/openhcl_linux_direct.rs | 12 ++--- 5 files changed, 57 insertions(+), 36 deletions(-) diff --git a/petri/src/vm/hyperv/mod.rs b/petri/src/vm/hyperv/mod.rs index 2938538be9..c103ca6470 100644 --- a/petri/src/vm/hyperv/mod.rs +++ b/petri/src/vm/hyperv/mod.rs @@ -151,6 +151,14 @@ impl PetriVmmBackend for HyperVPetriBackend { (firmware.quirks().hyperv, VmmQuirks::default()) } + fn default_servicing_flags() -> OpenHclServicingFlags { + OpenHclServicingFlags { + enable_nvme_keepalive: false, // TODO: Support NVMe KA in the Hyper-V Petri Backend + override_version_checks: false, + stop_timeout_hint_secs: None, + } + } + fn new(_resolver: &ArtifactResolver<'_>) -> Self { HyperVPetriBackend {} } diff --git a/petri/src/vm/mod.rs b/petri/src/vm/mod.rs index caaff8160a..f1aaaec3fb 100644 --- a/petri/src/vm/mod.rs +++ b/petri/src/vm/mod.rs @@ -154,6 +154,9 @@ pub trait PetriVmmBackend { /// Select backend specific quirks guest and vmm quirks. fn quirks(firmware: &Firmware) -> (GuestQuirksInner, VmmQuirks); + /// Get the default servicing flags (based on what this backend supports) + fn default_servicing_flags() -> OpenHclServicingFlags; + /// Resolve any artifacts needed to use this backend fn new(resolver: &ArtifactResolver<'_>) -> Self; @@ -698,6 +701,11 @@ impl PetriVmBuilder { self.config.arch } + /// Get the default OpenHCL servicing flags for this config + pub fn default_servicing_flags(&self) -> OpenHclServicingFlags { + T::default_servicing_flags() + } + /// Get the backend-specific config builder pub fn modify_backend( mut self, @@ -1740,10 +1748,12 @@ pub struct OpenHclServicingFlags { pub stop_timeout_hint_secs: Option, } -impl Default for OpenHclServicingFlags { - fn default() -> Self { +impl OpenHclServicingFlags { + /// Architecture-specific default flags for servicing OpenHCL. + /// e.g. aarch64 does not support NVMe Keepalive at the moment. 
+ pub fn default(arch: MachineArch) -> Self { Self { - enable_nvme_keepalive: true, + enable_nvme_keepalive: arch == MachineArch::X86_64, override_version_checks: false, stop_timeout_hint_secs: None, } diff --git a/petri/src/vm/openvmm/mod.rs b/petri/src/vm/openvmm/mod.rs index 45993de944..afa9d10702 100644 --- a/petri/src/vm/openvmm/mod.rs +++ b/petri/src/vm/openvmm/mod.rs @@ -19,6 +19,7 @@ pub use runtime::PetriVmOpenVmm; use crate::BootDeviceType; use crate::Firmware; +use crate::OpenHclServicingFlags; use crate::PetriDiskType; use crate::PetriLogFile; use crate::PetriVmConfig; @@ -112,6 +113,14 @@ impl PetriVmmBackend for OpenVmmPetriBackend { ) } + fn default_servicing_flags() -> OpenHclServicingFlags { + OpenHclServicingFlags { + enable_nvme_keepalive: true, + override_version_checks: false, + stop_timeout_hint_secs: None, + } + } + fn new(resolver: &ArtifactResolver<'_>) -> Self { OpenVmmPetriBackend { openvmm_path: resolver diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index eecb364b3b..5b377f3ce1 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -101,17 +101,9 @@ async fn basic_servicing( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { - openhcl_servicing_core( - config, - "", - igvm_file, - OpenHclServicingFlags { - override_version_checks: true, - ..Default::default() - }, - DEFAULT_SERVICING_COUNT, - ) - .await + let mut flags = config.default_servicing_flags(); + flags.override_version_checks = true; + openhcl_servicing_core(config, "", igvm_file, flags, DEFAULT_SERVICING_COUNT).await } /// Test servicing an OpenHCL VM from the current version to itself @@ -121,11 +113,13 @@ async fn servicing_keepalive_no_device( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { + let mut flags = config.default_servicing_flags(); + flags.override_version_checks = true; openhcl_servicing_core( config, "OPENHCL_ENABLE_VTL2_GPA_POOL=512", igvm_file, - OpenHclServicingFlags::default(), + flags, DEFAULT_SERVICING_COUNT, ) .await @@ -138,11 +132,12 @@ async fn servicing_keepalive_with_device( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); openhcl_servicing_core( config.with_vmbus_redirect(true), // Need this to attach the NVMe device "OPENHCL_ENABLE_VTL2_GPA_POOL=512", igvm_file, - OpenHclServicingFlags::default(), + flags, 1, // Test is slow with NVMe device, so only do one loop to avoid timeout ) .await @@ -159,6 +154,8 @@ async fn servicing_upgrade( ResolvedArtifact, ), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); + // TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release. openhcl_servicing_core( config @@ -166,7 +163,7 @@ async fn servicing_upgrade( .with_guest_state_lifetime(PetriGuestStateLifetime::Disk), "", to_igvm, - OpenHclServicingFlags::default(), + flags, DEFAULT_SERVICING_COUNT, ) .await @@ -184,13 +181,15 @@ async fn servicing_downgrade( ), ) -> anyhow::Result<()> { // TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release. 
+ let mut flags = config.default_servicing_flags(); + flags.enable_nvme_keepalive = false; // NVMe keepalive not supported in 2505 release openhcl_servicing_core( config .with_custom_openhcl(from_igvm) .with_guest_state_lifetime(PetriGuestStateLifetime::Disk), "", to_igvm, - OpenHclServicingFlags::default(), + flags, DEFAULT_SERVICING_COUNT, ) .await @@ -201,6 +200,7 @@ async fn servicing_shutdown_ic( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(move |b| { @@ -245,8 +245,7 @@ async fn servicing_shutdown_ic( cmd!(sh, "ls /dev/sda").run().await?; let shutdown_ic = vm.backend().wait_for_enlightened_shutdown_ready().await?; - vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) - .await?; + vm.restart_openhcl(igvm_file, flags).await?; // VTL2 will disconnect and then reconnect the shutdown IC across a servicing event. tracing::info!("waiting for shutdown IC to close"); shutdown_ic.await.unwrap_err(); @@ -270,6 +269,7 @@ async fn servicing_keepalive_with_namespace_update( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> Result<(), anyhow::Error> { + let flags = config.default_servicing_flags(); let mut fault_start_updater = CellUpdater::new(false); let (ns_change_send, ns_change_recv) = mesh::channel::(); let (aer_verify_send, aer_verify_recv) = mesh::oneshot::<()>(); @@ -302,8 +302,7 @@ async fn servicing_keepalive_with_namespace_update( cmd!(sh, "ls /dev/sda").run().await?; fault_start_updater.set(true).await; - vm.save_openhcl(igvm_file.clone(), OpenHclServicingFlags::default()) - .await?; + vm.save_openhcl(igvm_file.clone(), flags).await?; ns_change_send .call(NamespaceChange::ChangeNotification, KEEPALIVE_VTL2_NSID) .await?; @@ -427,6 +426,8 @@ async fn apply_fault_with_keepalive( mut fault_start_updater: CellUpdater, igvm_file: ResolvedArtifact, ) -> Result<(), anyhow::Error> { + let mut flags = config.default_servicing_flags(); + flags.enable_nvme_keepalive = true; let (mut vm, agent) = create_keepalive_test_config(config, fault_configuration).await?; agent.ping().await?; @@ -436,14 +437,7 @@ async fn apply_fault_with_keepalive( cmd!(sh, "ls /dev/sda").run().await?; fault_start_updater.set(true).await; - vm.restart_openhcl( - igvm_file.clone(), - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, - ) - .await?; + vm.restart_openhcl(igvm_file.clone(), flags).await?; fault_start_updater.set(false).await; agent.ping().await?; diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index 53638d2f26..07b2f411d2 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -7,7 +7,6 @@ use crate::x86_64::storage::new_test_vtl2_nvme_device; use guid::Guid; use hvlite_defs::config::Vtl2BaseAddressType; use petri::MemoryConfig; -use petri::OpenHclServicingFlags; use petri::PetriVmBuilder; use petri::ProcessorTopology; use petri::ResolvedArtifact; @@ -85,6 +84,7 @@ async fn mana_nic_servicing( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> Result<(), anyhow::Error> { + let flags = config.default_servicing_flags(); let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(|b| b.with_nic()) @@ -93,8 +93,7 @@ async fn mana_nic_servicing( validate_mana_nic(&agent).await?; - 
vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) - .await?; + vm.restart_openhcl(igvm_file, flags).await?; validate_mana_nic(&agent).await?; @@ -120,10 +119,12 @@ async fn many_nvme_devices_servicing_heavy( const GUID_UPDATE_PREFIX: u16 = 0x1110; const NSID_OFFSET: u32 = 0x10; + let flags = config.default_servicing_flags(); + let (mut vm, agent) = config .with_vmbus_redirect(true) .with_vtl2_base_address_type(Vtl2BaseAddressType::MemoryLayout { - size: Some((960 + 64) * 1024 * 1024), // 960MB as specified in manfiest, plus 64MB extra for private pool. + size: Some((960 + 64) * 1024 * 1024), // 960MB as specified in manifest, plus 64MB extra for private pool. }) .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=16384") // 64MB of private pool for VTL2 NVMe devices. .with_memory(MemoryConfig { @@ -190,8 +191,7 @@ async fn many_nvme_devices_servicing_heavy( // Test that inspect serialization works with the old version. vm.test_inspect_openhcl().await?; - vm.restart_openhcl(igvm_file.clone(), OpenHclServicingFlags::default()) - .await?; + vm.restart_openhcl(igvm_file.clone(), flags).await?; agent.ping().await?; From 849405d20a03ce26e97e45cea42be3d6ff777273 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Wed, 12 Nov 2025 13:47:28 -0800 Subject: [PATCH 7/8] oops --- petri/src/vm/mod.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/petri/src/vm/mod.rs b/petri/src/vm/mod.rs index f1aaaec3fb..360d449cae 100644 --- a/petri/src/vm/mod.rs +++ b/petri/src/vm/mod.rs @@ -1748,18 +1748,6 @@ pub struct OpenHclServicingFlags { pub stop_timeout_hint_secs: Option, } -impl OpenHclServicingFlags { - /// Architecture-specific default flags for servicing OpenHCL. - /// e.g. aarch64 does not support NVMe Keepalive at the moment. - pub fn default(arch: MachineArch) -> Self { - Self { - enable_nvme_keepalive: arch == MachineArch::X86_64, - override_version_checks: false, - stop_timeout_hint_secs: None, - } - } -} - /// Petri disk type #[derive(Debug, Clone)] pub enum PetriDiskType { From 4d367461d83eb506a721f18e9980bf23dc027207 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Wed, 12 Nov 2025 19:39:28 -0800 Subject: [PATCH 8/8] don't set override --- vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index 5b377f3ce1..a08c6c4a5c 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -113,8 +113,7 @@ async fn servicing_keepalive_no_device( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { - let mut flags = config.default_servicing_flags(); - flags.override_version_checks = true; + let flags = config.default_servicing_flags(); openhcl_servicing_core( config, "OPENHCL_ENABLE_VTL2_GPA_POOL=512",
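
For illustration, here is a minimal sketch (not part of the series) of the test pattern patches 6-8 converge on: ask the backend for its supported servicing defaults and override only the fields a given test cares about, instead of spelling out OpenHclServicingFlags by hand. It assumes the items already in scope in openhcl_servicing.rs per the diffs above (openhcl_servicing_core, DEFAULT_SERVICING_COUNT, PetriVmBuilder, ResolvedArtifact), assumes openhcl_servicing_core accepts any PetriVmmBackend, and elides artifact type parameters just as the pasted diffs do; the function name itself is hypothetical.

/// Hypothetical test: the servicing loop works on any backend because the
/// backend, not the test, decides which servicing features are supported.
async fn servicing_with_backend_defaults<T: PetriVmmBackend>(
    config: PetriVmBuilder<T>,
    igvm_file: ResolvedArtifact,
) -> anyhow::Result<()> {
    // OpenVmmPetriBackend reports enable_nvme_keepalive = true; the Hyper-V
    // backend reports false until it supports NVMe keepalive.
    let mut flags = config.default_servicing_flags();
    // Layer test-specific tweaks on top of the backend defaults.
    flags.override_version_checks = true;

    openhcl_servicing_core(
        config,
        // Same VTL2 private pool setting the keepalive tests above use.
        "OPENHCL_ENABLE_VTL2_GPA_POOL=512",
        igvm_file,
        flags,
        DEFAULT_SERVICING_COUNT,
    )
    .await
}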