diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 7aeeb6409f..b9ea423062 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -754,7 +754,11 @@ impl LoadedVm { // to enter a state where subsequent teardown operations will noop. There is a STRONG // correlation between save/restore and keepalive. n.save(vf_keepalive_flag) - .instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED)) + .instrument(tracing::info_span!( + "nvme_manager_save", + vf_keepalive_flag, + CVM_ALLOWED + )) .await .map(|s| NvmeSavedState { nvme_state: s }) } else { diff --git a/petri/src/vm/hyperv/mod.rs b/petri/src/vm/hyperv/mod.rs index 2938538be9..c103ca6470 100644 --- a/petri/src/vm/hyperv/mod.rs +++ b/petri/src/vm/hyperv/mod.rs @@ -151,6 +151,14 @@ impl PetriVmmBackend for HyperVPetriBackend { (firmware.quirks().hyperv, VmmQuirks::default()) } + fn default_servicing_flags() -> OpenHclServicingFlags { + OpenHclServicingFlags { + enable_nvme_keepalive: false, // TODO: Support NVMe KA in the Hyper-V Petri Backend + override_version_checks: false, + stop_timeout_hint_secs: None, + } + } + fn new(_resolver: &ArtifactResolver<'_>) -> Self { HyperVPetriBackend {} } diff --git a/petri/src/vm/mod.rs b/petri/src/vm/mod.rs index 5ecfbcd9b6..360d449cae 100644 --- a/petri/src/vm/mod.rs +++ b/petri/src/vm/mod.rs @@ -154,6 +154,9 @@ pub trait PetriVmmBackend { /// Select backend specific quirks guest and vmm quirks. 
fn quirks(firmware: &Firmware) -> (GuestQuirksInner, VmmQuirks); + /// Get the default servicing flags (based on what this backend supports) + fn default_servicing_flags() -> OpenHclServicingFlags; + /// Resolve any artifacts needed to use this backend fn new(resolver: &ArtifactResolver<'_>) -> Self; @@ -698,6 +701,11 @@ impl PetriVmBuilder { self.config.arch } + /// Get the default OpenHCL servicing flags for this config + pub fn default_servicing_flags(&self) -> OpenHclServicingFlags { + T::default_servicing_flags() + } + /// Get the backend-specific config builder pub fn modify_backend( mut self, @@ -1729,9 +1737,10 @@ pub enum IsolationType { } /// Flags controlling servicing behavior. -#[derive(Default, Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct OpenHclServicingFlags { /// Preserve DMA memory for NVMe devices if supported. + /// The default is backend-specific; see `PetriVmmBackend::default_servicing_flags`. pub enable_nvme_keepalive: bool, /// Skip any logic that the vmm may have to ignore servicing updates if the supplied igvm file version is not different than the one currently running. 
pub override_version_checks: bool, diff --git a/petri/src/vm/openvmm/mod.rs b/petri/src/vm/openvmm/mod.rs index 45993de944..afa9d10702 100644 --- a/petri/src/vm/openvmm/mod.rs +++ b/petri/src/vm/openvmm/mod.rs @@ -19,6 +19,7 @@ pub use runtime::PetriVmOpenVmm; use crate::BootDeviceType; use crate::Firmware; +use crate::OpenHclServicingFlags; use crate::PetriDiskType; use crate::PetriLogFile; use crate::PetriVmConfig; @@ -112,6 +113,14 @@ impl PetriVmmBackend for OpenVmmPetriBackend { ) } + fn default_servicing_flags() -> OpenHclServicingFlags { + OpenHclServicingFlags { + enable_nvme_keepalive: true, + override_version_checks: false, + stop_timeout_hint_secs: None, + } + } + fn new(resolver: &ArtifactResolver<'_>) -> Self { OpenVmmPetriBackend { openvmm_path: resolver diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index 50e5813cbd..a08c6c4a5c 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -101,17 +101,9 @@ async fn basic_servicing( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { - openhcl_servicing_core( - config, - "", - igvm_file, - OpenHclServicingFlags { - override_version_checks: true, - ..Default::default() - }, - DEFAULT_SERVICING_COUNT, - ) - .await + let mut flags = config.default_servicing_flags(); + flags.override_version_checks = true; + openhcl_servicing_core(config, "", igvm_file, flags, DEFAULT_SERVICING_COUNT).await } /// Test servicing an OpenHCL VM from the current version to itself @@ -121,14 +113,12 @@ async fn servicing_keepalive_no_device( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); openhcl_servicing_core( config, "OPENHCL_ENABLE_VTL2_GPA_POOL=512", igvm_file, - OpenHclServicingFlags { - enable_nvme_keepalive: true, - 
..Default::default() - }, + flags, DEFAULT_SERVICING_COUNT, ) .await @@ -141,14 +131,12 @@ async fn servicing_keepalive_with_device( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); openhcl_servicing_core( config.with_vmbus_redirect(true), // Need this to attach the NVMe device "OPENHCL_ENABLE_VTL2_GPA_POOL=512", igvm_file, - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, + flags, 1, // Test is slow with NVMe device, so only do one loop to avoid timeout ) .await @@ -165,6 +153,8 @@ async fn servicing_upgrade( ResolvedArtifact, ), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); + // TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release. openhcl_servicing_core( config @@ -172,7 +162,7 @@ async fn servicing_upgrade( .with_guest_state_lifetime(PetriGuestStateLifetime::Disk), "", to_igvm, - OpenHclServicingFlags::default(), + flags, DEFAULT_SERVICING_COUNT, ) .await @@ -190,13 +180,15 @@ async fn servicing_downgrade( ), ) -> anyhow::Result<()> { // TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release. 
+ let mut flags = config.default_servicing_flags(); + flags.enable_nvme_keepalive = false; // NVMe keepalive not supported in 2505 release openhcl_servicing_core( config .with_custom_openhcl(from_igvm) .with_guest_state_lifetime(PetriGuestStateLifetime::Disk), "", to_igvm, - OpenHclServicingFlags::default(), + flags, DEFAULT_SERVICING_COUNT, ) .await @@ -207,6 +199,7 @@ async fn servicing_shutdown_ic( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> anyhow::Result<()> { + let flags = config.default_servicing_flags(); let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(move |b| { @@ -251,8 +244,7 @@ async fn servicing_shutdown_ic( cmd!(sh, "ls /dev/sda").run().await?; let shutdown_ic = vm.backend().wait_for_enlightened_shutdown_ready().await?; - vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) - .await?; + vm.restart_openhcl(igvm_file, flags).await?; // VTL2 will disconnect and then reconnect the shutdown IC across a servicing event. tracing::info!("waiting for shutdown IC to close"); shutdown_ic.await.unwrap_err(); @@ -276,6 +268,7 @@ async fn servicing_keepalive_with_namespace_update( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> Result<(), anyhow::Error> { + let flags = config.default_servicing_flags(); let mut fault_start_updater = CellUpdater::new(false); let (ns_change_send, ns_change_recv) = mesh::channel::(); let (aer_verify_send, aer_verify_recv) = mesh::oneshot::<()>(); @@ -308,14 +301,7 @@ async fn servicing_keepalive_with_namespace_update( cmd!(sh, "ls /dev/sda").run().await?; fault_start_updater.set(true).await; - vm.save_openhcl( - igvm_file.clone(), - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, - ) - .await?; + vm.save_openhcl(igvm_file.clone(), flags).await?; ns_change_send .call(NamespaceChange::ChangeNotification, KEEPALIVE_VTL2_NSID) .await?; @@ -439,6 +425,8 @@ async fn apply_fault_with_keepalive( mut fault_start_updater: CellUpdater, 
igvm_file: ResolvedArtifact, ) -> Result<(), anyhow::Error> { + let mut flags = config.default_servicing_flags(); + flags.enable_nvme_keepalive = true; let (mut vm, agent) = create_keepalive_test_config(config, fault_configuration).await?; agent.ping().await?; @@ -448,14 +436,7 @@ async fn apply_fault_with_keepalive( cmd!(sh, "ls /dev/sda").run().await?; fault_start_updater.set(true).await; - vm.restart_openhcl( - igvm_file.clone(), - OpenHclServicingFlags { - enable_nvme_keepalive: true, - ..Default::default() - }, - ) - .await?; + vm.restart_openhcl(igvm_file.clone(), flags).await?; fault_start_updater.set(false).await; agent.ping().await?; diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index afc8f328f9..07b2f411d2 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -7,8 +7,8 @@ use crate::x86_64::storage::new_test_vtl2_nvme_device; use guid::Guid; use hvlite_defs::config::Vtl2BaseAddressType; use petri::MemoryConfig; -use petri::OpenHclServicingFlags; use petri::PetriVmBuilder; +use petri::ProcessorTopology; use petri::ResolvedArtifact; use petri::openvmm::OpenVmmPetriBackend; use petri::pipette::PipetteClient; @@ -84,6 +84,7 @@ async fn mana_nic_servicing( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> Result<(), anyhow::Error> { + let flags = config.default_servicing_flags(); let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(|b| b.with_nic()) @@ -92,8 +93,7 @@ async fn mana_nic_servicing( validate_mana_nic(&agent).await?; - vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) - .await?; + vm.restart_openhcl(igvm_file, flags).await?; validate_mana_nic(&agent).await?; @@ -105,7 +105,7 @@ async fn mana_nic_servicing( /// Test an OpenHCL Linux direct VM with many NVMe devices assigned to VTL2 and vmbus relay. 
#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])] -async fn many_nvme_devices_servicing( +async fn many_nvme_devices_servicing_heavy( config: PetriVmBuilder, (igvm_file,): (ResolvedArtifact,), ) -> Result<(), anyhow::Error> { @@ -119,8 +119,22 @@ async fn many_nvme_devices_servicing( const GUID_UPDATE_PREFIX: u16 = 0x1110; const NSID_OFFSET: u32 = 0x10; + let flags = config.default_servicing_flags(); + let (mut vm, agent) = config .with_vmbus_redirect(true) + .with_vtl2_base_address_type(Vtl2BaseAddressType::MemoryLayout { + size: Some((960 + 64) * 1024 * 1024), // 960MB as specified in manifest, plus 64MB extra for private pool. + }) + .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=16384") // 64MB of private pool for VTL2 NVMe devices. + .with_memory(MemoryConfig { + startup_bytes: 8 * 1024 * 1024 * 1024, // 8GB + ..Default::default() + }) + .with_processor_topology(ProcessorTopology { + vp_count: 4, + ..Default::default() + }) .modify_backend(|b| { b.with_custom_config(|c| { let device_ids = (0..NUM_NVME_DEVICES) @@ -177,14 +191,7 @@ async fn many_nvme_devices_servicing( // Test that inspect serialization works with the old version. vm.test_inspect_openhcl().await?; - vm.restart_openhcl( - igvm_file.clone(), - OpenHclServicingFlags { - enable_nvme_keepalive: false, - ..Default::default() - }, - ) - .await?; + vm.restart_openhcl(igvm_file.clone(), flags).await?; agent.ping().await?;