Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion openhcl/underhill_core/src/dispatch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,11 @@ impl LoadedVm {
// to enter a state where subsequent teardown operations will noop. There is a STRONG
// correlation between save/restore and keepalive.
n.save(vf_keepalive_flag)
.instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED))
.instrument(tracing::info_span!(
"nvme_manager_save",
vf_keepalive_flag,
CVM_ALLOWED
))
.await
.map(|s| NvmeSavedState { nvme_state: s })
} else {
Expand Down
8 changes: 8 additions & 0 deletions petri/src/vm/hyperv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ impl PetriVmmBackend for HyperVPetriBackend {
(firmware.quirks().hyperv, VmmQuirks::default())
}

fn default_servicing_flags() -> OpenHclServicingFlags {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How much work is needed for this support?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. I remember adding this check, but I don't quite know the delta. I will certainly be digging into this soon-ish.

OpenHclServicingFlags {
enable_nvme_keepalive: false, // TODO: Support NVMe KA in the Hyper-V Petri Backend
override_version_checks: false,
stop_timeout_hint_secs: None,
}
}

fn new(_resolver: &ArtifactResolver<'_>) -> Self {
HyperVPetriBackend {}
}
Expand Down
11 changes: 10 additions & 1 deletion petri/src/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ pub trait PetriVmmBackend {
/// Select backend specific quirks guest and vmm quirks.
fn quirks(firmware: &Firmware) -> (GuestQuirksInner, VmmQuirks);

/// Get the default servicing flags (based on what this backend supports)
fn default_servicing_flags() -> OpenHclServicingFlags;
Comment on lines +157 to +158
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tjones60 does this look like the right way to get some per-backend defaults?


/// Resolve any artifacts needed to use this backend
fn new(resolver: &ArtifactResolver<'_>) -> Self;

Expand Down Expand Up @@ -698,6 +701,11 @@ impl<T: PetriVmmBackend> PetriVmBuilder<T> {
self.config.arch
}

/// Get the default OpenHCL servicing flags for this config
pub fn default_servicing_flags(&self) -> OpenHclServicingFlags {
T::default_servicing_flags()
}

/// Get the backend-specific config builder
pub fn modify_backend(
mut self,
Expand Down Expand Up @@ -1729,9 +1737,10 @@ pub enum IsolationType {
}

/// Flags controlling servicing behavior.
#[derive(Default, Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy)]
pub struct OpenHclServicingFlags {
/// Preserve DMA memory for NVMe devices if supported.
/// Defaults to `true`.
pub enable_nvme_keepalive: bool,
/// Skip any logic that the vmm may have to ignore servicing updates if the supplied igvm file version is not different than the one currently running.
pub override_version_checks: bool,
Expand Down
9 changes: 9 additions & 0 deletions petri/src/vm/openvmm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub use runtime::PetriVmOpenVmm;

use crate::BootDeviceType;
use crate::Firmware;
use crate::OpenHclServicingFlags;
use crate::PetriDiskType;
use crate::PetriLogFile;
use crate::PetriVmConfig;
Expand Down Expand Up @@ -112,6 +113,14 @@ impl PetriVmmBackend for OpenVmmPetriBackend {
)
}

fn default_servicing_flags() -> OpenHclServicingFlags {
OpenHclServicingFlags {
enable_nvme_keepalive: true,
override_version_checks: false,
stop_timeout_hint_secs: None,
}
Comment on lines +117 to +121
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

fn new(resolver: &ArtifactResolver<'_>) -> Self {
OpenVmmPetriBackend {
openvmm_path: resolver
Expand Down
59 changes: 20 additions & 39 deletions vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,9 @@ async fn basic_servicing<T: PetriVmmBackend>(
config: PetriVmBuilder<T>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> anyhow::Result<()> {
openhcl_servicing_core(
config,
"",
igvm_file,
OpenHclServicingFlags {
override_version_checks: true,
..Default::default()
},
DEFAULT_SERVICING_COUNT,
)
.await
let mut flags = config.default_servicing_flags();
flags.override_version_checks = true;
openhcl_servicing_core(config, "", igvm_file, flags, DEFAULT_SERVICING_COUNT).await
}

/// Test servicing an OpenHCL VM from the current version to itself
Expand All @@ -121,14 +113,12 @@ async fn servicing_keepalive_no_device<T: PetriVmmBackend>(
config: PetriVmBuilder<T>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> anyhow::Result<()> {
let flags = config.default_servicing_flags();
openhcl_servicing_core(
config,
"OPENHCL_ENABLE_VTL2_GPA_POOL=512",
igvm_file,
OpenHclServicingFlags {
enable_nvme_keepalive: true,
..Default::default()
},
flags,
DEFAULT_SERVICING_COUNT,
)
.await
Expand All @@ -141,14 +131,12 @@ async fn servicing_keepalive_with_device<T: PetriVmmBackend>(
config: PetriVmBuilder<T>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> anyhow::Result<()> {
let flags = config.default_servicing_flags();
openhcl_servicing_core(
config.with_vmbus_redirect(true), // Need this to attach the NVMe device
"OPENHCL_ENABLE_VTL2_GPA_POOL=512",
igvm_file,
OpenHclServicingFlags {
enable_nvme_keepalive: true,
..Default::default()
},
flags,
1, // Test is slow with NVMe device, so only do one loop to avoid timeout
)
.await
Expand All @@ -165,14 +153,16 @@ async fn servicing_upgrade<T: PetriVmmBackend>(
ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,
),
) -> anyhow::Result<()> {
let flags = config.default_servicing_flags();

// TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release.
openhcl_servicing_core(
config
.with_custom_openhcl(from_igvm)
.with_guest_state_lifetime(PetriGuestStateLifetime::Disk),
"",
to_igvm,
OpenHclServicingFlags::default(),
flags,
DEFAULT_SERVICING_COUNT,
)
.await
Expand All @@ -190,13 +180,15 @@ async fn servicing_downgrade<T: PetriVmmBackend>(
),
) -> anyhow::Result<()> {
// TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release.
let mut flags = config.default_servicing_flags();
flags.enable_nvme_keepalive = false; // NVMe keepalive not supported in 2505 release
openhcl_servicing_core(
config
.with_custom_openhcl(from_igvm)
.with_guest_state_lifetime(PetriGuestStateLifetime::Disk),
"",
to_igvm,
OpenHclServicingFlags::default(),
flags,
DEFAULT_SERVICING_COUNT,
)
.await
Expand All @@ -207,6 +199,7 @@ async fn servicing_shutdown_ic(
config: PetriVmBuilder<OpenVmmPetriBackend>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> anyhow::Result<()> {
let flags = config.default_servicing_flags();
let (mut vm, agent) = config
.with_vmbus_redirect(true)
.modify_backend(move |b| {
Expand Down Expand Up @@ -251,8 +244,7 @@ async fn servicing_shutdown_ic(
cmd!(sh, "ls /dev/sda").run().await?;

let shutdown_ic = vm.backend().wait_for_enlightened_shutdown_ready().await?;
vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default())
.await?;
vm.restart_openhcl(igvm_file, flags).await?;
// VTL2 will disconnect and then reconnect the shutdown IC across a servicing event.
tracing::info!("waiting for shutdown IC to close");
shutdown_ic.await.unwrap_err();
Expand All @@ -276,6 +268,7 @@ async fn servicing_keepalive_with_namespace_update(
config: PetriVmBuilder<OpenVmmPetriBackend>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> Result<(), anyhow::Error> {
let flags = config.default_servicing_flags();
let mut fault_start_updater = CellUpdater::new(false);
let (ns_change_send, ns_change_recv) = mesh::channel::<NamespaceChange>();
let (aer_verify_send, aer_verify_recv) = mesh::oneshot::<()>();
Expand Down Expand Up @@ -308,14 +301,7 @@ async fn servicing_keepalive_with_namespace_update(
cmd!(sh, "ls /dev/sda").run().await?;

fault_start_updater.set(true).await;
vm.save_openhcl(
igvm_file.clone(),
OpenHclServicingFlags {
enable_nvme_keepalive: true,
..Default::default()
},
)
.await?;
vm.save_openhcl(igvm_file.clone(), flags).await?;
ns_change_send
.call(NamespaceChange::ChangeNotification, KEEPALIVE_VTL2_NSID)
.await?;
Expand Down Expand Up @@ -439,6 +425,8 @@ async fn apply_fault_with_keepalive(
mut fault_start_updater: CellUpdater<bool>,
igvm_file: ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,
) -> Result<(), anyhow::Error> {
let mut flags = config.default_servicing_flags();
flags.enable_nvme_keepalive = true;
let (mut vm, agent) = create_keepalive_test_config(config, fault_configuration).await?;

agent.ping().await?;
Expand All @@ -448,14 +436,7 @@ async fn apply_fault_with_keepalive(
cmd!(sh, "ls /dev/sda").run().await?;

fault_start_updater.set(true).await;
vm.restart_openhcl(
igvm_file.clone(),
OpenHclServicingFlags {
enable_nvme_keepalive: true,
..Default::default()
},
)
.await?;
vm.restart_openhcl(igvm_file.clone(), flags).await?;

fault_start_updater.set(false).await;
agent.ping().await?;
Expand Down
31 changes: 19 additions & 12 deletions vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use crate::x86_64::storage::new_test_vtl2_nvme_device;
use guid::Guid;
use hvlite_defs::config::Vtl2BaseAddressType;
use petri::MemoryConfig;
use petri::OpenHclServicingFlags;
use petri::PetriVmBuilder;
use petri::ProcessorTopology;
use petri::ResolvedArtifact;
use petri::openvmm::OpenVmmPetriBackend;
use petri::pipette::PipetteClient;
Expand Down Expand Up @@ -84,6 +84,7 @@ async fn mana_nic_servicing(
config: PetriVmBuilder<OpenVmmPetriBackend>,
(igvm_file,): (ResolvedArtifact<LATEST_LINUX_DIRECT_TEST_X64>,),
) -> Result<(), anyhow::Error> {
let flags = config.default_servicing_flags();
let (mut vm, agent) = config
.with_vmbus_redirect(true)
.modify_backend(|b| b.with_nic())
Expand All @@ -92,8 +93,7 @@ async fn mana_nic_servicing(

validate_mana_nic(&agent).await?;

vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default())
.await?;
vm.restart_openhcl(igvm_file, flags).await?;

validate_mana_nic(&agent).await?;

Expand All @@ -105,7 +105,7 @@ async fn mana_nic_servicing(

/// Test an OpenHCL Linux direct VM with many NVMe devices assigned to VTL2 and vmbus relay.
#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
async fn many_nvme_devices_servicing(
async fn many_nvme_devices_servicing_heavy(
config: PetriVmBuilder<OpenVmmPetriBackend>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> Result<(), anyhow::Error> {
Expand All @@ -119,8 +119,22 @@ async fn many_nvme_devices_servicing(
const GUID_UPDATE_PREFIX: u16 = 0x1110;
const NSID_OFFSET: u32 = 0x10;

let flags = config.default_servicing_flags();

let (mut vm, agent) = config
.with_vmbus_redirect(true)
.with_vtl2_base_address_type(Vtl2BaseAddressType::MemoryLayout {
size: Some((960 + 64) * 1024 * 1024), // 960MB as specified in manifest, plus 64MB extra for private pool.
})
.with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=16384") // 64MB of private pool for VTL2 NVMe devices.
.with_memory(MemoryConfig {
startup_bytes: 8 * 1024 * 1024 * 1024, // 8GB
..Default::default()
})
.with_processor_topology(ProcessorTopology {
vp_count: 4,
..Default::default()
})
.modify_backend(|b| {
b.with_custom_config(|c| {
let device_ids = (0..NUM_NVME_DEVICES)
Expand Down Expand Up @@ -177,14 +191,7 @@ async fn many_nvme_devices_servicing(
// Test that inspect serialization works with the old version.
vm.test_inspect_openhcl().await?;

vm.restart_openhcl(
igvm_file.clone(),
OpenHclServicingFlags {
enable_nvme_keepalive: false,
..Default::default()
},
)
.await?;
vm.restart_openhcl(igvm_file.clone(), flags).await?;

agent.ping().await?;

Expand Down