From 62243364629ee4e26a15b185eb0af484cff550b5 Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 09:49:37 -0700 Subject: [PATCH 1/8] aarch64: add ACPI boot support for Linux direct boot ARM64 Linux direct boot previously only supported device-tree mode, which limited device discovery to what could be expressed in DT nodes. This was insufficient for VMBus on older kernels, and diverged from the ACPI-based path used by UEFI boot. This change adds a new ACPI boot path for ARM64 Linux direct boot, making it the default. The VMM now synthesizes a minimal EFI system table, memory map, and full ACPI tables (HW_REDUCED_ACPI FADT, GICv3 MADT with v2m MSI frame, GTDT, DSDT with VMBus and SBSA UARTs), then builds a stub device tree whose only purpose is to point the kernel's EFI stub at those structures. The kernel enters its standard ACPI discovery path from there. The previous DT-only behavior is preserved via a new --device-tree CLI flag; omitting it selects ACPI mode. Supporting changes: - Consolidate GicInfo + pmu_gsiv + virt_timer_ppi into a single Aarch64PlatformConfig struct threaded through the topology builder. - Split AcpiTablesBuilder's arch-specific fields into an AcpiArchConfig enum (X86 vs Aarch64) so ARM64 gets HW_REDUCED_ACPI FADT and GTDT instead of x86 PM/PIC/PIT tables. - Add ACPI spec types: Gtdt, MadtGicMsiFrame, and EFI boot structures (EfiSystemTable, EfiMemoryDescriptor, EfiRtPropertiesTable). - Add SBSA UART and VMBus interrupt support to the DSDT builder. - Plumb virt_timer_ppi from topology into KVM, WHP, and HVF backends instead of hardcoding PPI 20. - Add LinuxDirectBootMode enum and --device-tree flag to select between the two ARM64 boot paths. - Add Guide documentation for Linux direct boot modes and a firmware overview page. 
--- Cargo.lock | 2 + Guide/src/SUMMARY.md | 8 +- .../devices/firmware/linux_direct.md | 99 ++++++ .../reference/devices/firmware/overview.md | 22 ++ openhcl/bootloader_fdt_parser/src/lib.rs | 50 +-- openhcl/underhill_core/src/loader/mod.rs | 16 +- openhcl/underhill_core/src/worker.rs | 20 +- openvmm/openvmm_core/Cargo.toml | 2 + openvmm/openvmm_core/src/worker/dispatch.rs | 163 ++++++--- .../src/worker/vm_loaders/linux.rs | 300 ++++++++++++++++- openvmm/openvmm_defs/src/config.rs | 19 ++ openvmm/openvmm_entry/src/cli_args.rs | 6 + openvmm/openvmm_entry/src/lib.rs | 6 + openvmm/openvmm_entry/src/ttrpc/mod.rs | 1 + petri/src/vm/openvmm/construct.rs | 1 + tmk/tmk_vmm/src/run.rs | 14 +- vm/acpi/src/dsdt.rs | 43 ++- vm/acpi_spec/src/gtdt.rs | 42 +++ vm/acpi_spec/src/lib.rs | 1 + vm/acpi_spec/src/madt.rs | 34 ++ .../firmware/uefi_specs/src/uefi/boot.rs | 83 +++++ .../vm_topology/src/processor/aarch64.rs | 53 +-- vmm_core/src/acpi_builder.rs | 318 ++++++++++++------ vmm_core/virt_hvf/src/lib.rs | 13 +- vmm_core/virt_kvm/src/arch/aarch64/mod.rs | 9 +- vmm_core/virt_whp/src/lib.rs | 2 +- .../tests/tests/aarch64_exclusive.rs | 32 ++ vmm_tests/vmm_tests/tests/tests/multiarch.rs | 1 + 28 files changed, 1110 insertions(+), 250 deletions(-) create mode 100644 Guide/src/reference/devices/firmware/linux_direct.md create mode 100644 Guide/src/reference/devices/firmware/overview.md create mode 100644 vm/acpi_spec/src/gtdt.rs diff --git a/Cargo.lock b/Cargo.lock index acc8a46148..2e75370719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5178,6 +5178,7 @@ dependencies = [ "chipset_device_resources", "chipset_device_worker", "chipset_legacy", + "crc32fast", "debug_ptr", "disk_backend", "fdt", @@ -5226,6 +5227,7 @@ dependencies = [ "thiserror 2.0.16", "tracing", "uefi_nvram_storage", + "uefi_specs", "virt", "virt_hvf", "virt_kvm", diff --git a/Guide/src/SUMMARY.md b/Guide/src/SUMMARY.md index 36e34ab252..84a93fa983 100644 --- a/Guide/src/SUMMARY.md +++ b/Guide/src/SUMMARY.md @@ -86,11 
+86,11 @@ - [Developer Features]() - [Hardware Debugging (gdbstub)](./reference/dev_feats/gdbstub.md) - [Kernel Debugging (KDNET)](./reference/dev_feats/kdnet.md) +- [Firmware and Boot Modes](./reference/devices/firmware/overview.md) + - [UEFI: mu_msvm](./reference/devices/firmware/mu_msvm_uefi.md) + - [BIOS: Hyper-V PCAT BIOS](./reference/devices/firmware/pcat_bios.md) + - [Linux Direct](./reference/devices/firmware/linux_direct.md) - [Devices]() - - [Firmware]() - - [UEFI: mu_msvm](./reference/devices/firmware/mu_msvm_uefi.md) - - [BIOS: Hyper-V PCAT BIOS](./reference/devices/firmware/pcat_bios.md) - - [Linux Direct]() - [Virtio]() - [virtio-fs]() - [virtio-9p]() diff --git a/Guide/src/reference/devices/firmware/linux_direct.md b/Guide/src/reference/devices/firmware/linux_direct.md new file mode 100644 index 0000000000..4d038b1295 --- /dev/null +++ b/Guide/src/reference/devices/firmware/linux_direct.md @@ -0,0 +1,99 @@ +# Linux Direct Boot + +Linux direct boot allows OpenVMM to load a Linux kernel directly into guest +memory without UEFI or BIOS firmware. The VMM itself acts as the bootloader: +it parses the kernel image, places the initrd, constructs the necessary boot +metadata, sets the initial register state, and starts execution at the kernel +entry point. + +This is the fastest path from "run" to a Linux userspace prompt, and is +useful for lightweight testing and development scenarios. + +## Architecture Support + +| Architecture | Supported | Kernel format | Boot protocol | +|-------------|-----------|---------------|---------------| +| x86_64 | Yes | Uncompressed ELF (`vmlinux`) | Linux boot protocol (zero page) | +| AArch64 | Yes | ARM64 `Image` (flat binary) | ARM64 Image boot (device tree or ACPI) | + +Compressed kernels (bzImage, gzip, etc.) are not supported. On x86_64, +pass the uncompressed `vmlinux` ELF. On AArch64, pass the uncompressed +`Image` file (not `Image.gz`). 
+ +## x86_64 Boot Flow + +On x86_64, OpenVMM follows the standard Linux boot protocol: + +1. The kernel image is loaded at the conventional 1 MB address. +2. An initrd (if provided) is placed after the kernel. +3. A **zero page** is constructed containing the memory map, command line + pointer, initrd location, and ACPI RSDP address. +4. ACPI tables (MADT, FADT, DSDT, SRAT, etc.) are built by OpenVMM's ACPI + builder and written into guest memory. +5. A GDT and initial page tables are set up. +6. The BSP register state is configured and execution begins. + +The DSDT includes whatever x86 chipset devices are configured (serial ports, +IOAPIC, PCI bus, VMBus, virtio-mmio, RTC, etc.). + +## AArch64 Boot Flow + +On AArch64, OpenVMM supports two modes for presenting hardware descriptions to +the kernel, selected by the `--device-tree` CLI flag: + +### ACPI Mode (default) + +This is the default. The kernel discovers devices through ACPI tables, just as +it would on a server with UEFI firmware. + +Since the ARM64 kernel's ACPI code path requires entering through the EFI stub, +OpenVMM synthesizes a minimal set of EFI structures in guest memory: + +1. **EFI System Table** — points to a configuration table with the ACPI RSDP + and an RT Properties entry that advertises no runtime services. +2. **EFI Memory Map** — describes the EFI metadata region, ACPI tables, and + conventional RAM. +3. **ACPI Tables** — FADT (with `HW_REDUCED_ACPI`), MADT (GICv3 + redistributors, distributor, optional v2m MSI frame), GTDT (virtual timer), + DSDT (VMBus, serial UARTs), and optionally MCFG/SSDT for PCIe. + +A **stub device tree** is then built. Unlike a full device tree, it contains +no hardware nodes — no CPUs, GIC, timer, or devices. Its only purpose is a +`/chosen` node with `linux,uefi-system-table` and `linux,uefi-mmap-*` +properties that point the kernel's EFI stub to the synthesized EFI structures. +From there, the kernel follows its standard ACPI discovery path. 
+ +```admonish tip title="When to use ACPI mode" +ACPI mode is the default and is recommended when running with the +Hyper-V hypervisor (`--hv`). Device tree mode also supports VMBus +(with recent kernels and hypervisor versions), but ACPI mode provides +broader compatibility. +``` + +### Device Tree Mode (`--device-tree`) + +In this mode, a full device tree is built describing all hardware +directly — CPUs, interrupt controller, timers, serial ports, VMBus, +PCIe bridges, and memory regions. The kernel discovers everything +from the DT; no EFI structures or ACPI tables are involved. + +```admonish note +Device tree mode is not supported on x86_64. Passing `--device-tree` on x86 +will result in an error. +``` + +## CLI Usage + +```bash +# x86_64 Linux direct boot +openvmm --kernel path/to/vmlinux --initrd path/to/initrd \ + --cmdline "console=ttyS0" + +# AArch64 ACPI mode (default) +openvmm --kernel path/to/Image --initrd path/to/initrd \ + --cmdline "console=ttyAMA0 earlycon" + +# AArch64 device tree mode +openvmm --kernel path/to/Image --initrd path/to/initrd \ + --cmdline "console=ttyAMA0 earlycon" --device-tree +``` diff --git a/Guide/src/reference/devices/firmware/overview.md b/Guide/src/reference/devices/firmware/overview.md new file mode 100644 index 0000000000..4393071d07 --- /dev/null +++ b/Guide/src/reference/devices/firmware/overview.md @@ -0,0 +1,22 @@ +# Firmware and Boot Modes + +OpenVMM supports several ways to boot a guest VM, each with different +firmware requirements and guest OS compatibility: + +| Boot mode | Architecture | Firmware | Use case | +|-----------|-------------|----------|----------| +| **UEFI** | x86_64, AArch64 | [mu_msvm](./mu_msvm_uefi.md) | Windows, modern Linux, full UEFI environment | +| **PCAT BIOS** | x86_64 | [Hyper-V PCAT BIOS](./pcat_bios.md) | Legacy OS, Gen1-style boot | +| **Linux Direct** | x86_64, AArch64 | None (VMM is the bootloader) | [Fast Linux boot](./linux_direct.md), development, testing | +| **IGVM** | 
x86_64, AArch64 | Packaged in IGVM file | OpenHCL paravisor, confidential VMs | + +The boot mode is selected by which `--kernel`, `--uefi`, `--pcat`, or +`--igvm` flag is passed on the command line (or the equivalent ttrpc +configuration). + +```admonish note +Not all boot modes are available on all architectures. PCAT BIOS is +x86_64 only. Linux direct boot supports both architectures but with +different kernel image formats — see the +[Linux Direct](./linux_direct.md) page for details. +``` diff --git a/openhcl/bootloader_fdt_parser/src/lib.rs b/openhcl/bootloader_fdt_parser/src/lib.rs index f858a16508..3414c22d97 100644 --- a/openhcl/bootloader_fdt_parser/src/lib.rs +++ b/openhcl/bootloader_fdt_parser/src/lib.rs @@ -18,7 +18,7 @@ use inspect::Inspect; use loader_defs::shim::MemoryVtlType; use memory_range::MemoryRange; use vm_topology::memory::MemoryRangeWithNode; -use vm_topology::processor::aarch64::GicInfo; +use vm_topology::processor::aarch64::Aarch64PlatformConfig; /// A parsed cpu. #[derive(Debug, Inspect, Clone, Copy, PartialEq, Eq)] @@ -173,10 +173,8 @@ pub struct ParsedBootDtInfo { #[inspect(iter_by_index)] pub private_pool_ranges: Vec, - /// GIC information, on AArch64. - pub gic: Option, - /// PMU GSIV, on AArch64. - pub pmu_gsiv: Option, + /// GIC and platform interrupt configuration, on AArch64. + pub gic: Option, } fn err_to_owned(e: fdt::parser::Error<'_>) -> anyhow::Error { @@ -526,17 +524,20 @@ fn parse_memory(node: &Node<'_>) -> anyhow::Result { } /// Parse GIC config -fn parse_gic(node: &Node<'_>) -> anyhow::Result { +fn parse_gic(node: &Node<'_>) -> anyhow::Result { let reg = property_to_u64_vec(node, "reg")?; if reg.len() != 4 { bail!("gic node {} does not have 4 u64s", node.name); } - Ok(GicInfo { + Ok(Aarch64PlatformConfig { gic_distributor_base: reg[0], gic_redistributors_base: reg[2], gic_v2m: None, + pmu_gsiv: 0, + // TODO: parse from the DT timer node instead of hardcoding. 
+ virt_timer_ppi: 20, }) } @@ -662,6 +663,11 @@ impl ParsedBootDtInfo { vtl2_memory.sort_by_key(|r| r.range.start()); + // Merge PMU GSIV into the GIC platform config if both were parsed. + if let (Some(gic), Some(pmu_gsiv)) = (&mut gic, pmu_gsiv) { + gic.pmu_gsiv = pmu_gsiv; + } + Ok(Self { cpus, vtl0_mmio, @@ -671,7 +677,6 @@ impl ParsedBootDtInfo { vtl0_alias_map, accepted_ranges, gic, - pmu_gsiv, memory_allocation_mode, isolation, vtl2_reserved_range, @@ -841,15 +846,21 @@ mod tests { } // PMU - if let Some(pmu_gsiv) = info.pmu_gsiv { - assert!((16..32).contains(&pmu_gsiv)); - const GIC_PPI: u32 = 1; - const IRQ_TYPE_LEVEL_HIGH: u32 = 4; - root_builder = root_builder - .start_node("pmu")? - .add_str(p_compatible, "arm,armv8-pmuv3")? - .add_u32_array(p_interrupts, &[GIC_PPI, pmu_gsiv - 16, IRQ_TYPE_LEVEL_HIGH])? - .end_node()?; + if let Some(gic) = &info.gic { + let pmu_gsiv = gic.pmu_gsiv; + if pmu_gsiv != 0 { + anyhow::ensure!( + (16..32).contains(&pmu_gsiv), + "PMU GSIV {pmu_gsiv} is not a valid PPI (expected 16..32)" + ); + const GIC_PPI: u32 = 1; + const IRQ_TYPE_LEVEL_HIGH: u32 = 4; + root_builder = root_builder + .start_node("pmu")? + .add_str(p_compatible, "arm,armv8-pmuv3")? + .add_u32_array(p_interrupts, &[GIC_PPI, pmu_gsiv - 16, IRQ_TYPE_LEVEL_HIGH])? 
+ .end_node()?; + } } let mut openhcl_builder = root_builder.start_node("openhcl")?; @@ -1054,12 +1065,13 @@ mod tests { MemoryRange::new(0x30000..0x40000), ], vtl0_alias_map: Some(1 << 48), - gic: Some(GicInfo { + gic: Some(Aarch64PlatformConfig { gic_distributor_base: 0x10000, gic_redistributors_base: 0x20000, gic_v2m: None, + pmu_gsiv: 0x17, + virt_timer_ppi: 20, }), - pmu_gsiv: Some(0x17), accepted_ranges: vec![ MemoryRange::new(0x10000..0x20000), MemoryRange::new(0x1000000..0x1500000), diff --git a/openhcl/underhill_core/src/loader/mod.rs b/openhcl/underhill_core/src/loader/mod.rs index 3c236ecd14..6d10a79416 100644 --- a/openhcl/underhill_core/src/loader/mod.rs +++ b/openhcl/underhill_core/src/loader/mod.rs @@ -268,12 +268,14 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { mem_layout, cache_topology: None, pcie_host_bridges: &vec![], - with_ioapic: true, // underhill always runs with ioapic - with_pic: false, - with_pit: false, - with_psp: platform_config.general.psp_enabled, - pm_base: crate::worker::PM_BASE, - acpi_irq: crate::worker::SYSTEM_IRQ_ACPI, + arch: vmm_core::acpi_builder::AcpiArchConfig::X86 { + with_ioapic: true, // underhill always runs with ioapic + with_pic: false, + with_pit: false, + with_psp: platform_config.general.psp_enabled, + pm_base: crate::worker::PM_BASE, + acpi_irq: crate::worker::SYSTEM_IRQ_ACPI, + }, }; if mem_layout.mmio().len() < 2 { @@ -306,7 +308,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { dsdt.add_mmio_module(mem_layout.mmio()[0], mem_layout.mmio()[1]); // TODO: change this once PCI is running in underhill - dsdt.add_vmbus(false); + dsdt.add_vmbus(false, None); dsdt.add_rtc(); }); let acpi_len = acpi_tables.tables.len() + 0x1000; diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index ee19d26644..3b5e90d014 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1302,14 +1302,13 @@ fn new_x86_topology( #[cfg(guest_arch 
= "aarch64")] fn new_aarch64_topology( - gic: vm_topology::processor::aarch64::GicInfo, + gic: vm_topology::processor::aarch64::Aarch64PlatformConfig, cpus: &[bootloader_fdt_parser::Cpu], - pmu_gsiv: u32, ) -> anyhow::Result> { // TODO SMP: Query the MT property from the host topology somehow. Device Tree // doesn't specify that. let gic_redistributors_base = gic.gic_redistributors_base; - TopologyBuilder::new_aarch64(gic, pmu_gsiv) + TopologyBuilder::new_aarch64(gic) .vps_per_socket(cpus.len() as u32) .build_with_vp_info(cpus.iter().enumerate().map(|(vp_index, cpu)| { let mpidr = aarch64defs::MpidrEl1::from( @@ -1731,16 +1730,11 @@ async fn new_underhill_vm( #[cfg(guest_arch = "aarch64")] let processor_topology = { - new_aarch64_topology( - boot_info - .gic - .context("did not get gic state from bootloader")?, - &boot_info.cpus, - boot_info - .pmu_gsiv - .context("did not get pmu gsiv from bootloader")?, - ) - .context("failed to construct the processor topology")? + let platform = boot_info + .gic + .context("did not get gic state from bootloader")?; + new_aarch64_topology(platform, &boot_info.cpus) + .context("failed to construct the processor topology")? 
}; // also construct the VMGS nice and early, as much like the GET, it also diff --git a/openvmm/openvmm_core/Cargo.toml b/openvmm/openvmm_core/Cargo.toml index 1de90efb6e..72e00a11da 100644 --- a/openvmm/openvmm_core/Cargo.toml +++ b/openvmm/openvmm_core/Cargo.toml @@ -20,6 +20,8 @@ membacking.workspace = true memory_range = { workspace = true, features = ["mesh"] } vm_topology = { workspace = true, features = ["mesh"] } guestmem.workspace = true +uefi_specs.workspace = true +crc32fast.workspace = true vmcore.workspace = true vm_resource.workspace = true vmgs_resources.workspace = true diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index a4330a739e..353da12299 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -110,8 +110,8 @@ use vm_topology::pcie::PcieHostBridge; use vm_topology::processor::ArchTopology; use vm_topology::processor::ProcessorTopology; use vm_topology::processor::TopologyBuilder; +use vm_topology::processor::aarch64::Aarch64PlatformConfig; use vm_topology::processor::aarch64::Aarch64Topology; -use vm_topology::processor::aarch64::GicInfo; use vm_topology::processor::aarch64::GicV2mInfo; use vm_topology::processor::x86::X86Topology; use vmbus_channel::channel::VmbusDevice; @@ -488,27 +488,11 @@ impl BuildTopology for ProcessorTopologyConfig { Some(ArchTopologyConfig::Aarch64(arch)) => arch.clone(), _ => anyhow::bail!("invalid architecture config"), }; - let gic = if let Some(gic_config) = &arch.gic_config { - GicInfo { - gic_distributor_base: gic_config.gic_distributor_base, - gic_redistributors_base: gic_config.gic_redistributors_base, - gic_v2m: Some(GicV2mInfo { - frame_base: openvmm_defs::config::DEFAULT_GIC_V2M_MSI_FRAME_BASE, - spi_base: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_BASE, - spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT, - }), - } - } else { - GicInfo { - gic_distributor_base: 
openvmm_defs::config::DEFAULT_GIC_DISTRIBUTOR_BASE, - gic_redistributors_base: openvmm_defs::config::DEFAULT_GIC_REDISTRIBUTORS_BASE, - gic_v2m: Some(GicV2mInfo { - frame_base: openvmm_defs::config::DEFAULT_GIC_V2M_MSI_FRAME_BASE, - spi_base: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_BASE, - spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT, - }), - } - }; + let gic_v2m = Some(GicV2mInfo { + frame_base: openvmm_defs::config::DEFAULT_GIC_V2M_MSI_FRAME_BASE, + spi_base: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_BASE, + spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT, + }); let pmu_gsiv = match arch.pmu_gsiv { PmuGsivConfig::Gsiv(gsiv) => gsiv, PmuGsivConfig::Platform => platform_gsiv(hypervisor), @@ -521,7 +505,25 @@ impl BuildTopology for ProcessorTopologyConfig { tracing::warn!("PMU GSIV is set to 0"); } - let mut builder = TopologyBuilder::new_aarch64(gic, pmu_gsiv); + let platform = if let Some(gic_config) = &arch.gic_config { + Aarch64PlatformConfig { + gic_distributor_base: gic_config.gic_distributor_base, + gic_redistributors_base: gic_config.gic_redistributors_base, + gic_v2m, + pmu_gsiv, + virt_timer_ppi: openvmm_defs::config::DEFAULT_VIRT_TIMER_PPI, + } + } else { + Aarch64PlatformConfig { + gic_distributor_base: openvmm_defs::config::DEFAULT_GIC_DISTRIBUTOR_BASE, + gic_redistributors_base: openvmm_defs::config::DEFAULT_GIC_REDISTRIBUTORS_BASE, + gic_v2m, + pmu_gsiv, + virt_timer_ppi: openvmm_defs::config::DEFAULT_VIRT_TIMER_PPI, + } + }; + + let mut builder = TopologyBuilder::new_aarch64(platform); if let Some(smt) = self.enable_smt { builder.smt_enabled(smt); } @@ -1237,12 +1239,14 @@ impl InitializedVm { mem_layout: &mem_layout, cache_topology: None, pcie_host_bridges: &Vec::new(), - with_ioapic: cfg.chipset.with_generic_ioapic, - with_pic: cfg.chipset.with_generic_pic, - with_pit: cfg.chipset.with_generic_pit, - with_psp: cfg.chipset.with_generic_psp, - pm_base: PM_BASE, - acpi_irq: SYSTEM_IRQ_ACPI, + arch: 
vmm_core::acpi_builder::AcpiArchConfig::X86 { + with_ioapic: cfg.chipset.with_generic_ioapic, + with_pic: cfg.chipset.with_generic_pic, + with_pit: cfg.chipset.with_generic_pit, + with_psp: cfg.chipset.with_generic_psp, + pm_base: PM_BASE, + acpi_irq: SYSTEM_IRQ_ACPI, + }, }; let srat = acpi_tables_builder.build_srat(); firmware_pcat::config::PcatBiosConfig { @@ -2359,12 +2363,24 @@ impl LoadedVmInner { mem_layout: &self.mem_layout, cache_topology: cache_topology.as_ref(), pcie_host_bridges: &self.pcie_host_bridges, - with_ioapic: self.chipset_cfg.with_generic_ioapic, - with_psp: self.chipset_cfg.with_generic_psp, - with_pic: self.chipset_cfg.with_generic_pic, - with_pit: self.chipset_cfg.with_generic_pit, - pm_base: PM_BASE, - acpi_irq: SYSTEM_IRQ_ACPI, + #[cfg(guest_arch = "x86_64")] + arch: vmm_core::acpi_builder::AcpiArchConfig::X86 { + with_ioapic: self.chipset_cfg.with_generic_ioapic, + with_psp: self.chipset_cfg.with_generic_psp, + with_pic: self.chipset_cfg.with_generic_pic, + with_pit: self.chipset_cfg.with_generic_pit, + pm_base: PM_BASE, + acpi_irq: SYSTEM_IRQ_ACPI, + }, + #[cfg(guest_arch = "aarch64")] + arch: vmm_core::acpi_builder::AcpiArchConfig::Aarch64 { + hypervisor_vendor_identity: if self.hypervisor_cfg.with_hv { + u64::from_le_bytes(*b"MsHyperV") + } else { + 0 + }, + virt_timer_ppi: self.processor_topology.virt_timer_ppi(), + }, }; if vtl2_only { @@ -2381,7 +2397,14 @@ impl LoadedVmInner { ref cmdline, enable_serial, ref custom_dsdt, + boot_mode, } => { + match boot_mode { + openvmm_defs::config::LinuxDirectBootMode::DeviceTree => { + anyhow::bail!("device tree boot mode is not supported on x86_64"); + } + openvmm_defs::config::LinuxDirectBootMode::Acpi => {} + } let kernel_config = super::vm_loaders::linux::KernelConfig { kernel, initrd, @@ -2397,7 +2420,7 @@ impl LoadedVmInner { acpi_builder.build_acpi_tables_custom_dsdt(gpa, dsdt) } else { acpi_builder.build_acpi_tables(gpa, |mem_layout, dsdt| { - add_devices_to_dsdt( + 
add_devices_to_dsdt_x64( mem_layout, dsdt, &self.chipset_cfg, @@ -2424,19 +2447,40 @@ impl LoadedVmInner { ref cmdline, enable_serial, custom_dsdt: _, + boot_mode, } => { + use openvmm_defs::config::LinuxDirectBootMode; + let kernel_config = super::vm_loaders::linux::KernelConfig { kernel, initrd, cmdline, mem_layout: &self.mem_layout, }; + + let with_hv = self.hypervisor_cfg.with_hv; + let build_acpi = if boot_mode == LinuxDirectBootMode::Acpi { + Some(|rsdp_gpa: u64| { + acpi_builder.build_acpi_tables(rsdp_gpa, |mem_layout, dsdt| { + add_devices_to_dsdt_arm64( + mem_layout, + dsdt, + enable_serial, + with_hv, + ) + }) + }) + } else { + None + }; + let regs = super::vm_loaders::linux::load_linux_arm64( &kernel_config, &self.gm, enable_serial, &self.processor_topology, &self.pcie_host_bridges, + build_acpi, )?; (regs, Vec::new()) @@ -3000,7 +3044,7 @@ impl LoadedVm { } #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] -fn add_devices_to_dsdt( +fn add_devices_to_dsdt_x64( mem_layout: &MemoryLayout, dsdt: &mut dsdt::Dsdt, cfg: &BaseChipsetManifest, @@ -3067,10 +3111,53 @@ fn add_devices_to_dsdt( dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap); } - dsdt.add_vmbus(cfg.with_generic_pci_bus || cfg.with_i440bx_host_pci_bridge); + dsdt.add_vmbus( + cfg.with_generic_pci_bus || cfg.with_i440bx_host_pci_bridge, + None, + ); dsdt.add_rtc(); } +#[cfg(guest_arch = "aarch64")] +fn add_devices_to_dsdt_arm64( + mem_layout: &MemoryLayout, + dsdt: &mut dsdt::Dsdt, + enable_serial: bool, + with_hv: bool, +) { + // VMBus GIC INTID (PPI 2 = INTID 16 + 2 = 18), matching the DT path. + const VMBUS_INTID: u32 = 18; + // SBSA UART MMIO bases and sizes. + const PL011_SERIAL0_BASE: u64 = 0xEFFEC000; + const PL011_SERIAL1_BASE: u64 = 0xEFFEB000; + const PL011_SERIAL_SIZE: u64 = 0x1000; + // UART GSIVs (SPI 1 = INTID 33, SPI 2 = INTID 34). 
+ const PL011_SERIAL0_GSIV: u32 = 33; + const PL011_SERIAL1_GSIV: u32 = 34; + + if with_hv { + // Internal invariant: the memory layout for ARM64 with HV always has + // at least two MMIO gaps (low + high). This is configured by OpenVMM + // itself, not by guest input. + assert!( + mem_layout.mmio().len() >= 2, + "need at least two MMIO regions" + ); + let low_mmio_gap = mem_layout.mmio()[0]; + let high_mmio_gap: MemoryRange = mem_layout.mmio()[1]; + dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap); + // VMBus on ARM64 ACPI needs a per-CPU interrupt (PPI) in _CRS. + // Always place under VMOD, not PCI0 — ARM64 doesn't use the x86 + // PCI0 DSDT node. + dsdt.add_vmbus(false, Some(VMBUS_INTID)); + } + + if enable_serial { + dsdt.add_sbsa_uart(b"\\_SB.UAR0", 0, PL011_SERIAL0_BASE, PL011_SERIAL_SIZE, PL011_SERIAL0_GSIV); + dsdt.add_sbsa_uart(b"\\_SB.UAR1", 1, PL011_SERIAL1_BASE, PL011_SERIAL_SIZE, PL011_SERIAL1_GSIV); + } +} + #[cfg(guest_arch = "x86_64")] struct WatchdogTimeoutNmi { partition: Arc, diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index c5ceb900e6..3a1bff5a21 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -19,6 +19,7 @@ use vm_topology::memory::MemoryLayout; use vm_topology::pcie::PcieHostBridge; use vm_topology::processor::ProcessorTopology; use vm_topology::processor::aarch64::Aarch64Topology; +use zerocopy::IntoBytes; #[derive(Debug, Error)] #[error("device tree error: {0:?}")] @@ -32,6 +33,17 @@ pub enum Error { Loader(#[source] loader::linux::Error), #[error("device tree error")] Dt(#[source] DtError), + #[error("failed to write EFI/ACPI tables to guest memory")] + Efi(#[source] guestmem::GuestMemoryError), +} + +#[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] +struct Aarch64EfiInfo { + systab_addr: u64, + mmap_addr: u64, + mmap_size: u32, + mmap_desc_size: u32, + mmap_desc_ver: u32, } 
#[derive(Debug)] @@ -164,7 +176,7 @@ fn build_dt( && !(gic_redist_base..gic_redist_base + gic_redist_size).contains(&gic_dist_base) ); - let mut buffer = vec![0u8; hvdef::HV_PAGE_SIZE as usize * 256]; + let mut buffer = vec![0u8; 0x200000]; let builder_config = fdt::builder::BuilderConfig { blob_buffer: &mut buffer, @@ -323,7 +335,8 @@ fn build_dt( }; // ARM64 Architectural Timer. - const HYPERV_VIRT_TIMER_PPI: u32 = 4; // relative to PPI base of 16 + // The DT `interrupts` property uses the PPI offset (INTID - 16). + let virt_timer_ppi_offset = processor_topology.virt_timer_ppi() - 16; let timer = root_builder .start_node("timer")? .add_str(p_compatible, "arm,armv8-timer")? @@ -331,7 +344,7 @@ fn build_dt( .add_str(p_interrupt_names, "virt")? .add_u32_array( p_interrupts, - &[GIC_PPI, HYPERV_VIRT_TIMER_PPI, IRQ_TYPE_LEVEL_LOW], + &[GIC_PPI, virt_timer_ppi_offset, IRQ_TYPE_LEVEL_LOW], )? .add_null(p_always_on)?; root_builder = timer.end_node()?; @@ -492,7 +505,246 @@ fn build_dt( root_builder = chosen.end_node()?; let boot_cpu_id = 0; - root_builder.end_node()?.build(boot_cpu_id)?; + let dt_size = root_builder.end_node()?.build(boot_cpu_id)?; + buffer.truncate(dt_size); + + Ok(buffer) +} + +#[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] +/// Write synthesized EFI and ACPI structures into guest memory. +/// +/// On ARM64, the Linux kernel can discover devices via ACPI instead of a +/// device tree, but it still needs to enter via the EFI stub to find the +/// RSDP. We synthesize: +/// - An `EFI_SYSTEM_TABLE` pointing to an ACPI 2.0 configuration table +/// entry (the RSDP) and an RT Properties table (advertising no runtime +/// services). +/// - An EFI memory map describing the metadata, ACPI tables, and +/// conventional RAM regions. +/// - The ACPI tables themselves (RSDP, XSDT, FADT, MADT, GTDT, DSDT, etc.). 
+/// +/// The companion [`build_stub_dt`] function then builds a minimal device tree +/// whose `/chosen` node carries `linux,uefi-system-table` and the memory map +/// pointers so that the kernel's EFI stub can locate these structures. +fn write_efi_and_acpi_tables( + gm: &GuestMemory, + efi_base: u64, + rsdp_addr: u64, + mem_layout: &MemoryLayout, + acpi_tables: &vmm_core::acpi_builder::BuiltAcpiTables, +) -> Result { + use memory_range::MemoryRange; + use uefi_specs::uefi::boot::ACPI_20_TABLE_GUID; + use uefi_specs::uefi::boot::EFI_2_70_SYSTEM_TABLE_REVISION; + use uefi_specs::uefi::boot::EFI_MEMORY_DESCRIPTOR_VERSION; + use uefi_specs::uefi::boot::EFI_MEMORY_WB; + use uefi_specs::uefi::boot::EFI_RT_PROPERTIES_TABLE_GUID; + use uefi_specs::uefi::boot::EFI_SYSTEM_TABLE_SIGNATURE; + use uefi_specs::uefi::boot::EfiMemoryDescriptor; + use uefi_specs::uefi::boot::EfiMemoryType; + use uefi_specs::uefi::boot::EfiRtPropertiesTable; + use uefi_specs::uefi::boot::EfiSystemTable; + + // Helper to align a value up to the given power-of-two alignment. + fn align_up(val: u64, align: u64) -> u64 { + (val + align - 1) & !(align - 1) + } + + // --- ACPI tables --- + let tables_addr = rsdp_addr + 0x1000; + gm.write_at(rsdp_addr, &acpi_tables.rdsp) + .map_err(Error::Efi)?; + gm.write_at(tables_addr, &acpi_tables.tables) + .map_err(Error::Efi)?; + + // --- EFI metadata (page 1): systab, config table, vendor, rt props --- + // Page 0 is reserved for the memory map (written last). 
+ let mut cursor = efi_base + 0x1000; + + // EFI System Table + let systab_addr = cursor; + cursor += size_of::() as u64; + + // Configuration table entries (24 bytes each: 16-byte GUID + 8-byte pointer) + const CONFIG_ENTRY_SIZE: u64 = 24; + let num_config_entries: u64 = 2; + let config_table_addr = cursor; + cursor += num_config_entries * CONFIG_ENTRY_SIZE; + + // Firmware vendor string — NUL-terminated UTF-16LE + let fw_vendor_addr = cursor; + let fw_vendor: Vec = "OpenVMM\0" + .encode_utf16() + .flat_map(|c| c.to_le_bytes()) + .collect(); + cursor += fw_vendor.len() as u64; + cursor = align_up(cursor, 8); + + // EFI RT Properties Table — tells the OS no runtime services are available. + let rt_props_addr = cursor; + let rt_props = EfiRtPropertiesTable::NONE_SUPPORTED; + cursor += size_of::() as u64; + + // Compute how many pages the metadata region spans. + let metadata_end = align_up(cursor, 0x1000); + let metadata_pages = (metadata_end - efi_base) / 0x1000; + assert!( + cursor <= rsdp_addr, + "EFI metadata ({cursor:#x}) overflows into ACPI tables region ({rsdp_addr:#x})", + ); + + // Now write everything. 
+ gm.write_at(rt_props_addr, rt_props.as_bytes()) + .map_err(Error::Efi)?; + + let mut config_entries = [0u8; 48]; + config_entries[0..16].copy_from_slice(ACPI_20_TABLE_GUID.as_bytes()); + config_entries[16..24].copy_from_slice(&rsdp_addr.to_le_bytes()); + config_entries[24..40].copy_from_slice(EFI_RT_PROPERTIES_TABLE_GUID.as_bytes()); + config_entries[40..48].copy_from_slice(&rt_props_addr.to_le_bytes()); + gm.write_at(config_table_addr, &config_entries) + .map_err(Error::Efi)?; + + gm.write_at(fw_vendor_addr, &fw_vendor) + .map_err(Error::Efi)?; + + let mut systab = EfiSystemTable { + signature: EFI_SYSTEM_TABLE_SIGNATURE, + revision: EFI_2_70_SYSTEM_TABLE_REVISION, + header_size: size_of::() as u32, + firmware_vendor: fw_vendor_addr, + firmware_revision: 1, + number_of_table_entries: num_config_entries, + configuration_table: config_table_addr, + ..Default::default() + }; + // UEFI spec 4.2: CRC32 is computed over header_size bytes with crc32 zeroed. + systab.crc32 = crc32fast::hash(systab.as_bytes()); + gm.write_at(systab_addr, systab.as_bytes()) + .map_err(Error::Efi)?; + + // --- Memory map (page 0) --- + let mut mmap_entries: Vec = Vec::new(); + + // EFI metadata region + mmap_entries.push(EfiMemoryDescriptor { + typ: EfiMemoryType::EFI_BOOT_SERVICES_DATA, + _pad: 0, + physical_start: efi_base, + virtual_start: 0, + number_of_pages: metadata_pages, + attribute: EFI_MEMORY_WB, + }); + + // ACPI tables region + let acpi_region_pages = { + let total = 0x1000 + acpi_tables.tables.len() as u64; + total.div_ceil(0x1000) + }; + mmap_entries.push(EfiMemoryDescriptor { + typ: EfiMemoryType::EFI_ACPI_RECLAIM_MEMORY, + _pad: 0, + physical_start: rsdp_addr, + virtual_start: 0, + number_of_pages: acpi_region_pages, + attribute: EFI_MEMORY_WB, + }); + + // Conventional memory — one entry per RAM range, excluding the + // EFI/ACPI reserved region to avoid overlapping memory map entries. 
+ let reserved_start = efi_base; + let reserved_end = align_up(rsdp_addr + 0x1000 + acpi_tables.tables.len() as u64, 0x1000); + let reserved = [MemoryRange::new(reserved_start..reserved_end)]; + for range in memory_range::subtract_ranges( + mem_layout.ram().iter().map(|r| r.range), + reserved, + ) { + mmap_entries.push(EfiMemoryDescriptor { + typ: EfiMemoryType::EFI_CONVENTIONAL_MEMORY, + _pad: 0, + physical_start: range.start(), + virtual_start: 0, + number_of_pages: range.len() / 0x1000, + attribute: EFI_MEMORY_WB, + }); + } + + let mmap_addr = efi_base; + let mmap_bytes: Vec = mmap_entries + .iter() + .flat_map(|e| e.as_bytes()) + .copied() + .collect(); + let mmap_size = mmap_bytes.len() as u32; + + gm.write_at(mmap_addr, &mmap_bytes).map_err(Error::Efi)?; + + Ok(Aarch64EfiInfo { + systab_addr, + mmap_addr, + mmap_size, + mmap_desc_size: size_of::() as u32, + mmap_desc_ver: EFI_MEMORY_DESCRIPTOR_VERSION, + }) +} + +/// Build a "stub" device tree for ACPI-mode ARM64 direct boot. +/// +/// Unlike the full device tree built by [`build_dt`], this DT contains no +/// hardware descriptions — no CPU nodes, no GIC, no timer, no devices. +/// Its only purpose is a `/chosen` node that tells the Linux EFI stub +/// where to find the EFI system table and memory map written by +/// [`write_efi_and_acpi_tables`]. The kernel then uses those EFI +/// structures to locate the ACPI RSDP and discovers all hardware through +/// ACPI tables instead of DT nodes. 
+#[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] +fn build_stub_dt( + cmdline: &str, + initrd_start: u64, + initrd_end: u64, + efi_info: &Aarch64EfiInfo, +) -> Result, fdt::builder::Error> { + let mut buffer = vec![0u8; 0x4000]; + + let builder_config = fdt::builder::BuilderConfig { + blob_buffer: &mut buffer, + string_table_cap: 256, + memory_reservations: &[], + }; + let mut builder = fdt::builder::Builder::new(builder_config)?; + let p_address_cells = builder.add_string("#address-cells")?; + let p_size_cells = builder.add_string("#size-cells")?; + let p_bootargs = builder.add_string("bootargs")?; + let p_initrd_start = builder.add_string("linux,initrd-start")?; + let p_initrd_end = builder.add_string("linux,initrd-end")?; + let p_uefi_system_table = builder.add_string("linux,uefi-system-table")?; + let p_uefi_mmap_start = builder.add_string("linux,uefi-mmap-start")?; + let p_uefi_mmap_size = builder.add_string("linux,uefi-mmap-size")?; + let p_uefi_mmap_desc_size = builder.add_string("linux,uefi-mmap-desc-size")?; + let p_uefi_mmap_desc_ver = builder.add_string("linux,uefi-mmap-desc-ver")?; + + let root_builder = builder + .start_node("")? + .add_u32(p_address_cells, 2)? + .add_u32(p_size_cells, 2)?; + + let chosen = root_builder + .start_node("chosen")? + .add_str(p_bootargs, cmdline)? + .add_u64(p_initrd_start, initrd_start)? + .add_u64(p_initrd_end, initrd_end)? + .add_u64(p_uefi_system_table, efi_info.systab_addr)? + .add_u64(p_uefi_mmap_start, efi_info.mmap_addr)? + .add_u32(p_uefi_mmap_size, efi_info.mmap_size)? + .add_u32(p_uefi_mmap_desc_size, efi_info.mmap_desc_size)? 
+ .add_u32(p_uefi_mmap_desc_ver, efi_info.mmap_desc_ver)?; + + let root_builder = chosen.end_node()?; + + let boot_cpu_id = 0; + let dt_size = root_builder.end_node()?.build(boot_cpu_id)?; + buffer.truncate(dt_size); Ok(buffer) } @@ -504,6 +756,7 @@ pub fn load_linux_arm64( enable_serial: bool, processor_topology: &ProcessorTopology, pcie_host_bridges: &[PcieHostBridge], + build_acpi: Option vmm_core::acpi_builder::BuiltAcpiTables>, ) -> Result, Error> { let mut loader = Loader::new(gm.clone(), cfg.mem_layout, hvdef::Vtl::Vtl0); let mut kernel_file = cfg.kernel; @@ -526,22 +779,37 @@ pub fn load_linux_arm64( // Thus, we first start with planning the memory layout where // some space at the loader bottom is reserved for the initrd. - let load_bottom_addr: u64 = 16 << 20; - let initrd_start: u64 = load_bottom_addr; + const INITRD_BASE: u64 = 16 << 20; // 16 MB + let initrd_start: u64 = INITRD_BASE; let initrd_end: u64 = initrd_start + initrd_size; // Align the kernel to 2MB let kernel_minimum_start_address: u64 = (initrd_end + 0x1fffff) & !0x1fffff; - let device_tree = build_dt( - cfg, - gm, - enable_serial, - processor_topology, - pcie_host_bridges, - initrd_start, - initrd_end, - ) - .map_err(|e| Error::Dt(DtError(e)))?; + let device_tree = if let Some(build_acpi) = build_acpi { + // ACPI mode: write EFI + ACPI tables into guest memory, then build a + // minimal "stub" DT that points the kernel's EFI stub at them. The + // kernel discovers all devices through ACPI, not the DT. + const EFI_BASE: u64 = 0x0080_0000; // 8 MB + const ACPI_TABLES_OFFSET: u64 = 0x2000; + const { assert!(EFI_BASE < INITRD_BASE) }; + let rsdp_addr = EFI_BASE + ACPI_TABLES_OFFSET; + let acpi_tables = build_acpi(rsdp_addr); + let efi_info = + write_efi_and_acpi_tables(gm, EFI_BASE, rsdp_addr, cfg.mem_layout, &acpi_tables)?; + build_stub_dt(cfg.cmdline, initrd_start, initrd_end, &efi_info) + .map_err(|e| Error::Dt(DtError(e)))? 
+ } else { + build_dt( + cfg, + gm, + enable_serial, + processor_topology, + pcie_host_bridges, + initrd_start, + initrd_end, + ) + .map_err(|e| Error::Dt(DtError(e)))? + }; let initrd_config = initrd_reader.as_mut().map(|r| InitrdConfig { initrd_address: InitrdAddressType::Address(initrd_start), diff --git a/openvmm/openvmm_defs/src/config.rs b/openvmm/openvmm_defs/src/config.rs index 53ed80bccc..a9d9ebe723 100644 --- a/openvmm/openvmm_defs/src/config.rs +++ b/openvmm/openvmm_defs/src/config.rs @@ -110,6 +110,24 @@ pub const DEFAULT_GIC_V2M_SPI_BASE: u32 = 512; /// Number of SPIs reserved for PCIe MSIs. pub const DEFAULT_GIC_V2M_SPI_COUNT: u32 = 64; +/// Default virtual timer PPI (GIC INTID). PPI 4 = INTID 16 + 4 = 20. +/// This is the EL1 virtual timer interrupt used across Hyper-V, KVM, and HVF. +pub const DEFAULT_VIRT_TIMER_PPI: u32 = 20; + +/// How firmware tables are presented to the guest in Linux direct boot. +/// +/// On x86, `DeviceTree` is not supported and will be rejected. On aarch64, +/// this selects between a full device tree or an ACPI boot path. +#[derive(MeshPayload, Debug, Clone, Copy, PartialEq, Eq)] +pub enum LinuxDirectBootMode { + /// Full device tree with all devices described in DT nodes (aarch64 only). + DeviceTree, + /// ACPI tables for device discovery. On aarch64, this also synthesizes + /// an EFI system table so the kernel enters its ACPI code path. On x86, + /// ACPI tables are always provided via the zero page. 
+ Acpi, +} + #[derive(MeshPayload, Debug)] pub enum LoadMode { Linux { @@ -118,6 +136,7 @@ pub enum LoadMode { cmdline: String, enable_serial: bool, custom_dsdt: Option>, + boot_mode: LinuxDirectBootMode, }, Uefi { firmware: File, diff --git a/openvmm/openvmm_entry/src/cli_args.rs b/openvmm/openvmm_entry/src/cli_args.rs index bf27b503d4..5282658eb3 100644 --- a/openvmm/openvmm_entry/src/cli_args.rs +++ b/openvmm/openvmm_entry/src/cli_args.rs @@ -100,6 +100,12 @@ pub struct Options { #[clap(long)] pub hv: bool, + /// Use a full device tree instead of ACPI tables for ARM64 Linux direct + /// boot. By default, ARM64 uses ACPI mode (stub DT + EFI + ACPI tables). + /// This flag selects the legacy DT-only path. Rejected on x86. + #[clap(long, conflicts_with_all = ["uefi", "pcat", "igvm"])] + pub device_tree: bool, + /// enable vtl2 - only supported in WHP and simulated without hypervisor support currently /// /// Currently implies --get. diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index e1ae6fad10..1460dc68e2 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -1125,6 +1125,7 @@ async fn vm_config_from_command_line( if !console_str.is_empty() { let _ = write!(&mut cmdline, " console={}", console_str); } + if opt.gfx { cmdline += " console=tty"; } @@ -1162,6 +1163,11 @@ async fn vm_config_from_command_line( cmdline, custom_dsdt, enable_serial: any_serial_configured, + boot_mode: if opt.device_tree { + openvmm_defs::config::LinuxDirectBootMode::DeviceTree + } else { + openvmm_defs::config::LinuxDirectBootMode::Acpi + }, }; } diff --git a/openvmm/openvmm_entry/src/ttrpc/mod.rs b/openvmm/openvmm_entry/src/ttrpc/mod.rs index 8891d5e397..55d3d42229 100644 --- a/openvmm/openvmm_entry/src/ttrpc/mod.rs +++ b/openvmm/openvmm_entry/src/ttrpc/mod.rs @@ -430,6 +430,7 @@ impl VmService { cmdline: boot.kernel_cmdline, custom_dsdt: None, enable_serial: true, + boot_mode: 
openvmm_defs::config::LinuxDirectBootMode::Acpi, } } vmservice::vm_config::BootConfig::Uefi(_) => { diff --git a/petri/src/vm/openvmm/construct.rs b/petri/src/vm/openvmm/construct.rs index 7855d4dd7f..e2b7ea330a 100644 --- a/petri/src/vm/openvmm/construct.rs +++ b/petri/src/vm/openvmm/construct.rs @@ -693,6 +693,7 @@ impl PetriVmConfigSetupCore<'_> { cmdline, custom_dsdt: None, enable_serial: self.enable_serial, + boot_mode: openvmm_defs::config::LinuxDirectBootMode::Acpi, } } ( diff --git a/tmk/tmk_vmm/src/run.rs b/tmk/tmk_vmm/src/run.rs index ddc866ab02..fe2ef8851b 100644 --- a/tmk/tmk_vmm/src/run.rs +++ b/tmk/tmk_vmm/src/run.rs @@ -60,16 +60,16 @@ impl CommonState { .context("failed to build processor topology")?; #[cfg(guest_arch = "aarch64")] - let processor_topology = TopologyBuilder::new_aarch64( - vm_topology::processor::arch::GicInfo { + let processor_topology = + TopologyBuilder::new_aarch64(vm_topology::processor::arch::Aarch64PlatformConfig { gic_distributor_base: 0xff000000, gic_redistributors_base: 0xff020000, gic_v2m: None, - }, - 0, - ) - .build(1) - .context("failed to build processor topology")?; + pmu_gsiv: 0, + virt_timer_ppi: 20, // DEFAULT_VIRT_TIMER_PPI + }) + .build(1) + .context("failed to build processor topology")?; let ram_size = 0x400000; let memory_layout = diff --git a/vm/acpi/src/dsdt.rs b/vm/acpi/src/dsdt.rs index b24c97c55b..e3584dda3b 100644 --- a/vm/acpi/src/dsdt.rs +++ b/vm/acpi/src/dsdt.rs @@ -176,6 +176,34 @@ impl Dsdt { self.add_object(&uart); } + /// Add an ARM SBSA Generic UART to the DSDT for ACPI-based boot. 
+ /// + /// ```text + /// Device(<name>) + /// { + /// Name(_HID, "ARMH0011") + /// Name(_UID, <uid>) + /// Name(_CRS, ResourceTemplate() + /// { + /// QWORDMemory(..., <base_addr>, <size>) + /// Interrupt(ResourceConsumer, Level, ActiveHigh, Exclusive) + /// {<gsiv>} + /// }) + /// } + /// ``` + pub fn add_sbsa_uart(&mut self, name: &[u8], uid: u64, base_addr: u64, size: u64, gsiv: u32) { + let mut uart = Device::new(name); + uart.add_object(&NamedString::new(b"_HID", b"ARMH0011")); + uart.add_object(&NamedInteger::new(b"_UID", uid)); + let mut crs = CurrentResourceSettings::new(); + crs.add_resource(&QwordMemory::new(base_addr, size)); + let mut intr = Interrupt::new(gsiv); + intr.is_edge_triggered = false; // level-triggered + crs.add_resource(&intr); + uart.add_object(&crs); + self.add_object(&uart); + } + /// Add an ACPI module device to describe the low and high MMIO regions. /// This is used when PCI is not present so that VMBus can find MMIO space. /// @@ -275,6 +303,9 @@ impl Dsdt { /// If `in_pci`, then enumerate the device under PCI0. Otherwise, enumerate /// it under the VMOD module created by `add_mmio_module`. /// + /// If `interrupt_intid` is provided, it is added as an Extended Interrupt resource + /// in `_CRS`. On ARM64 ACPI, the kernel reads the VMBus interrupt from this. + /// /// ```text /// Device(\_SB.VMOD.VMBS) /// { @@ -291,9 +322,10 @@ impl Dsdt { /// } /// /// Name(_PS3, 0) + /// Name(_CRS, ResourceTemplate() { ... }) /// } /// ``` - pub fn add_vmbus(&mut self, in_pci: bool) { + pub fn add_vmbus(&mut self, in_pci: bool, interrupt_intid: Option) { let name = if in_pci { b"\\_SB.PCI0.VMBS" } else { @@ -328,8 +360,13 @@ impl Dsdt { method.add_operation(&op); vmbs.add_object(&method); vmbs.add_object(&NamedInteger::new(b"_PS3", 0)); - // On linux, the vmbus driver will fail if the _CRS section is not present.
- vmbs.add_object(&CurrentResourceSettings::new()); + let mut crs = CurrentResourceSettings::new(); + if let Some(intid) = interrupt_intid { + let mut intr = Interrupt::new(intid); + intr.is_edge_triggered = true; + crs.add_resource(&intr); + } + vmbs.add_object(&crs); self.add_object(&vmbs); } diff --git a/vm/acpi_spec/src/gtdt.rs b/vm/acpi_spec/src/gtdt.rs new file mode 100644 index 0000000000..0facf7c054 --- /dev/null +++ b/vm/acpi_spec/src/gtdt.rs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use super::Table; +use core::mem::size_of; +use static_assertions::const_assert_eq; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; +use zerocopy::Unaligned; + +/// ACPI 6.5 Generic Timer Description Table (Table 5-128): the fields that follow the common ACPI table header. +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct Gtdt { + pub cnt_control_base: u64, // CntControlBase physical address; all-ones when not provided + pub reserved: u32, + pub secure_el1_timer_gsiv: u32, + pub secure_el1_timer_flags: u32, // every *_flags field takes GTDT_TIMER_* bits + pub non_secure_el1_timer_gsiv: u32, + pub non_secure_el1_timer_flags: u32, + pub virtual_el1_timer_gsiv: u32, + pub virtual_el1_timer_flags: u32, + pub el2_timer_gsiv: u32, + pub el2_timer_flags: u32, + pub cnt_read_base: u64, // CntReadBase physical address; all-ones when not provided + pub platform_timer_count: u32, // number of platform timer structures + pub platform_timer_offset: u32, // byte offset from the table start to the platform timer array + pub virtual_el2_timer_gsiv: u32, // virtual EL2 timer fields were added in GTDT revision 3 + pub virtual_el2_timer_flags: u32, +} + +const_assert_eq!(size_of::(), 68); // 68 bytes = sum of the field sizes above + +impl Table for Gtdt { + const SIGNATURE: [u8; 4] = *b"GTDT"; +} + +pub const GTDT_TIMER_EDGE_TRIGGERED: u32 = 1 << 0; // bit 0, interrupt mode: set = edge-triggered, clear = level-triggered +pub const GTDT_TIMER_ACTIVE_LOW: u32 = 1 << 1; // bit 1, polarity: set = active low, clear = active high +pub const GTDT_TIMER_ALWAYS_ON: u32 = 1 << 2; // bit 2: timer interrupt can wake the processor from any power state diff --git a/vm/acpi_spec/src/lib.rs b/vm/acpi_spec/src/lib.rs index 4107577176..0a881b984d 100644 --- a/vm/acpi_spec/src/lib.rs +++ b/vm/acpi_spec/src/lib.rs @@ -12,6 +12,7 @@ extern crate alloc; pub mod aspt; pub mod fadt; +pub mod gtdt; pub mod madt; pub mod mcfg; pub mod pptt; diff
--git a/vm/acpi_spec/src/madt.rs b/vm/acpi_spec/src/madt.rs index 116b61cdb8..b6f88d72ee 100644 --- a/vm/acpi_spec/src/madt.rs +++ b/vm/acpi_spec/src/madt.rs @@ -42,6 +42,7 @@ open_enum! { X2APIC = 0x9, GICC = 0xb, GICD = 0xc, + GIC_MSI_FRAME = 0xd, } } @@ -235,6 +236,39 @@ impl MadtGicd { } } +/// ACPI 6.5 MADT GIC MSI Frame structure (Table 5-67). +#[repr(C)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes)] +pub struct MadtGicMsiFrame { + pub typ: MadtType, + pub length: u8, + pub reserved: u16, + pub gic_msi_frame_id: u32, + pub base_address: u64, + pub flags: u32, + pub spi_count: u16, + pub spi_base: u16, +} + +const_assert_eq!(size_of::(), 24); + +pub const GIC_MSI_FRAME_FLAGS_SPI_SELECT: u32 = 1 << 0; // bit 0: use spi_count/spi_base from this entry instead of the frame's MSI_TYPER register + +impl MadtGicMsiFrame { + pub fn new(gic_msi_frame_id: u32, base_address: u64, spi_base: u16, spi_count: u16) -> Self { + Self { + typ: MadtType::GIC_MSI_FRAME, + length: size_of::() as u8, + reserved: 0, + gic_msi_frame_id, + base_address, + flags: GIC_MSI_FRAME_FLAGS_SPI_SELECT, + spi_count, + spi_base, + } + } +} + // TODO: use LE types everywhere, as here, to avoid #[repr(packed)] and to be // specific about endianness (which the ACPI spec dictates is always LE). #[repr(C)] diff --git a/vm/devices/firmware/uefi_specs/src/uefi/boot.rs b/vm/devices/firmware/uefi_specs/src/uefi/boot.rs index 9be0301113..0571fb880f 100644 --- a/vm/devices/firmware/uefi_specs/src/uefi/boot.rs +++ b/vm/devices/firmware/uefi_specs/src/uefi/boot.rs @@ -4,6 +4,7 @@ //! Definitions related to UEFI boot entries use guid::Guid; +use static_assertions::const_assert_eq; use zerocopy::FromBytes; use zerocopy::Immutable; use zerocopy::IntoBytes; @@ -180,6 +181,88 @@ pub struct EfiScsiDevice { pub logical_unit_num: u16, } +/// From UEFI spec 4.3 — EFI_SYSTEM_TABLE +/// +/// Minimal layout covering header fields and the pointers needed by +/// the Linux EFI stub (firmware vendor, configuration table). 
+#[repr(C)] +#[derive(Clone, Copy, Debug, Default, IntoBytes, Immutable, KnownLayout)] +pub struct EfiSystemTable { + // EFI_TABLE_HEADER (UEFI spec 4.2) + pub signature: u64, + pub revision: u32, + pub header_size: u32, + pub crc32: u32, + pub reserved: u32, + // Body + pub firmware_vendor: u64, + pub firmware_revision: u32, + pub _pad0: u32, // keeps the 64-bit fields that follow 8-byte aligned + pub console_in_handle: u64, + pub con_in: u64, + pub console_out_handle: u64, + pub con_out: u64, + pub standard_error_handle: u64, + pub std_err: u64, + pub runtime_services: u64, + pub boot_services: u64, + pub number_of_table_entries: u64, + pub configuration_table: u64, +} + +/// From UEFI spec 4.3 — EFI_SYSTEM_TABLE signature +pub const EFI_SYSTEM_TABLE_SIGNATURE: u64 = 0x5453595320494249; // "IBI SYST" +/// EFI 2.70 system table revision: (major << 16) | minor. +pub const EFI_2_70_SYSTEM_TABLE_REVISION: u32 = 0x0002_0046; + +/// From UEFI spec 7.2 — EFI_MEMORY_DESCRIPTOR +#[repr(C)] +#[derive(Clone, Copy, Debug, IntoBytes, Immutable, KnownLayout)] +pub struct EfiMemoryDescriptor { + pub typ: EfiMemoryType, + pub _pad: u32, + pub physical_start: u64, + pub virtual_start: u64, + pub number_of_pages: u64, // count of 4 KiB pages + pub attribute: u64, +} + +const_assert_eq!(size_of::(), 40); + +/// From UEFI spec 7.2 +pub const EFI_MEMORY_DESCRIPTOR_VERSION: u32 = 1; + +/// From UEFI spec 7.2 — EFI_MEMORY_WB attribute +pub const EFI_MEMORY_WB: u64 = 0x8; + +/// ACPI 2.0 table GUID for EFI configuration table entries. +pub const ACPI_20_TABLE_GUID: Guid = guid::guid!("8868e871-e4f1-11d3-bc22-0080c73c8881"); + +/// EFI RT Properties Table GUID (UEFI spec 4.6). +pub const EFI_RT_PROPERTIES_TABLE_GUID: Guid = guid::guid!("eb66918a-7eef-402a-842e-931d21c38ae9"); + +/// From UEFI spec 4.6 — EFI_RT_PROPERTIES_TABLE +/// +/// Installed in the EFI Configuration Table to tell the OS which runtime +/// services are supported. Setting `runtime_services_supported` to zero +/// means no runtime services are backed by real code.
+#[repr(C)] +#[derive(Clone, Copy, Debug, IntoBytes, Immutable, KnownLayout)] +pub struct EfiRtPropertiesTable { + pub version: u16, + pub length: u16, + pub runtime_services_supported: u32, +} + +impl EfiRtPropertiesTable { + /// A table advertising that no runtime services are supported. + pub const NONE_SUPPORTED: Self = Self { + version: 1, + length: size_of::() as u16, + runtime_services_supported: 0, + }; +} + #[repr(C, packed)] #[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Debug, PartialEq)] pub struct EfiMemoryMappedDevice { diff --git a/vm/vmcore/vm_topology/src/processor/aarch64.rs b/vm/vmcore/vm_topology/src/processor/aarch64.rs index 82bac7cef8..6cd8e1ed86 100644 --- a/vm/vmcore/vm_topology/src/processor/aarch64.rs +++ b/vm/vmcore/vm_topology/src/processor/aarch64.rs @@ -17,10 +17,7 @@ use aarch64defs::MpidrEl1; #[derive(Debug, Copy, Clone)] #[non_exhaustive] pub struct Aarch64Topology { - gic: GicInfo, - /// Performance Interrupt GSIV (PMU) - #[cfg_attr(feature = "inspect", inspect(hex))] - pmu_gsiv: u32, + platform: Aarch64PlatformConfig, } impl ArchTopology for Aarch64Topology { @@ -38,23 +35,29 @@ impl ArchTopology for Aarch64Topology { /// Aarch64-specific [`TopologyBuilder`] state. pub struct Aarch64TopologyBuilderState { - gic: GicInfo, - pmu_gsiv: u32, + platform: Aarch64PlatformConfig, } -/// GIC information +/// ARM64 platform interrupt and GIC configuration. +/// +/// Groups GIC base addresses, MSI frame info, and platform interrupt +/// assignments (PMU, virtual timer) into a single struct so that the +/// topology builder takes one value instead of several positional `u32`s. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[cfg_attr(feature = "inspect", derive(inspect::Inspect))] -pub struct GicInfo { - /// GIC distributor base +pub struct Aarch64PlatformConfig { + /// GIC distributor base address. 
#[cfg_attr(feature = "inspect", inspect(hex))] pub gic_distributor_base: u64, - /// GIC redistributors base + /// GIC redistributors base address. #[cfg_attr(feature = "inspect", inspect(hex))] pub gic_redistributors_base: u64, - /// GIC v2m MSI frame base address, and the SPI range it owns. - /// `None` if MSIs via v2m are not supported. + /// GIC v2m MSI frame, if MSIs via v2m are supported. pub gic_v2m: Option, + /// Performance Monitor Unit GSIV. 0 if not available. + pub pmu_gsiv: u32, + /// Virtual timer PPI (GIC INTID, e.g. 20 for PPI 4). + pub virt_timer_ppi: u32, } /// GIC v2m MSI frame parameters. @@ -95,12 +98,12 @@ impl AsRef for Aarch64VpInfo { } impl TopologyBuilder { - /// Returns a builder for creating an x86 processor topology. - pub fn new_aarch64(gic: GicInfo, pmu_gsiv: u32) -> Self { + /// Returns a builder for creating an aarch64 processor topology. + pub fn new_aarch64(platform: Aarch64PlatformConfig) -> Self { Self { vps_per_socket: 1, smt_enabled: false, - arch: Aarch64TopologyBuilderState { gic, pmu_gsiv }, + arch: Aarch64TopologyBuilderState { platform }, } } @@ -134,9 +137,9 @@ impl TopologyBuilder { vnode: 0, }, mpidr, - gicr: self.arch.gic.gic_redistributors_base + gicr: self.arch.platform.gic_redistributors_base + id as u64 * aarch64defs::GIC_REDISTRIBUTOR_SIZE, - pmu_gsiv: self.arch.pmu_gsiv, + pmu_gsiv: self.arch.platform.pmu_gsiv, })) } @@ -162,8 +165,7 @@ impl TopologyBuilder { smt_enabled, vps_per_socket: self.vps_per_socket, arch: Aarch64Topology { - gic: self.arch.gic, - pmu_gsiv: self.arch.pmu_gsiv, + platform: self.arch.platform, }, }) } @@ -172,21 +174,26 @@ impl TopologyBuilder { impl ProcessorTopology { /// Returns the GIC distributor base pub fn gic_distributor_base(&self) -> u64 { - self.arch.gic.gic_distributor_base + self.arch.platform.gic_distributor_base } /// Returns the GIC redistributors base pub fn gic_redistributors_base(&self) -> u64 { - self.arch.gic.gic_redistributors_base + 
self.arch.platform.gic_redistributors_base } /// Returns the PMU GSIV pub fn pmu_gsiv(&self) -> u32 { - self.arch.pmu_gsiv + self.arch.platform.pmu_gsiv } /// Returns the GIC v2m MSI frame info, if present. pub fn gic_v2m(&self) -> Option { - self.arch.gic.gic_v2m + self.arch.platform.gic_v2m + } + + /// Returns the virtual timer PPI (GIC INTID). + pub fn virt_timer_ppi(&self) -> u32 { + self.arch.platform.virt_timer_ppi } } diff --git a/vmm_core/src/acpi_builder.rs b/vmm_core/src/acpi_builder.rs index 231b3bc6d1..3cbd258d22 100644 --- a/vmm_core/src/acpi_builder.rs +++ b/vmm_core/src/acpi_builder.rs @@ -6,9 +6,6 @@ // TODO: continue to remove these hardcoded deps use acpi::dsdt; use acpi::ssdt::Ssdt; -use acpi_spec::fadt::AddressSpaceId; -use acpi_spec::fadt::AddressWidth; -use acpi_spec::fadt::GenericAddress; use acpi_spec::madt::InterruptPolarity; use acpi_spec::madt::InterruptTriggerMode; use cache_topology::CacheTopology; @@ -50,18 +47,35 @@ pub struct AcpiTablesBuilder<'a, T: AcpiTopology> { /// /// If and only if this has root complexes, then an MCFG will be generated. pub pcie_host_bridges: &'a Vec, - /// If an ioapic is present. - pub with_ioapic: bool, - /// If a PIC is present. - pub with_pic: bool, - /// If a PIT is present. - pub with_pit: bool, - /// If a psp is present. - pub with_psp: bool, - /// base address of dynamic power management device registers - pub pm_base: u16, - /// ACPI IRQ number - pub acpi_irq: u32, + /// Architecture-specific ACPI configuration. + pub arch: AcpiArchConfig, +} + +/// Architecture-specific ACPI configuration carried by [`AcpiTablesBuilder`]. +pub enum AcpiArchConfig { + /// x86-specific settings (IOAPIC, PIC, PIT, PSP, PM base, SCI IRQ). + X86 { + /// If an IOAPIC is present. + with_ioapic: bool, + /// If a PIC is present. + with_pic: bool, + /// If a PIT is present. + with_pit: bool, + /// If a PSP is present. + with_psp: bool, + /// Base address of dynamic power management device registers. 
+ pm_base: u16, + /// ACPI IRQ number. + acpi_irq: u32, + }, + /// ARM64-specific settings (HW_REDUCED_ACPI FADT). + Aarch64 { + /// Hypervisor vendor identity for the FADT. + /// Zero when not running under a hypervisor. + hypervisor_vendor_identity: u64, + /// Virtual timer PPI (GIC INTID). + virt_timer_ppi: u32, + }, } pub const OEM_INFO: acpi::builder::OemInfo = acpi::builder::OemInfo { @@ -156,6 +170,19 @@ impl AcpiTopology for Aarch64Topology { acpi_spec::madt::MadtGicc::new(uid, mpidr, gicr, pmu_gsiv).as_bytes(), ); } + + // GIC v2m MSI frame for PCIe MSI support. + if let Some(v2m) = topology.gic_v2m() { + madt.extend_from_slice( + acpi_spec::madt::MadtGicMsiFrame::new( + 0, + v2m.frame_base, + v2m.spi_base as u16, + v2m.spi_count as u16, + ) + .as_bytes(), + ); + } } } @@ -190,50 +217,62 @@ impl AcpiTablesBuilder<'_, T> { F: FnOnce(&acpi::builder::Table<'_>) -> R, { let mut madt_extra: Vec = Vec::new(); - if self.with_ioapic { - madt_extra.extend_from_slice( - acpi_spec::madt::MadtIoApic { - io_apic_id: 0, - io_apic_address: ioapic::IOAPIC_DEVICE_MMIO_REGION_BASE_ADDRESS as u32, - ..acpi_spec::madt::MadtIoApic::new() - } - .as_bytes(), - ); - } - // Add override for ACPI interrupt to be level triggered, active high. - madt_extra.extend_from_slice( - acpi_spec::madt::MadtInterruptSourceOverride::new( - self.acpi_irq.try_into().expect("should be in range"), - self.acpi_irq, - Some(InterruptPolarity::ActiveHigh), - Some(InterruptTriggerMode::Level), - ) - .as_bytes(), - ); + if let AcpiArchConfig::X86 { + with_ioapic, + acpi_irq, + with_pit, + .. + } = self.arch + { + if with_ioapic { + madt_extra.extend_from_slice( + acpi_spec::madt::MadtIoApic { + io_apic_id: 0, + io_apic_address: ioapic::IOAPIC_DEVICE_MMIO_REGION_BASE_ADDRESS as u32, + ..acpi_spec::madt::MadtIoApic::new() + } + .as_bytes(), + ); + } - if self.with_pit { - // IO-APIC IRQ0 is interrupt 2, which the PIT is attached to. + // Add override for ACPI interrupt to be level triggered, active high. 
madt_extra.extend_from_slice( - acpi_spec::madt::MadtInterruptSourceOverride::new(0, 2, None, None).as_bytes(), + acpi_spec::madt::MadtInterruptSourceOverride::new( + acpi_irq.try_into().expect("should be in range"), + acpi_irq, + Some(InterruptPolarity::ActiveHigh), + Some(InterruptTriggerMode::Level), + ) + .as_bytes(), ); + + if with_pit { + // IO-APIC IRQ0 is interrupt 2, which the PIT is attached to. + madt_extra.extend_from_slice( + acpi_spec::madt::MadtInterruptSourceOverride::new(0, 2, None, None).as_bytes(), + ); + } } T::extend_madt(self.processor_topology, &mut madt_extra); - let flags = if self.with_pic { - acpi_spec::madt::MADT_PCAT_COMPAT - } else { - 0 + let (apic_addr, flags) = match self.arch { + AcpiArchConfig::X86 { with_pic, .. } => ( + APIC_BASE_ADDRESS, + if with_pic { + acpi_spec::madt::MADT_PCAT_COMPAT + } else { + 0 + }, + ), + AcpiArchConfig::Aarch64 { .. } => (0u32, 0u32), }; (f)(&acpi::builder::Table::new_dyn( 5, None, - &acpi_spec::madt::Madt { - apic_addr: APIC_BASE_ADDRESS, - flags, - }, + &acpi_spec::madt::Madt { apic_addr, flags }, &[madt_extra.as_slice()], )) } @@ -465,68 +504,96 @@ impl AcpiTablesBuilder<'_, T> { let dsdt = b.append_raw(dsdt); - b.append(&acpi::builder::Table::new( - 6, - None, - &acpi_spec::fadt::Fadt { - flags: acpi_spec::fadt::FADT_WBINVD - | acpi_spec::fadt::FADT_PROC_C1 - | acpi_spec::fadt::FADT_PWR_BUTTON - | acpi_spec::fadt::FADT_SLP_BUTTON - | acpi_spec::fadt::FADT_RTC_S4 - | acpi_spec::fadt::FADT_TMR_VAL_EXT - | acpi_spec::fadt::FADT_RESET_REG_SUP - | acpi_spec::fadt::FADT_USE_PLATFORM_CLOCK, - x_dsdt: dsdt, - sci_int: self.acpi_irq as u16, - p_lvl2_lat: 101, // disable C2 - p_lvl3_lat: 1001, // disable C3 - pm1_evt_len: 4, - x_pm1a_evt_blk: GenericAddress { - addr_space_id: AddressSpaceId::SystemIo, - register_bit_width: 32, - register_bit_offset: 0, - access_size: AddressWidth::Word, - address: (self.pm_base + chipset::pm::DynReg::STATUS.0 as u16).into(), - }, - pm1_cnt_len: 2, - x_pm1a_cnt_blk: 
GenericAddress { - addr_space_id: AddressSpaceId::SystemIo, - register_bit_width: 16, - register_bit_offset: 0, - access_size: AddressWidth::Word, - address: (self.pm_base + chipset::pm::DynReg::CONTROL.0 as u16).into(), - }, - gpe0_blk_len: 4, - x_gpe0_blk: GenericAddress { - addr_space_id: AddressSpaceId::SystemIo, - register_bit_width: 32, - register_bit_offset: 0, - access_size: AddressWidth::Word, - address: (self.pm_base + chipset::pm::DynReg::GEN_PURPOSE_STATUS.0 as u16) - .into(), - }, - reset_reg: GenericAddress { - addr_space_id: AddressSpaceId::SystemIo, - register_bit_width: 8, - register_bit_offset: 0, - access_size: AddressWidth::Byte, - address: (self.pm_base + chipset::pm::DynReg::RESET.0 as u16).into(), + if let AcpiArchConfig::X86 { + pm_base, acpi_irq, .. + } = self.arch + { + use acpi_spec::fadt::AddressSpaceId; + use acpi_spec::fadt::AddressWidth; + use acpi_spec::fadt::GenericAddress; + + b.append(&acpi::builder::Table::new( + 6, + None, + &acpi_spec::fadt::Fadt { + flags: acpi_spec::fadt::FADT_WBINVD + | acpi_spec::fadt::FADT_PROC_C1 + | acpi_spec::fadt::FADT_PWR_BUTTON + | acpi_spec::fadt::FADT_SLP_BUTTON + | acpi_spec::fadt::FADT_RTC_S4 + | acpi_spec::fadt::FADT_TMR_VAL_EXT + | acpi_spec::fadt::FADT_RESET_REG_SUP + | acpi_spec::fadt::FADT_USE_PLATFORM_CLOCK, + x_dsdt: dsdt, + sci_int: acpi_irq as u16, + p_lvl2_lat: 101, // disable C2 + p_lvl3_lat: 1001, // disable C3 + pm1_evt_len: 4, + x_pm1a_evt_blk: GenericAddress { + addr_space_id: AddressSpaceId::SystemIo, + register_bit_width: 32, + register_bit_offset: 0, + access_size: AddressWidth::Word, + address: (pm_base + chipset::pm::DynReg::STATUS.0 as u16).into(), + }, + pm1_cnt_len: 2, + x_pm1a_cnt_blk: GenericAddress { + addr_space_id: AddressSpaceId::SystemIo, + register_bit_width: 16, + register_bit_offset: 0, + access_size: AddressWidth::Word, + address: (pm_base + chipset::pm::DynReg::CONTROL.0 as u16).into(), + }, + gpe0_blk_len: 4, + x_gpe0_blk: GenericAddress { + addr_space_id: 
AddressSpaceId::SystemIo, + register_bit_width: 32, + register_bit_offset: 0, + access_size: AddressWidth::Word, + address: (pm_base + chipset::pm::DynReg::GEN_PURPOSE_STATUS.0 as u16) + .into(), + }, + reset_reg: GenericAddress { + addr_space_id: AddressSpaceId::SystemIo, + register_bit_width: 8, + register_bit_offset: 0, + access_size: AddressWidth::Byte, + address: (pm_base + chipset::pm::DynReg::RESET.0 as u16).into(), + }, + reset_value: chipset::pm::RESET_VALUE, + pm_tmr_len: 4, + x_pm_tmr_blk: GenericAddress { + addr_space_id: AddressSpaceId::SystemIo, + register_bit_width: 32, + register_bit_offset: 0, + access_size: AddressWidth::Dword, + address: (pm_base + chipset::pm::DynReg::TIMER.0 as u16).into(), + }, + ..Default::default() }, - reset_value: chipset::pm::RESET_VALUE, - pm_tmr_len: 4, - x_pm_tmr_blk: GenericAddress { - addr_space_id: AddressSpaceId::SystemIo, - register_bit_width: 32, - register_bit_offset: 0, - access_size: AddressWidth::Dword, - address: (self.pm_base + chipset::pm::DynReg::TIMER.0 as u16).into(), + )); + } + + if let AcpiArchConfig::Aarch64 { + hypervisor_vendor_identity, + .. + } = self.arch + { + b.append(&acpi::builder::Table::new( + 6, + None, + &acpi_spec::fadt::Fadt { + flags: acpi_spec::fadt::FADT_HW_REDUCED_ACPI, + arm_boot_arch: 0x0003, // PSCI_COMPLIANT | PSCI_USE_HVC + minor_version: 3, + hypervisor_vendor_identity, + x_dsdt: dsdt, + ..Default::default() }, - ..Default::default() - }, - )); + )); + } - if self.with_psp { + if let AcpiArchConfig::X86 { with_psp: true, .. } = self.arch { use acpi_spec::aspt; use acpi_spec::aspt::Aspt; use acpi_spec::aspt::AsptStructHeader; @@ -594,6 +661,10 @@ impl AcpiTablesBuilder<'_, T> { self.with_pptt(|t| b.append(t)); } + if matches!(self.arch, AcpiArchConfig::Aarch64 { .. 
}) { + self.with_gtdt(|t| b.append(t)); + } + let (rdsp, tables) = b.build(); BuiltAcpiTables { rdsp, tables } @@ -625,6 +696,29 @@ impl AcpiTablesBuilder<'_, T> { pub fn build_pptt(&self) -> Vec { self.with_pptt(|t| t.to_vec(&OEM_INFO)) } + + fn with_gtdt(&self, f: impl FnOnce(&acpi::builder::Table<'_>) -> R) -> R { + let virt_timer_ppi = if let AcpiArchConfig::Aarch64 { virt_timer_ppi, .. } = self.arch { + virt_timer_ppi + } else { + 0 + }; + (f)(&acpi::builder::Table::new( + 3, + None, + &acpi_spec::gtdt::Gtdt { + cnt_control_base: 0xFFFF_FFFF_FFFF_FFFF, + virtual_el1_timer_gsiv: virt_timer_ppi, + virtual_el1_timer_flags: acpi_spec::gtdt::GTDT_TIMER_ACTIVE_LOW, + cnt_read_base: 0xFFFF_FFFF_FFFF_FFFF, + ..Default::default() + }, + )) + } + + pub fn build_gtdt(&self) -> Vec { + self.with_gtdt(|t| t.to_vec(&OEM_INFO)) + } } #[cfg(test)] @@ -662,12 +756,14 @@ mod test { mem_layout, cache_topology: None, pcie_host_bridges, - with_ioapic: true, - with_pic: false, - with_pit: false, - with_psp: false, - pm_base: 1234, - acpi_irq: 2, + arch: AcpiArchConfig::X86 { + with_ioapic: true, + with_pic: false, + with_pit: false, + with_psp: false, + pm_base: 1234, + acpi_irq: 2, + }, } } diff --git a/vmm_core/virt_hvf/src/lib.rs b/vmm_core/virt_hvf/src/lib.rs index 3bcbf3a8d8..6b4a6fd3ad 100644 --- a/vmm_core/virt_hvf/src/lib.rs +++ b/vmm_core/virt_hvf/src/lib.rs @@ -69,8 +69,6 @@ use vmcore::reference_time::ReferenceTimeSource; use vmcore::synic::GuestEventPort; use vmcore::vmtime::VmTimeAccess; -const PPI_VTIMER: u32 = 20; - const HV_ARM64_HVC_SMCCC_IDENTIFIER: u32 = (1 << 30) | (6 << 24) | 1; #[derive(Debug)] @@ -152,6 +150,7 @@ impl virt::ProtoPartition for HvfProtoPartition<'_> { // Apple Silicon does not support aarch32. 
supports_aarch32_el0: false, }, + virt_timer_ppi: self.config.processor_topology.virt_timer_ppi(), vps: self .config .processor_topology @@ -444,6 +443,7 @@ impl AccessVmState for HvfPartitionStateAccess<'_> { #[derive(Inspect)] struct HvfPartitionInner { caps: Aarch64PartitionCapabilities, + virt_timer_ppi: u32, #[inspect(skip)] vps: Vec, gicd: gic::Distributor, @@ -919,7 +919,7 @@ impl<'p> Processor for HvfProcessor<'p> { self.vmtime.now().wrapping_add(Duration::from_millis(2)), ); ready!(self.vmtime.poll_timeout(cx)); - self.gicr.raise(PPI_VTIMER); + self.gicr.raise(self.partition.virt_timer_ppi); continue; } @@ -928,7 +928,10 @@ impl<'p> Processor for HvfProcessor<'p> { }) .await?; - if !self.gicr.is_pending_or_active(PPI_VTIMER) { + if !self + .gicr + .is_pending_or_active(self.partition.virt_timer_ppi) + { // SAFETY: no requirements. unsafe { abi::hv_vcpu_set_vtimer_mask(self.vcpu.vcpu, false) @@ -1129,7 +1132,7 @@ impl<'p> Processor for HvfProcessor<'p> { } } abi::HvExitReason::VTIMER_ACTIVATED => { - self.gicr.raise(PPI_VTIMER); + self.gicr.raise(self.partition.virt_timer_ppi); } reason => { return Err(dev.fatal_error( diff --git a/vmm_core/virt_kvm/src/arch/aarch64/mod.rs b/vmm_core/virt_kvm/src/arch/aarch64/mod.rs index 8146907f58..0b2c206617 100644 --- a/vmm_core/virt_kvm/src/arch/aarch64/mod.rs +++ b/vmm_core/virt_kvm/src/arch/aarch64/mod.rs @@ -618,9 +618,12 @@ impl virt::ProtoPartition for KvmProtoPartition<'_> { // TODO: Save the GICv3 FD to a File to ensure it is cleaned up. self.add_gicv3()?; - // Use the Hyper-V timers instead of the ARM architectural ones. TODO: - // make this configurable. - self.set_timer_ppis(20, 19)?; + // Configure the virtual timer PPI from topology. KVM also requires + // a physical timer PPI, but we don't expose it to the guest. 
+ self.set_timer_ppis( + self.config.processor_topology.virt_timer_ppi(), + 19, // KVM requires this; unused by the guest + )?; let caps = { let supports_aarch32_el0 = { diff --git a/vmm_core/virt_whp/src/lib.rs b/vmm_core/virt_whp/src/lib.rs index 6ccb613dc1..20706392a3 100644 --- a/vmm_core/virt_whp/src/lib.rs +++ b/vmm_core/virt_whp/src/lib.rs @@ -1274,7 +1274,7 @@ impl VtlPartition { } else { 1 }, - GicPpiOverflowInterruptFromCntv: 0x14, + GicPpiOverflowInterruptFromCntv: config.processor_topology.virt_timer_ppi(), GicPpiPerformanceMonitorsInterrupt: 0x17, Reserved1: [0; 6], }, diff --git a/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs b/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs index 26a3c7f4cc..4ea0b38142 100644 --- a/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs +++ b/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs @@ -5,7 +5,9 @@ use petri::PetriVmBuilder; use petri::PetriVmmBackend; +use petri::openvmm::OpenVmmPetriBackend; use petri::pipette::cmd; +use vmm_test_macros::openvmm_test; use vmm_test_macros::vmm_test; /// Boot Linux and verify the PMU interrupt is available. @@ -43,3 +45,33 @@ async fn pmu_gsiv(config: PetriVmBuilder) -> Result<(), a Ok(()) } + +/// Boot ARM64 Linux in device-tree mode (full DT, no ACPI). +#[openvmm_test(linux_direct_aarch64)] +async fn boot_dt(config: PetriVmBuilder) -> Result<(), anyhow::Error> { + let (vm, agent) = config + .modify_backend(|c| { + c.with_custom_config(|c| { + if let openvmm_defs::config::LoadMode::Linux { boot_mode, .. } = &mut c.load_mode { + *boot_mode = openvmm_defs::config::LinuxDirectBootMode::DeviceTree; + } + }) + }) + .run() + .await?; + + // Verify we're in DT mode — no ACPI tables directory. 
+ let shell = agent.unix_shell(); + let output = cmd!(shell, "test -d /sys/firmware/acpi/tables") + .ignore_status() + .output() + .await?; + assert!( + !output.status.success(), + "ACPI tables should not exist in DT-only mode" + ); + + agent.power_off().await?; + vm.wait_for_clean_teardown().await?; + Ok(()) +} diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch.rs b/vmm_tests/vmm_tests/tests/tests/multiarch.rs index 8a4cfa6e74..96567b02d6 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch.rs @@ -52,6 +52,7 @@ async fn frontpage(config: PetriVmBuilder) -> anyhow::Res /// Basic boot test. #[vmm_test( openvmm_linux_direct_x64, + openvmm_linux_direct_aarch64, openvmm_openhcl_linux_direct_x64, openvmm_pcat_x64(vhd(windows_datacenter_core_2022_x64)), openvmm_pcat_x64(vhd(ubuntu_2404_server_x64)), From 5ed0c33e53d5c6f535f00152f3c4520fe06a08cb Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 18:20:39 +0000 Subject: [PATCH 2/8] aarch64: clean up interrupt constants and validation - Consolidate VMBUS interrupt: add DEFAULT_VMBUS_PPI (INTID 18) to openvmm_defs, rename DT-path constants from VMBUS_INTID to VMBUS_PPI_OFFSET to clarify they are PPI offsets, not INTIDs. - Fix VMBus DT interrupt type: use IRQ_TYPE_EDGE_RISING (active-high) instead of IRQ_TYPE_EDGE_FALLING, matching the ACPI path. Polarity is irrelevant for software-injected edge-triggered interrupts. - Validate virt_timer_ppi and pmu_gsiv at topology build time with InvalidPpiIntid error, instead of asserting downstream in DT gen. - Change pmu_gsiv from u32 (0=none) to Option<u32> throughout Aarch64PlatformConfig, Aarch64VpInfo, and ProcessorTopology. - Add PmuGsivConfig::Disabled variant instead of using Gsiv(0) as a sentinel. 
--- openhcl/bootloader_fdt_parser/src/lib.rs | 9 ++-- openhcl/openhcl_boot/src/dt.rs | 10 ++-- openvmm/openvmm_core/src/worker/dispatch.rs | 47 ++++++++++++------- .../src/worker/vm_loaders/linux.rs | 15 +++--- openvmm/openvmm_defs/src/config.rs | 5 ++ tmk/tmk_vmm/src/run.rs | 2 +- vm/vmcore/vm_topology/src/processor.rs | 3 ++ .../vm_topology/src/processor/aarch64.rs | 18 +++++-- vmm_core/src/acpi_builder.rs | 2 +- 9 files changed, 70 insertions(+), 41 deletions(-) diff --git a/openhcl/bootloader_fdt_parser/src/lib.rs b/openhcl/bootloader_fdt_parser/src/lib.rs index 3414c22d97..b7698a41fd 100644 --- a/openhcl/bootloader_fdt_parser/src/lib.rs +++ b/openhcl/bootloader_fdt_parser/src/lib.rs @@ -535,7 +535,7 @@ fn parse_gic(node: &Node<'_>) -> anyhow::Result { gic_distributor_base: reg[0], gic_redistributors_base: reg[2], gic_v2m: None, - pmu_gsiv: 0, + pmu_gsiv: None, // TODO: parse from the DT timer node instead of hardcoding. virt_timer_ppi: 20, }) @@ -665,7 +665,7 @@ impl ParsedBootDtInfo { // Merge PMU GSIV into the GIC platform config if both were parsed. if let (Some(gic), Some(pmu_gsiv)) = (&mut gic, pmu_gsiv) { - gic.pmu_gsiv = pmu_gsiv; + gic.pmu_gsiv = Some(pmu_gsiv); } Ok(Self { @@ -847,8 +847,7 @@ mod tests { // PMU if let Some(gic) = &info.gic { - let pmu_gsiv = gic.pmu_gsiv; - if pmu_gsiv != 0 { + if let Some(pmu_gsiv) = gic.pmu_gsiv { anyhow::ensure!( (16..32).contains(&pmu_gsiv), "PMU GSIV {pmu_gsiv} is not a valid PPI (expected 16..32)" @@ -1069,7 +1068,7 @@ mod tests { gic_distributor_base: 0x10000, gic_redistributors_base: 0x20000, gic_v2m: None, - pmu_gsiv: 0x17, + pmu_gsiv: Some(0x17), virt_timer_ppi: 20, }), accepted_ranges: vec![ diff --git a/openhcl/openhcl_boot/src/dt.rs b/openhcl/openhcl_boot/src/dt.rs index a31155e3f1..9baa6d014f 100644 --- a/openhcl/openhcl_boot/src/dt.rs +++ b/openhcl/openhcl_boot/src/dt.rs @@ -43,7 +43,9 @@ mod aarch64 { // Architecturally, PPIs occupy INTID's in the [16..32) range. 
In DeviceTree, // the type of the interrupt is specified first (PPI) and then the _relative_ INTID: // for PPI INTID `27` `[GIC_PPI, 27-16, flags]` goes into the DT description. - pub const VMBUS_INTID: u32 = 2; // Note: the hardware INTID will be 16 + 2 + /// VMBus PPI offset for the DT `interrupts` property. + /// Canonical INTID is DEFAULT_VMBUS_PPI (18) in openvmm_defs. + pub const VMBUS_PPI_OFFSET: u32 = 2; pub const TIMER_INTID: u32 = 4; // Note: the hardware INTID will be 16 + 4 /// The Hyper-V default PMU_GSIV value. @@ -52,7 +54,7 @@ mod aarch64 { pub const GIC_PHANDLE: u32 = 1; pub const GIC_PPI: u32 = 1; - pub const IRQ_TYPE_EDGE_FALLING: u32 = 2; + pub const IRQ_TYPE_EDGE_RISING: u32 = 1; pub const IRQ_TYPE_LEVEL_LOW: u32 = 8; pub const IRQ_TYPE_LEVEL_HIGH: u32 = 4; } @@ -146,7 +148,7 @@ fn write_vmbus<'a, T>( // above specifies. &[ aarch64::GIC_PPI, - aarch64::VMBUS_INTID, + aarch64::VMBUS_PPI_OFFSET, interrupt_cell_value.expect("must be set on aarch64"), ], )?; @@ -456,7 +458,7 @@ pub fn write_dt( p_interrupt_parent, p_interrupts, interrupt_cell_value: if cfg!(target_arch = "aarch64") { - Some(aarch64::IRQ_TYPE_EDGE_FALLING) + Some(aarch64::IRQ_TYPE_EDGE_RISING) } else { None }, diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index 353da12299..8518be5516 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -472,7 +472,10 @@ impl ExtractTopologyConfig for ProcessorTopology { gic_distributor_base: self.gic_distributor_base(), gic_redistributors_base: self.gic_redistributors_base(), }), - pmu_gsiv: PmuGsivConfig::Gsiv(self.pmu_gsiv()), + pmu_gsiv: match self.pmu_gsiv() { + Some(gsiv) => PmuGsivConfig::Gsiv(gsiv), + None => PmuGsivConfig::Disabled, + }, })), } } @@ -494,15 +497,16 @@ impl BuildTopology for ProcessorTopologyConfig { spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT, }); let pmu_gsiv = match arch.pmu_gsiv { - 
PmuGsivConfig::Gsiv(gsiv) => gsiv, + PmuGsivConfig::Disabled => None, + PmuGsivConfig::Gsiv(gsiv) => Some(gsiv), PmuGsivConfig::Platform => platform_gsiv(hypervisor), }; // TODO: When this value is supported on all platforms, we should change // the arch config to not be an option. For now, warn since the ARM VBSA // expects this to be available. - if pmu_gsiv == 0 { - tracing::warn!("PMU GSIV is set to 0"); + if pmu_gsiv.is_none() { + tracing::warn!("PMU GSIV is not set"); } let platform = if let Some(gic_config) = &arch.gic_config { @@ -626,7 +630,7 @@ fn choose_hypervisor() -> anyhow::Result { anyhow::bail!("no hypervisor available"); } -fn platform_gsiv(hypervisor: Hypervisor) -> u32 { +fn platform_gsiv(hypervisor: Hypervisor) -> Option { let gsiv = match hypervisor { #[cfg(all( feature = "virt_whp", @@ -634,15 +638,15 @@ fn platform_gsiv(hypervisor: Hypervisor) -> u32 { guest_is_native, guest_arch = "aarch64" ))] - Hypervisor::Whp => virt_whp::WHP_PMU_GSIV, + Hypervisor::Whp => Some(virt_whp::WHP_PMU_GSIV), // TODO: hvf supports the PMU interrupt, but enabling it didn't seem to // make it work it a Linux guest. More investigation required. #[cfg(all(target_os = "macos", guest_is_native, guest_arch = "aarch64"))] - Hypervisor::Hvf => 0, - _ => 0, + Hypervisor::Hvf => None, + _ => None, }; - if gsiv == 0 { + if gsiv.is_none() { tracing::warn!(?hypervisor, "no platform GSIV available for hypervisor"); } @@ -2462,12 +2466,7 @@ impl LoadedVmInner { let build_acpi = if boot_mode == LinuxDirectBootMode::Acpi { Some(|rsdp_gpa: u64| { acpi_builder.build_acpi_tables(rsdp_gpa, |mem_layout, dsdt| { - add_devices_to_dsdt_arm64( - mem_layout, - dsdt, - enable_serial, - with_hv, - ) + add_devices_to_dsdt_arm64(mem_layout, dsdt, enable_serial, with_hv) }) }) } else { @@ -3126,7 +3125,7 @@ fn add_devices_to_dsdt_arm64( with_hv: bool, ) { // VMBus GIC INTID (PPI 2 = INTID 16 + 2 = 18), matching the DT path. 
- const VMBUS_INTID: u32 = 18; + const VMBUS_INTID: u32 = openvmm_defs::config::DEFAULT_VMBUS_PPI; // SBSA UART MMIO bases and sizes. const PL011_SERIAL0_BASE: u64 = 0xEFFEC000; const PL011_SERIAL1_BASE: u64 = 0xEFFEB000; @@ -3153,8 +3152,20 @@ fn add_devices_to_dsdt_arm64( } if enable_serial { - dsdt.add_sbsa_uart(b"\\_SB.UAR0", 0, PL011_SERIAL0_BASE, PL011_SERIAL_SIZE, PL011_SERIAL0_GSIV); - dsdt.add_sbsa_uart(b"\\_SB.UAR1", 1, PL011_SERIAL1_BASE, PL011_SERIAL_SIZE, PL011_SERIAL1_GSIV); + dsdt.add_sbsa_uart( + b"\\_SB.UAR0", + 0, + PL011_SERIAL0_BASE, + PL011_SERIAL_SIZE, + PL011_SERIAL0_GSIV, + ); + dsdt.add_sbsa_uart( + b"\\_SB.UAR1", + 1, + PL011_SERIAL1_BASE, + PL011_SERIAL_SIZE, + PL011_SERIAL1_GSIV, + ); } } diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index 3a1bff5a21..18df905b21 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -229,8 +229,9 @@ fn build_dt( const GIC_PPI: u32 = 1; const IRQ_TYPE_LEVEL_LOW: u32 = 8; const IRQ_TYPE_LEVEL_HIGH: u32 = 4; - const IRQ_TYPE_EDGE_FALLING: u32 = 2; - const VMBUS_INTID: u32 = 2; // Note: the hardware INTID will be 16 + 2 + const IRQ_TYPE_EDGE_RISING: u32 = 1; + /// VMBus PPI offset for the DT `interrupts` property. + const VMBUS_PPI_OFFSET: u32 = openvmm_defs::config::DEFAULT_VMBUS_PPI - 16; let mut root_builder = builder .start_node("")? @@ -336,6 +337,7 @@ fn build_dt( // ARM64 Architectural Timer. // The DT `interrupts` property uses the PPI offset (INTID - 16). + assert!((16..32).contains(&processor_topology.virt_timer_ppi())); let virt_timer_ppi_offset = processor_topology.virt_timer_ppi() - 16; let timer = root_builder .start_node("timer")? @@ -349,11 +351,8 @@ fn build_dt( .add_null(p_always_on)?; root_builder = timer.end_node()?; - // Add PMU, if the interrupt is non-zero. 
- let pmu_gsiv = processor_topology.pmu_gsiv(); - if pmu_gsiv != 0 { - // TODO: This assumes the GSIV is a PPI. On all platforms, that seems to - // be the case today. + // Add PMU, if the interrupt is configured. + if let Some(pmu_gsiv) = processor_topology.pmu_gsiv() { assert!((16..32).contains(&pmu_gsiv)); let ppi_index = pmu_gsiv - 16; let pmu = root_builder @@ -484,7 +483,7 @@ fn build_dt( p_interrupts, // Here 3 parameters are used as the "#interrupt-cells" // above specifies. - &[GIC_PPI, VMBUS_INTID, IRQ_TYPE_EDGE_FALLING], + &[GIC_PPI, VMBUS_PPI_OFFSET, IRQ_TYPE_EDGE_RISING], )? .end_node()?; diff --git a/openvmm/openvmm_defs/src/config.rs b/openvmm/openvmm_defs/src/config.rs index a9d9ebe723..250a6076c6 100644 --- a/openvmm/openvmm_defs/src/config.rs +++ b/openvmm/openvmm_defs/src/config.rs @@ -114,6 +114,9 @@ pub const DEFAULT_GIC_V2M_SPI_COUNT: u32 = 64; /// This is the EL1 virtual timer interrupt used across Hyper-V, KVM, and HVF. pub const DEFAULT_VIRT_TIMER_PPI: u32 = 20; +/// Default VMBus PPI (GIC INTID). PPI 2 = INTID 16 + 2 = 18. +pub const DEFAULT_VMBUS_PPI: u32 = 18; + /// How firmware tables are presented to the guest in Linux direct boot. /// /// On x86, `DeviceTree` is not supported and will be rejected. On aarch64, @@ -276,6 +279,8 @@ pub enum PmuGsivConfig { Platform, /// Use the specified GSIV value for the PMU. Gsiv(u32), + /// Disable the PMU. 
+ Disabled, } #[derive(Debug, Protobuf, Default, Clone)] diff --git a/tmk/tmk_vmm/src/run.rs b/tmk/tmk_vmm/src/run.rs index fe2ef8851b..241a9d419f 100644 --- a/tmk/tmk_vmm/src/run.rs +++ b/tmk/tmk_vmm/src/run.rs @@ -65,7 +65,7 @@ impl CommonState { gic_distributor_base: 0xff000000, gic_redistributors_base: 0xff020000, gic_v2m: None, - pmu_gsiv: 0, + pmu_gsiv: None, virt_timer_ppi: 20, // DEFAULT_VIRT_TIMER_PPI }) .build(1) diff --git a/vm/vmcore/vm_topology/src/processor.rs b/vm/vmcore/vm_topology/src/processor.rs index a88d12f9bd..bed244243b 100644 --- a/vm/vmcore/vm_topology/src/processor.rs +++ b/vm/vmcore/vm_topology/src/processor.rs @@ -91,6 +91,9 @@ pub enum InvalidTopology { /// VpInfo indices must be linear and start at 0 #[error("vp indices don't start at 0 or don't count up")] InvalidVpIndices, + /// A PPI INTID is not in the valid range (16..32). + #[error("PPI INTID {0} is not in the valid range 16..32")] + InvalidPpiIntid(u32), /// Failed to query the topology information from Device Tree. #[error("failed to query memory topology from device tree")] StdIoError(#[source] std::io::Error), diff --git a/vm/vmcore/vm_topology/src/processor/aarch64.rs b/vm/vmcore/vm_topology/src/processor/aarch64.rs index 6cd8e1ed86..7b7d06d01e 100644 --- a/vm/vmcore/vm_topology/src/processor/aarch64.rs +++ b/vm/vmcore/vm_topology/src/processor/aarch64.rs @@ -54,8 +54,8 @@ pub struct Aarch64PlatformConfig { pub gic_redistributors_base: u64, /// GIC v2m MSI frame, if MSIs via v2m are supported. pub gic_v2m: Option, - /// Performance Monitor Unit GSIV. 0 if not available. - pub pmu_gsiv: u32, + /// Performance Monitor Unit GSIV (GIC INTID). `None` if not available. + pub pmu_gsiv: Option, /// Virtual timer PPI (GIC INTID, e.g. 20 for PPI 4). 
pub virt_timer_ppi: u32, } @@ -88,7 +88,7 @@ pub struct Aarch64VpInfo { pub gicr: u64, /// Performance Interrupt GSIV (PMU) #[cfg_attr(feature = "inspect", inspect(hex))] - pub pmu_gsiv: u32, + pub pmu_gsiv: Option, } impl AsRef for Aarch64VpInfo { @@ -118,6 +118,16 @@ impl TopologyBuilder { max: u8::MAX.into(), }); } + if !(16..32).contains(&self.arch.platform.virt_timer_ppi) { + return Err(InvalidTopology::InvalidPpiIntid( + self.arch.platform.virt_timer_ppi, + )); + } + if let Some(gsiv) = self.arch.platform.pmu_gsiv { + if !(16..32).contains(&gsiv) { + return Err(InvalidTopology::InvalidPpiIntid(gsiv)); + } + } let mpidrs = (0..proc_count).map(|vp_index| { // TODO: construct mpidr appropriately for the specified // topology. @@ -183,7 +193,7 @@ impl ProcessorTopology { } /// Returns the PMU GSIV - pub fn pmu_gsiv(&self) -> u32 { + pub fn pmu_gsiv(&self) -> Option { self.arch.platform.pmu_gsiv } diff --git a/vmm_core/src/acpi_builder.rs b/vmm_core/src/acpi_builder.rs index 3cbd258d22..7bec67227b 100644 --- a/vmm_core/src/acpi_builder.rs +++ b/vmm_core/src/acpi_builder.rs @@ -165,7 +165,7 @@ impl AcpiTopology for Aarch64Topology { let mpidr = u64::from(vp.mpidr) & u64::from(aarch64defs::MpidrEl1::AFFINITY_MASK); let gicr = topology.gic_redistributors_base() + vp.base.vp_index.index() as u64 * aarch64defs::GIC_REDISTRIBUTOR_SIZE; - let pmu_gsiv = topology.pmu_gsiv(); + let pmu_gsiv = topology.pmu_gsiv().unwrap_or(0); madt.extend_from_slice( acpi_spec::madt::MadtGicc::new(uid, mpidr, gicr, pmu_gsiv).as_bytes(), ); From 794978e8e749b19b0c08508c382b4fbff94c2d6e Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 18:42:00 +0000 Subject: [PATCH 3/8] feedback --- Guide/src/reference/devices/firmware/linux_direct.md | 5 +++-- Guide/src/reference/devices/firmware/overview.md | 6 ++---- openhcl/underhill_core/src/loader/mod.rs | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Guide/src/reference/devices/firmware/linux_direct.md 
b/Guide/src/reference/devices/firmware/linux_direct.md index 4d038b1295..dcbdd36d3e 100644 --- a/Guide/src/reference/devices/firmware/linux_direct.md +++ b/Guide/src/reference/devices/firmware/linux_direct.md @@ -27,9 +27,10 @@ On x86_64, OpenVMM follows the standard Linux boot protocol: 1. The kernel image is loaded at the conventional 1 MB address. 2. An initrd (if provided) is placed after the kernel. 3. A **zero page** is constructed containing the memory map, command line - pointer, initrd location, and ACPI RSDP address. + pointer, and initrd location. 4. ACPI tables (MADT, FADT, DSDT, SRAT, etc.) are built by OpenVMM's ACPI - builder and written into guest memory. + builder and written at `0xE0000`, where the kernel finds the RSDP via + its standard firmware scan. 5. A GDT and initial page tables are set up. 6. The BSP register state is configured and execution begins. diff --git a/Guide/src/reference/devices/firmware/overview.md b/Guide/src/reference/devices/firmware/overview.md index 4393071d07..6df5c28a31 100644 --- a/Guide/src/reference/devices/firmware/overview.md +++ b/Guide/src/reference/devices/firmware/overview.md @@ -15,8 +15,6 @@ The boot mode is selected by which `--kernel`, `--uefi`, `--pcat`, or configuration). ```admonish note -Not all boot modes are available on all architectures. PCAT BIOS is -x86_64 only. Linux direct boot supports both architectures but with -different kernel image formats — see the -[Linux Direct](./linux_direct.md) page for details. +Not all boot modes are available on all architectures — see the table +above for supported combinations. 
``` diff --git a/openhcl/underhill_core/src/loader/mod.rs b/openhcl/underhill_core/src/loader/mod.rs index 6d10a79416..e5006b6ebe 100644 --- a/openhcl/underhill_core/src/loader/mod.rs +++ b/openhcl/underhill_core/src/loader/mod.rs @@ -269,7 +269,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { cache_topology: None, pcie_host_bridges: &vec![], arch: vmm_core::acpi_builder::AcpiArchConfig::X86 { - with_ioapic: true, // underhill always runs with ioapic + with_ioapic: true, // openhcl always runs with ioapic with_pic: false, with_pit: false, with_psp: platform_config.general.psp_enabled, From e4330497d913f097e42154c34a15ed7aa7b34d1c Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 18:54:22 +0000 Subject: [PATCH 4/8] fix formatting --- openvmm/openvmm_core/src/worker/vm_loaders/linux.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index 18df905b21..178b1eaafa 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -655,10 +655,7 @@ fn write_efi_and_acpi_tables( let reserved_start = efi_base; let reserved_end = align_up(rsdp_addr + 0x1000 + acpi_tables.tables.len() as u64, 0x1000); let reserved = [MemoryRange::new(reserved_start..reserved_end)]; - for range in memory_range::subtract_ranges( - mem_layout.ram().iter().map(|r| r.range), - reserved, - ) { + for range in memory_range::subtract_ranges(mem_layout.ram().iter().map(|r| r.range), reserved) { mmap_entries.push(EfiMemoryDescriptor { typ: EfiMemoryType::EFI_CONVENTIONAL_MEMORY, _pad: 0, From 6641ffcdbfca349b3c10501ffc75e52e4720fdbc Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 19:42:19 +0000 Subject: [PATCH 5/8] build fixes --- openhcl/underhill_core/src/loader/mod.rs | 21 +++++++++++++------ openhcl/underhill_core/src/worker.rs | 14 +++++++------ 
.../src/worker/vm_loaders/linux.rs | 3 --- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/openhcl/underhill_core/src/loader/mod.rs b/openhcl/underhill_core/src/loader/mod.rs index e5006b6ebe..4ebda54cb7 100644 --- a/openhcl/underhill_core/src/loader/mod.rs +++ b/openhcl/underhill_core/src/loader/mod.rs @@ -464,12 +464,21 @@ pub fn write_uefi_config( mem_layout, cache_topology: None, pcie_host_bridges: &vec![], - with_ioapic: cfg!(guest_arch = "x86_64"), // OpenHCL always runs with ioapic on x64 - with_pic: false, // uefi never runs with pic or pit - with_pit: false, - with_psp: platform_config.general.psp_enabled, - pm_base: crate::worker::PM_BASE, - acpi_irq: crate::worker::SYSTEM_IRQ_ACPI, + #[cfg(guest_arch = "x86_64")] + arch: vmm_core::acpi_builder::AcpiArchConfig::X86 { + with_ioapic: true, + with_pic: false, + with_pit: false, + with_psp: platform_config.general.psp_enabled, + pm_base: crate::worker::PM_BASE, + acpi_irq: crate::worker::SYSTEM_IRQ_ACPI, + }, + #[cfg(guest_arch = "aarch64")] + arch: vmm_core::acpi_builder::AcpiArchConfig::Aarch64 { + // Not used for MADT/SRAT generation; only matters for FADT. + hypervisor_vendor_identity: 0, + virt_timer_ppi: processor_topology.virt_timer_ppi(), + }, }; // Build the ACPI tables as specified. 
diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 3b5e90d014..8fe4aa16e9 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -2334,12 +2334,14 @@ async fn new_underhill_vm( mem_layout: &mem_layout, cache_topology: None, pcie_host_bridges: &vec![], - with_ioapic: true, // underhill always runs with ioapic - with_pic: true, // pcat always runs with pic and pit - with_pit: true, - with_psp: dps.general.psp_enabled, - pm_base: PM_BASE, - acpi_irq: SYSTEM_IRQ_ACPI, + arch: vmm_core::acpi_builder::AcpiArchConfig::X86 { + with_ioapic: true, // openhcl always runs with ioapic + with_pic: true, // pcat always runs with pic and pit + with_pit: true, + with_psp: dps.general.psp_enabled, + pm_base: PM_BASE, + acpi_irq: SYSTEM_IRQ_ACPI, + }, }; let config = firmware_pcat::config::PcatBiosConfig { diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index 178b1eaafa..20c4aa1c2e 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -37,7 +37,6 @@ pub enum Error { Efi(#[source] guestmem::GuestMemoryError), } -#[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] struct Aarch64EfiInfo { systab_addr: u64, mmap_addr: u64, @@ -510,7 +509,6 @@ fn build_dt( Ok(buffer) } -#[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] /// Write synthesized EFI and ACPI structures into guest memory. /// /// On ARM64, the Linux kernel can discover devices via ACPI instead of a @@ -694,7 +692,6 @@ fn write_efi_and_acpi_tables( /// [`write_efi_and_acpi_tables`]. The kernel then uses those EFI /// structures to locate the ACPI RSDP and discovers all hardware through /// ACPI tables instead of DT nodes. 
-#[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] fn build_stub_dt( cmdline: &str, initrd_start: u64, From 3bf75b62f743e4b31f39ff0765c4a639e66dcce4 Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 20:16:30 +0000 Subject: [PATCH 6/8] fix underhill_core AcpiArchConfig for aarch64 and pmu_gsiv field init --- openhcl/underhill_core/src/worker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 8fe4aa16e9..0ecc73fbaa 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1324,7 +1324,7 @@ fn new_aarch64_topology( mpidr, gicr: gic_redistributors_base + vp_index as u64 * aarch64defs::GIC_REDISTRIBUTOR_SIZE, - pmu_gsiv, + pmu_gsiv: gic.pmu_gsiv, } })) .context("failed to construct the processor topology") From 0fc984835d197079e1e7e3e731784d59f4ac9ef8 Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 22:11:08 +0000 Subject: [PATCH 7/8] fix --- vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs b/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs index 4ea0b38142..887ea13313 100644 --- a/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs +++ b/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs @@ -7,7 +7,8 @@ use petri::PetriVmBuilder; use petri::PetriVmmBackend; use petri::openvmm::OpenVmmPetriBackend; use petri::pipette::cmd; -use vmm_test_macros::openvmm_test; +// TODO: re-enable when boot_dt test is re-enabled +// use vmm_test_macros::openvmm_test; use vmm_test_macros::vmm_test; /// Boot Linux and verify the PMU interrupt is available. @@ -47,7 +48,10 @@ async fn pmu_gsiv(config: PetriVmBuilder) -> Result<(), a } /// Boot ARM64 Linux in device-tree mode (full DT, no ACPI). 
-#[openvmm_test(linux_direct_aarch64)] +// TODO: disabled until we get a kernel that supports DT boot with the +// current device configuration. +// #[openvmm_test(linux_direct_aarch64)] +#[allow(dead_code)] async fn boot_dt(config: PetriVmBuilder) -> Result<(), anyhow::Error> { let (vm, agent) = config .modify_backend(|c| { From 5ea2d6cb770b94239326f0520b333b7f521b3aac Mon Sep 17 00:00:00 2001 From: John Starks Date: Mon, 23 Mar 2026 23:41:15 +0000 Subject: [PATCH 8/8] feedback --- vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs b/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs index 887ea13313..7e38aa9004 100644 --- a/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs +++ b/vmm_tests/vmm_tests/tests/tests/aarch64_exclusive.rs @@ -51,7 +51,7 @@ async fn pmu_gsiv(config: PetriVmBuilder) -> Result<(), a // TODO: disabled until we get a kernel that supports DT boot with the // current device configuration. // #[openvmm_test(linux_direct_aarch64)] -#[allow(dead_code)] +#[expect(dead_code)] async fn boot_dt(config: PetriVmBuilder) -> Result<(), anyhow::Error> { let (vm, agent) = config .modify_backend(|c| {