From d7c57932da426c025d03e2d103af59a79b577ea2 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Fri, 5 Sep 2025 14:25:34 -0700 Subject: [PATCH 01/12] acpi: Add SSDT generation for PCIe, refactoring DSDT generation --- vm/acpi/src/dsdt.rs | 152 +----------------- vm/acpi/src/lib.rs | 2 + vm/acpi/src/sdt.rs | 31 ++++ vm/acpi/src/sdt/devices.rs | 138 ++++++++++++++++ vm/acpi/src/{dsdt => sdt}/helpers.rs | 2 +- vm/acpi/src/{dsdt => sdt}/objects.rs | 20 +-- vm/acpi/src/{dsdt => sdt}/ops.rs | 4 +- vm/acpi/src/{dsdt => sdt}/resources.rs | 5 +- vm/acpi/src/ssdt.rs | 213 +++++++++++++++++++++++++ 9 files changed, 403 insertions(+), 164 deletions(-) create mode 100644 vm/acpi/src/sdt.rs create mode 100644 vm/acpi/src/sdt/devices.rs rename vm/acpi/src/{dsdt => sdt}/helpers.rs (98%) rename vm/acpi/src/{dsdt => sdt}/objects.rs (93%) rename vm/acpi/src/{dsdt => sdt}/ops.rs (96%) rename vm/acpi/src/{dsdt => sdt}/resources.rs (99%) create mode 100644 vm/acpi/src/ssdt.rs diff --git a/vm/acpi/src/dsdt.rs b/vm/acpi/src/dsdt.rs index 8102a9baee..947003bb5b 100644 --- a/vm/acpi/src/dsdt.rs +++ b/vm/acpi/src/dsdt.rs @@ -1,16 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
-pub mod helpers; -pub mod objects; -pub mod ops; -pub mod resources; - -pub use helpers::*; +pub use crate::sdt::*; use memory_range::MemoryRange; -pub use objects::*; -pub use ops::*; -pub use resources::*; use x86defs::apic::APIC_BASE_ADDRESS; use zerocopy::FromBytes; use zerocopy::Immutable; @@ -31,91 +23,6 @@ pub struct DescriptionHeader { pub creator_rev: u32, } -pub struct Method { - pub name: [u8; 4], - pub sync_level: u8, - pub is_serialized: bool, - pub arg_count: u8, - operations: Vec, -} - -impl Method { - pub fn new(name: &[u8; 4]) -> Self { - let local_name: [u8; 4] = [name[0], name[1], name[2], name[3]]; - Self { - name: local_name, - sync_level: 0, - is_serialized: false, - arg_count: 0, - operations: vec![], - } - } - - pub fn set_arg_count(&mut self, arg_count: u8) { - self.arg_count = arg_count; - } - - pub fn add_operation(&mut self, op: &impl OperationObject) { - op.append_to_vec(&mut self.operations); - } -} - -impl DsdtObject for Method { - fn append_to_vec(&self, byte_stream: &mut Vec) { - byte_stream.push(0x14); - byte_stream.extend_from_slice(&encode_package_len(5 + self.operations.len())); - byte_stream.extend_from_slice(&self.name); - byte_stream.push( - self.sync_level << 4 | if self.is_serialized { 1 << 3 } else { 0 } | self.arg_count, - ); - byte_stream.extend_from_slice(&self.operations); - } -} - -pub struct EisaId(pub [u8; 7]); - -impl DsdtObject for EisaId { - fn append_to_vec(&self, byte_stream: &mut Vec) { - let mut id: [u8; 4] = [0; 4]; - id[0] = (self.0[0] - b'@') << 2 | (self.0[1] - b'@') >> 3; - id[1] = (self.0[1] & 7) << 5 | (self.0[2] - b'@'); - id[2] = char_to_hex(self.0[3]) << 4 | char_to_hex(self.0[4]); - id[3] = char_to_hex(self.0[5]) << 4 | char_to_hex(self.0[6]); - byte_stream.append(&mut encode_integer(u32::from_le_bytes(id) as u64)); - } -} - -pub struct Device { - name: Vec, - objects: Vec, -} - -impl Device { - pub fn new(name: &[u8]) -> Self { - Self { - name: encode_name(name), - objects: vec![], - } - } - - 
pub fn add_object(&mut self, obj: &impl DsdtObject) { - obj.append_to_vec(&mut self.objects); - } -} - -impl DsdtObject for Device { - // A device object consists of the extended identifier (0x5b 0x82) followed by the length, the name and then the - // contained objects. - fn append_to_vec(&self, byte_stream: &mut Vec) { - byte_stream.push(0x5b); - byte_stream.push(0x82); - let length = self.name.len() + self.objects.len(); - byte_stream.extend_from_slice(&encode_package_len(length)); - byte_stream.extend_from_slice(&self.name); - byte_stream.extend_from_slice(&self.objects); - } -} - pub struct PciRoutingTableEntry { pub address: u32, pub pin: u8, @@ -139,7 +46,7 @@ impl PciRoutingTable { } } -impl DsdtObject for PciRoutingTable { +impl SdtObject for PciRoutingTable { fn append_to_vec(&self, byte_stream: &mut Vec) { let mut table_data: Vec = Vec::with_capacity(self.entries.len() * 10); for entry in self.entries.iter() { @@ -213,7 +120,7 @@ impl Dsdt { byte_stream } - pub fn add_object(&mut self, obj: &impl DsdtObject) { + pub fn add_object(&mut self, obj: &impl SdtObject) { obj.append_to_vec(&mut self.objects); } @@ -458,6 +365,7 @@ impl Dsdt { #[cfg(test)] mod tests { use super::*; + use crate::sdt::test_helpers::verify_expected_bytes; pub fn verify_header(bytes: &[u8]) { assert!(bytes.len() >= 36); @@ -515,58 +423,6 @@ mod tests { assert_eq!(creator_rev, 0x5000000); } - pub fn verify_expected_bytes(actual: &[u8], expected: &[u8]) { - assert_eq!( - actual.len(), - expected.len(), - "Length of buffer does not match" - ); - for i in 0..actual.len() { - assert_eq!(actual[i], expected[i], "Mismatch at index {}", i); - } - } - - #[test] - fn verify_eisaid() { - let eisa_id = EisaId(*b"PNP0003"); - let bytes = eisa_id.to_bytes(); - verify_expected_bytes(&bytes, &[0xc, 0x41, 0xd0, 0, 0x3]); - } - - #[test] - fn verify_method() { - let op = AndOp { - operand1: vec![b'S', b'T', b'A', b'_'], - operand2: encode_integer(13), - target_name: vec![b'S', b'T', b'A', b'_'], - }; 
- let mut method = Method::new(b"_DIS"); - method.add_operation(&op); - let bytes = method.to_bytes(); - verify_expected_bytes( - &bytes, - &[ - 0x14, 0x11, 0x5F, 0x44, 0x49, 0x53, 0x00, 0x7b, b'S', b'T', b'A', b'_', 0x0a, 0x0d, - b'S', b'T', b'A', b'_', - ], - ); - } - - #[test] - fn verify_device_object() { - let package = Package(vec![0]); - let nobj = NamedObject::new(b"FOO", &package); - let mut device = Device::new(b"DEV"); - device.add_object(&nobj); - let bytes = device.to_bytes(); - verify_expected_bytes( - &bytes, - &[ - 0x5b, 0x82, 14, b'D', b'E', b'V', b'_', 8, b'F', b'O', b'O', b'_', 0x12, 3, 1, 0, - ], - ); - } - #[test] fn verify_simple_table() { let mut dsdt = Dsdt::new(); diff --git a/vm/acpi/src/lib.rs b/vm/acpi/src/lib.rs index 8d38191d82..4845327c49 100644 --- a/vm/acpi/src/lib.rs +++ b/vm/acpi/src/lib.rs @@ -8,3 +8,5 @@ pub mod builder; pub mod dsdt; +mod sdt; +pub mod ssdt; diff --git a/vm/acpi/src/sdt.rs b/vm/acpi/src/sdt.rs new file mode 100644 index 0000000000..3df7779999 --- /dev/null +++ b/vm/acpi/src/sdt.rs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shared utilities for generating differentiated (DSDT) and +//! secondary (SSDT) system description tables. 
+ +pub mod devices; +pub mod helpers; +pub mod objects; +pub mod ops; +pub mod resources; + +pub use self::devices::*; +pub use self::helpers::*; +pub use self::objects::*; +pub use self::ops::*; +pub use self::resources::*; + +#[cfg(test)] +pub mod test_helpers { + pub fn verify_expected_bytes(actual: &[u8], expected: &[u8]) { + assert_eq!( + actual.len(), + expected.len(), + "Length of buffer does not match" + ); + for i in 0..actual.len() { + assert_eq!(actual[i], expected[i], "Mismatch at index {}", i); + } + } +} diff --git a/vm/acpi/src/sdt/devices.rs b/vm/acpi/src/sdt/devices.rs new file mode 100644 index 0000000000..701084103c --- /dev/null +++ b/vm/acpi/src/sdt/devices.rs @@ -0,0 +1,138 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use super::helpers::*; +use super::objects::*; +use super::ops::*; + +pub struct Method { + pub name: [u8; 4], + pub sync_level: u8, + pub is_serialized: bool, + pub arg_count: u8, + operations: Vec, +} + +impl Method { + pub fn new(name: &[u8; 4]) -> Self { + let local_name: [u8; 4] = [name[0], name[1], name[2], name[3]]; + Self { + name: local_name, + sync_level: 0, + is_serialized: false, + arg_count: 0, + operations: vec![], + } + } + + pub fn set_arg_count(&mut self, arg_count: u8) { + self.arg_count = arg_count; + } + + pub fn add_operation(&mut self, op: &impl OperationObject) { + op.append_to_vec(&mut self.operations); + } +} + +impl SdtObject for Method { + fn append_to_vec(&self, byte_stream: &mut Vec) { + byte_stream.push(0x14); + byte_stream.extend_from_slice(&encode_package_len(5 + self.operations.len())); + byte_stream.extend_from_slice(&self.name); + byte_stream.push( + self.sync_level << 4 | if self.is_serialized { 1 << 3 } else { 0 } | self.arg_count, + ); + byte_stream.extend_from_slice(&self.operations); + } +} + +pub struct Device { + name: Vec, + objects: Vec, +} + +impl Device { + pub fn new(name: &[u8]) -> Self { + Self { + name: encode_name(name), + objects: vec![], + 
} + } + + pub fn add_object(&mut self, obj: &impl SdtObject) { + obj.append_to_vec(&mut self.objects); + } +} + +impl SdtObject for Device { + // A device object consists of the extended identifier (0x5b 0x82) followed by the length, the name and then the + // contained objects. + fn append_to_vec(&self, byte_stream: &mut Vec) { + byte_stream.push(0x5b); + byte_stream.push(0x82); + let length = self.name.len() + self.objects.len(); + byte_stream.extend_from_slice(&encode_package_len(length)); + byte_stream.extend_from_slice(&self.name); + byte_stream.extend_from_slice(&self.objects); + } +} + +pub struct EisaId(pub [u8; 7]); + +impl SdtObject for EisaId { + fn append_to_vec(&self, byte_stream: &mut Vec) { + let mut id: [u8; 4] = [0; 4]; + id[0] = (self.0[0] - b'@') << 2 | (self.0[1] - b'@') >> 3; + id[1] = (self.0[1] & 7) << 5 | (self.0[2] - b'@'); + id[2] = char_to_hex(self.0[3]) << 4 | char_to_hex(self.0[4]); + id[3] = char_to_hex(self.0[5]) << 4 | char_to_hex(self.0[6]); + byte_stream.append(&mut encode_integer(u32::from_le_bytes(id) as u64)); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sdt::test_helpers::verify_expected_bytes; + + #[test] + fn verify_eisaid() { + let eisa_id = EisaId(*b"PNP0003"); + let bytes = eisa_id.to_bytes(); + verify_expected_bytes(&bytes, &[0xc, 0x41, 0xd0, 0, 0x3]); + } + + #[test] + fn verify_method() { + let op = AndOp { + operand1: vec![b'S', b'T', b'A', b'_'], + operand2: encode_integer(13), + target_name: vec![b'S', b'T', b'A', b'_'], + }; + let mut method = Method::new(b"_DIS"); + method.add_operation(&op); + let bytes = method.to_bytes(); + verify_expected_bytes( + &bytes, + &[ + 0x14, 0x11, 0x5F, 0x44, 0x49, 0x53, 0x00, 0x7b, b'S', b'T', b'A', b'_', 0x0a, 0x0d, + b'S', b'T', b'A', b'_', + ], + ); + } + + #[test] + fn verify_device_object() { + let package = Package(vec![0]); + let nobj = NamedObject::new(b"FOO", &package); + let mut device = Device::new(b"DEV"); + device.add_object(&nobj); + let bytes = 
device.to_bytes(); + verify_expected_bytes( + &bytes, + &[ + 0x5b, 0x82, 14, b'D', b'E', b'V', b'_', 8, b'F', b'O', b'O', b'_', 0x12, 3, 1, 0, + ], + ); + } +} diff --git a/vm/acpi/src/dsdt/helpers.rs b/vm/acpi/src/sdt/helpers.rs similarity index 98% rename from vm/acpi/src/dsdt/helpers.rs rename to vm/acpi/src/sdt/helpers.rs index 9bfbdfa001..b7ad63dc14 100644 --- a/vm/acpi/src/dsdt/helpers.rs +++ b/vm/acpi/src/sdt/helpers.rs @@ -153,7 +153,7 @@ pub fn char_to_hex(value: u8) -> u8 { #[cfg(test)] mod tests { use super::*; - use crate::dsdt::tests::verify_expected_bytes; + use crate::sdt::test_helpers::verify_expected_bytes; #[test] fn verify_simple_name() { diff --git a/vm/acpi/src/dsdt/objects.rs b/vm/acpi/src/sdt/objects.rs similarity index 93% rename from vm/acpi/src/dsdt/objects.rs rename to vm/acpi/src/sdt/objects.rs index 67e768755a..85462e8efa 100644 --- a/vm/acpi/src/dsdt/objects.rs +++ b/vm/acpi/src/sdt/objects.rs @@ -3,7 +3,7 @@ use super::helpers::*; -pub trait DsdtObject { +pub trait SdtObject { fn append_to_vec(&self, byte_stream: &mut Vec); fn to_bytes(&self) -> Vec { @@ -19,7 +19,7 @@ pub struct NamedObject { } impl NamedObject { - pub fn new(name: &[u8], object: &impl DsdtObject) -> Self { + pub fn new(name: &[u8], object: &impl SdtObject) -> Self { let encoded_name = encode_name(name); assert!(!encoded_name.is_empty()); NamedObject { @@ -29,7 +29,7 @@ impl NamedObject { } } -impl DsdtObject for NamedObject { +impl SdtObject for NamedObject { // A named object consists of the identifier (0x8) followed by the 4-byte name fn append_to_vec(&self, byte_stream: &mut Vec) { byte_stream.push(8); @@ -40,7 +40,7 @@ impl DsdtObject for NamedObject { pub struct GenericObject>(pub T); -impl DsdtObject for GenericObject +impl SdtObject for GenericObject where T: AsRef<[u8]>, { @@ -62,7 +62,7 @@ impl NamedInteger { } } -impl DsdtObject for NamedInteger { +impl SdtObject for NamedInteger { fn append_to_vec(&self, byte_stream: &mut Vec) { 
self.data.append_to_vec(byte_stream); } @@ -80,7 +80,7 @@ impl NamedString { } } -impl DsdtObject for NamedString { +impl SdtObject for NamedString { fn append_to_vec(&self, byte_stream: &mut Vec) { self.data.append_to_vec(byte_stream); } @@ -91,7 +91,7 @@ pub struct StructuredPackage> { pub elem_data: T, } -impl DsdtObject for StructuredPackage +impl SdtObject for StructuredPackage where T: AsRef<[u8]>, { @@ -108,7 +108,7 @@ where pub struct Package>(pub T); -impl DsdtObject for Package +impl SdtObject for Package where T: AsRef<[u8]>, { @@ -124,7 +124,7 @@ where pub struct Buffer>(pub T); -impl DsdtObject for Buffer +impl SdtObject for Buffer where T: AsRef<[u8]>, { @@ -143,7 +143,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::dsdt::tests::verify_expected_bytes; + use crate::sdt::test_helpers::verify_expected_bytes; #[test] fn verify_package() { diff --git a/vm/acpi/src/dsdt/ops.rs b/vm/acpi/src/sdt/ops.rs similarity index 96% rename from vm/acpi/src/dsdt/ops.rs rename to vm/acpi/src/sdt/ops.rs index 52395a2059..2d87a78e3e 100644 --- a/vm/acpi/src/dsdt/ops.rs +++ b/vm/acpi/src/sdt/ops.rs @@ -55,8 +55,8 @@ impl OperationObject for ReturnOp { #[cfg(test)] mod tests { use super::*; - use crate::dsdt::encode_integer; - use crate::dsdt::tests::verify_expected_bytes; + use crate::sdt::encode_integer; + use crate::sdt::test_helpers::verify_expected_bytes; #[test] fn verify_and_operation() { diff --git a/vm/acpi/src/dsdt/resources.rs b/vm/acpi/src/sdt/resources.rs similarity index 99% rename from vm/acpi/src/dsdt/resources.rs rename to vm/acpi/src/sdt/resources.rs index 692ab78c98..46c99d9eb3 100644 --- a/vm/acpi/src/dsdt/resources.rs +++ b/vm/acpi/src/sdt/resources.rs @@ -89,7 +89,6 @@ impl ResourceObject for DwordMemory { } } -#[cfg(test)] impl DwordMemory { pub fn new(address: u32, length: u32) -> Self { assert!(address as u64 + length as u64 - 1 <= u32::MAX as u64); @@ -265,7 +264,7 @@ impl CurrentResourceSettings { } } -impl DsdtObject for 
CurrentResourceSettings { +impl SdtObject for CurrentResourceSettings { fn append_to_vec(&self, byte_stream: &mut Vec) { let mut resource_bytes = self.resources.clone(); // Add end of resource marker @@ -280,7 +279,7 @@ impl DsdtObject for CurrentResourceSettings { #[cfg(test)] mod tests { use super::*; - use crate::dsdt::tests::verify_expected_bytes; + use crate::sdt::test_helpers::verify_expected_bytes; #[test] fn verify_memory_resource_object() { diff --git a/vm/acpi/src/ssdt.rs b/vm/acpi/src/ssdt.rs new file mode 100644 index 0000000000..604384dfa9 --- /dev/null +++ b/vm/acpi/src/ssdt.rs @@ -0,0 +1,213 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +pub use crate::sdt::*; +use memory_range::MemoryRange; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes)] +pub struct DescriptionHeader { + pub signature: u32, + _length: u32, + pub revision: u8, + _checksum: u8, + pub oem_id: [u8; 6], + pub oem_table_id: u64, + pub oem_revision: u32, + pub creator_id: u32, + pub creator_rev: u32, +} + +fn encode_pcie_name(mut pcie_index: u32) -> Vec { + assert!(pcie_index < 1000); + let mut temp = "PCI0".as_bytes().to_vec(); + let mut i = temp.len() - 1; + while pcie_index > 0 { + temp[i] = b'0' + (pcie_index % 10) as u8; + pcie_index /= 10; + i -= 1; + } + temp +} + +pub struct Ssdt { + description_header: DescriptionHeader, + objects: Vec, +} + +impl Ssdt { + pub fn new() -> Self { + Self { + description_header: DescriptionHeader { + signature: u32::from_le_bytes(*b"SSDT"), + _length: 0, + revision: 2, + _checksum: 0, + oem_id: *b"MSFTVM", + oem_table_id: 0x313054445353, // b'SSDT01' + oem_revision: 1, + creator_id: u32::from_le_bytes(*b"MSFT"), + creator_rev: 0x01000000, + }, + objects: vec![], + //ecam_ranges: vec![], + } + } + + pub fn to_bytes(&self) -> Vec { + let mut byte_stream = 
Vec::new(); + byte_stream.extend_from_slice(self.description_header.as_bytes()); + byte_stream.extend_from_slice(&self.objects); + + let length = byte_stream.len(); + byte_stream[4..8].copy_from_slice(&u32::try_from(length).unwrap().to_le_bytes()); + let mut checksum: u8 = 0; + for byte in &byte_stream { + checksum = checksum.wrapping_add(*byte); + } + + byte_stream[9] = (!checksum).wrapping_add(1); + byte_stream + } + + pub fn add_object(&mut self, obj: &impl SdtObject) { + obj.append_to_vec(&mut self.objects); + } + + /// Adds a PCI Express root complex with the specified bus number and MMIO ranges. + /// + /// ```text + /// Device(\_SB.PCI) + /// { + /// Name(_HID, PNP0A08) + /// Name(_UID, ) + /// Name(_SEG, ) + /// Name(_BBN, ) + /// Name(_CRS, ResourceTemplate() + /// { + /// WordBusNumber(...) // Bus number range + /// QWordMemory() // Low MMIO + /// QWordMemory() // High MMIO + /// }) + /// } + /// ``` + pub fn add_pcie( + &mut self, + index: u32, + segment: u16, + start_bus: u8, + end_bus: u8, + low_mmio: MemoryRange, + high_mmio: MemoryRange, + ) { + let mut pcie = Device::new(encode_pcie_name(index).as_slice()); + pcie.add_object(&NamedObject::new(b"_HID", &EisaId(*b"PNP0A08"))); + pcie.add_object(&NamedInteger::new(b"_UID", index.into())); + pcie.add_object(&NamedInteger::new(b"_SEG", segment.into())); + pcie.add_object(&NamedInteger::new(b"_BBN", start_bus.into())); + + // TODO: Lots of work needed for _OSC. 
+ + let mut crs = CurrentResourceSettings::new(); + crs.add_resource(&BusNumber::new( + start_bus.into(), + (end_bus as u16) - (start_bus as u16) + 1, + )); + crs.add_resource(&QwordMemory::new( + low_mmio.start(), + low_mmio.end() - low_mmio.start(), + )); + crs.add_resource(&QwordMemory::new( + high_mmio.start(), + high_mmio.end() - high_mmio.start(), + )); + pcie.add_object(&crs); + + self.add_object(&pcie); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sdt::test_helpers::verify_expected_bytes; + + pub fn verify_header(bytes: &[u8]) { + assert!(bytes.len() >= 36); + + // signature + assert_eq!(bytes[0], b'S'); + assert_eq!(bytes[1], b'S'); + assert_eq!(bytes[2], b'D'); + assert_eq!(bytes[3], b'T'); + + // length + let ssdt_len = u32::from_le_bytes(bytes[4..8].try_into().unwrap()); + assert_eq!(ssdt_len as usize, bytes.len()); + + // revision + assert_eq!(bytes[8], 2); + + // Validate checksum bytes[9] by verifying content adds to zero. + let mut checksum: u8 = 0; + for byte in bytes.iter() { + checksum = checksum.wrapping_add(*byte); + } + assert_eq!(checksum, 0); + + // oem_id + assert_eq!(bytes[10], b'M'); + assert_eq!(bytes[11], b'S'); + assert_eq!(bytes[12], b'F'); + assert_eq!(bytes[13], b'T'); + assert_eq!(bytes[14], b'V'); + assert_eq!(bytes[15], b'M'); + + // oem_table_id + assert_eq!(bytes[16], b'S'); + assert_eq!(bytes[17], b'S'); + assert_eq!(bytes[18], b'D'); + assert_eq!(bytes[19], b'T'); + assert_eq!(bytes[20], b'0'); + assert_eq!(bytes[21], b'1'); + assert_eq!(bytes[22], 0); + assert_eq!(bytes[23], 0); + + // oem_revision + let oem_revision = u32::from_le_bytes(bytes[24..28].try_into().unwrap()); + assert_eq!(oem_revision, 1); + + // creator_id + assert_eq!(bytes[28], b'M'); + assert_eq!(bytes[29], b'S'); + assert_eq!(bytes[30], b'F'); + assert_eq!(bytes[31], b'T'); + + // creator_rev + let creator_rev = u32::from_le_bytes(bytes[32..36].try_into().unwrap()); + assert_eq!(creator_rev, 0x01000000); + } + + #[test] + pub fn 
verify_pcie_name_encoding() { + assert_eq!(encode_pcie_name(0), b"PCI0".to_vec()); + assert_eq!(encode_pcie_name(1), b"PCI1".to_vec()); + assert_eq!(encode_pcie_name(2), b"PCI2".to_vec()); + assert_eq!(encode_pcie_name(54), b"PC54".to_vec()); + assert_eq!(encode_pcie_name(294), b"P294".to_vec()); + } + + #[test] + fn verify_simple_table() { + let mut dsdt = Ssdt::new(); + let nobj = NamedObject::new(b"_S0", &Package(vec![0, 0])); + dsdt.add_object(&nobj); + let bytes = dsdt.to_bytes(); + verify_header(&bytes); + verify_expected_bytes(&bytes[36..], &[8, b'_', b'S', b'0', b'_', 0x12, 4, 2, 0, 0]); + } +} From f80ec0cfe968abb1a052ecaf1b1d9247018a1005 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Fri, 5 Sep 2025 14:31:01 -0700 Subject: [PATCH 02/12] acpi_spec: add MCFG definitions and parsing --- vm/acpi_spec/src/lib.rs | 1 + vm/acpi_spec/src/mcfg.rs | 166 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 vm/acpi_spec/src/mcfg.rs diff --git a/vm/acpi_spec/src/lib.rs b/vm/acpi_spec/src/lib.rs index 92da23dd7a..4107577176 100644 --- a/vm/acpi_spec/src/lib.rs +++ b/vm/acpi_spec/src/lib.rs @@ -13,6 +13,7 @@ extern crate alloc; pub mod aspt; pub mod fadt; pub mod madt; +pub mod mcfg; pub mod pptt; pub mod srat; diff --git a/vm/acpi_spec/src/mcfg.rs b/vm/acpi_spec/src/mcfg.rs new file mode 100644 index 0000000000..46b0056df5 --- /dev/null +++ b/vm/acpi_spec/src/mcfg.rs @@ -0,0 +1,166 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#[cfg(feature = "alloc")] +pub use self::alloc_parse::*; + +use super::Table; +use crate::packed_nums::*; +use core::mem::size_of; +use static_assertions::const_assert_eq; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; +use zerocopy::Ref; +use zerocopy::Unaligned; + +#[repr(C)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct McfgHeader { + pub rsvd: u64_ne, +} + +impl McfgHeader { + pub fn new() -> Self { + McfgHeader { rsvd: 0.into() } + } +} + +impl Table for McfgHeader { + const SIGNATURE: [u8; 4] = *b"MCFG"; +} + +pub const MCFG_REVISION: u8 = 1; + +#[repr(C)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct McfgSegmentBusRange { + pub ecam_base: u64_ne, + pub segment: u16_ne, + pub start_bus: u8, + pub end_bus: u8, + pub rsvd: u32_ne, +} + +const_assert_eq!(size_of::(), 16); + +impl McfgSegmentBusRange { + pub fn new(ecam_base: u64, segment: u16, start_bus: u8, end_bus: u8) -> Self { + Self { + ecam_base: ecam_base.into(), + segment: segment.into(), + start_bus, + end_bus, + rsvd: 0.into(), + } + } +} + +#[derive(Debug)] +pub enum ParseMcfgError { + MissingAcpiHeader, + InvalidSignature([u8; 4]), + MismatchedLength { in_header: usize, actual: usize }, + MissingFixedHeader, + BadSegmentBusRange, +} + +impl core::fmt::Display for ParseMcfgError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::MissingAcpiHeader => write!(f, "could not read standard ACPI header"), + Self::InvalidSignature(sig) => { + write!(f, "invalid signature. expected b\"MCFG\", found {sig:?}") + } + Self::MismatchedLength { in_header, actual } => { + write!(f, "mismatched len. 
in_header: {in_header}, actual {actual}") + } + Self::MissingFixedHeader => write!(f, "missing fixed MCFG header"), + Self::BadSegmentBusRange => write!(f, "could not read segment bus range structure"), + } + } +} + +impl core::error::Error for ParseMcfgError {} + +pub fn parse_mcfg<'a>( + raw_mcfg: &'a [u8], + mut on_segment_bus_range: impl FnMut(&'a McfgSegmentBusRange), +) -> Result<(&'a crate::Header, &'a McfgHeader), ParseMcfgError> { + let raw_mcfg_len = raw_mcfg.len(); + let (acpi_header, buf) = Ref::<_, crate::Header>::from_prefix(raw_mcfg) + .map_err(|_| ParseMcfgError::MissingAcpiHeader)?; // TODO: zerocopy: map_err (https://github.com/microsoft/openvmm/issues/759) + + if acpi_header.signature != *b"MCFG" { + return Err(ParseMcfgError::InvalidSignature(acpi_header.signature)); + } + + if acpi_header.length.get() as usize != raw_mcfg_len { + return Err(ParseMcfgError::MismatchedLength { + in_header: acpi_header.length.get() as usize, + actual: raw_mcfg_len, + }); + } + + let (mcfg_header, mut buf) = + Ref::<_, McfgHeader>::from_prefix(buf).map_err(|_| ParseMcfgError::MissingFixedHeader)?; // TODO: zerocopy: map_err (https://github.com/microsoft/openvmm/issues/759) + + while !buf.is_empty() { + let (sbr, rest) = Ref::<_, McfgSegmentBusRange>::from_prefix(buf) + .map_err(|_| ParseMcfgError::BadSegmentBusRange)?; // TODO: zerocopy: map_err (https://github.com/microsoft/openvmm/issues/759) + on_segment_bus_range(Ref::into_ref(sbr)); + buf = rest + } + + Ok((Ref::into_ref(acpi_header), Ref::into_ref(mcfg_header))) +} + +#[cfg(feature = "alloc")] +pub mod alloc_parse { + use super::*; + use alloc::vec::Vec; + + #[derive(Debug)] + pub struct BorrowedMcfg<'a> { + pub acpi_header: &'a crate::Header, + pub mcfg_header: &'a McfgHeader, + pub segment_bus_ranges: Vec<&'a McfgSegmentBusRange>, + } + + #[derive(Debug)] + pub struct OwnedMcfg { + pub acpi_header: crate::Header, + pub mcfg_header: McfgHeader, + pub segment_bus_ranges: Vec, + } + + impl From> for OwnedMcfg 
{ + fn from(b: BorrowedMcfg<'_>) -> Self { + OwnedMcfg { + acpi_header: *b.acpi_header, + mcfg_header: *b.mcfg_header, + segment_bus_ranges: b.segment_bus_ranges.into_iter().cloned().collect(), + } + } + } + + impl BorrowedMcfg<'_> { + pub fn new(raw_mcfg: &[u8]) -> Result, ParseMcfgError> { + let mut segment_bus_ranges = Vec::new(); + let (acpi_header, mcfg_header) = parse_mcfg(raw_mcfg, |x| segment_bus_ranges.push(x))?; + + Ok(BorrowedMcfg { + acpi_header, + mcfg_header, + segment_bus_ranges, + }) + } + } + + impl OwnedMcfg { + pub fn new(raw_mcfg: &[u8]) -> Result { + Ok(BorrowedMcfg::new(raw_mcfg)?.into()) + } + } +} From c7aa1b76c010290739e0c94ede40fa941ff2dd33 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Fri, 5 Sep 2025 14:43:20 -0700 Subject: [PATCH 03/12] nvme: Enable presenting multiple controllers to the same NVM subsystem --- openvmm/openvmm_entry/src/storage_builder.rs | 2 ++ petri/src/vm/openvmm/construct.rs | 1 + .../storage/disk_nvme/nvme_driver/fuzz/fuzz_nvme_driver.rs | 1 + vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs | 6 ++++++ vm/devices/storage/nvme/src/pci.rs | 3 +++ vm/devices/storage/nvme/src/resolver.rs | 1 + vm/devices/storage/nvme/src/tests/controller_tests.rs | 1 + vm/devices/storage/nvme/src/workers/admin.rs | 3 +++ vm/devices/storage/nvme/src/workers/coordinator.rs | 2 ++ vm/devices/storage/nvme_resources/src/lib.rs | 4 ++++ vm/devices/storage/storage_tests/tests/scsidvd_nvme.rs | 2 ++ .../vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs | 1 + 12 files changed, 27 insertions(+) diff --git a/openvmm/openvmm_entry/src/storage_builder.rs b/openvmm/openvmm_entry/src/storage_builder.rs index 130ea03e6f..2444873b18 100644 --- a/openvmm/openvmm_entry/src/storage_builder.rs +++ b/openvmm/openvmm_entry/src/storage_builder.rs @@ -334,6 +334,7 @@ impl StorageBuilder { instance_id: NVME_VTL0_INSTANCE_ID, resource: NvmeControllerHandle { subsystem_id: NVME_VTL0_INSTANCE_ID, + controller_id: 0, namespaces: std::mem::take(&mut 
self.vtl0_nvme_namespaces), max_io_queues: 64, msix_count: 64, @@ -366,6 +367,7 @@ impl StorageBuilder { instance_id: NVME_VTL2_INSTANCE_ID, resource: NvmeControllerHandle { subsystem_id: NVME_VTL2_INSTANCE_ID, + controller_id: 0, namespaces: std::mem::take(&mut self.vtl2_nvme_namespaces), max_io_queues: 64, msix_count: 64, diff --git a/petri/src/vm/openvmm/construct.rs b/petri/src/vm/openvmm/construct.rs index bf53f2b66d..3ca22a77b7 100644 --- a/petri/src/vm/openvmm/construct.rs +++ b/petri/src/vm/openvmm/construct.rs @@ -828,6 +828,7 @@ impl PetriVmConfigSetupCore<'_> { instance_id: BOOT_NVME_INSTANCE, resource: NvmeControllerHandle { subsystem_id: BOOT_NVME_INSTANCE, + controller_id: 0, max_io_queues: 64, msix_count: 64, namespaces: vec![NamespaceDefinition { diff --git a/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_nvme_driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_nvme_driver.rs index 26f0e4d872..d321447c4a 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_nvme_driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_nvme_driver.rs @@ -55,6 +55,7 @@ impl FuzzNvmeDriver { msix_count: 2, // TODO: [use-arbitrary-input] max_io_queues: 64, // TODO: [use-arbitrary-input] subsystem_id: guid, + controller_id: 0, }, ); diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs index 769c093eeb..bc48ab3d47 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs @@ -77,6 +77,7 @@ async fn test_nvme_save_restore(driver: DefaultDriver) { #[async_test] async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) { + const CNTLID: u16 = 0; const MSIX_COUNT: u16 = 2; const IO_QUEUE_COUNT: u16 = 64; const CPU_COUNT: u32 = 64; @@ -99,6 +100,7 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) { msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + 
controller_id: CNTLID, }, ); @@ -136,6 +138,7 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) { msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + controller_id: CNTLID, }, ); @@ -173,6 +176,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) { msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + controller_id: CNTLID, }, ); @@ -286,6 +290,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) { msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + controller_id: CNTLID, }, ); @@ -318,6 +323,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) { msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + controller_id: CNTLID, }, ); diff --git a/vm/devices/storage/nvme/src/pci.rs b/vm/devices/storage/nvme/src/pci.rs index 1adf314aab..5bcb51d534 100644 --- a/vm/devices/storage/nvme/src/pci.rs +++ b/vm/devices/storage/nvme/src/pci.rs @@ -103,6 +103,8 @@ pub struct NvmeControllerCaps { /// The subsystem ID, used as part of the subnqn field of the identify /// controller response. pub subsystem_id: Guid, + /// The controller ID, used in the identify controller response. 
+ pub controller_id: u16, } impl NvmeController { @@ -153,6 +155,7 @@ impl NvmeController { caps.max_io_queues, Arc::clone(&qe_sizes), caps.subsystem_id, + caps.controller_id, ); Self { diff --git a/vm/devices/storage/nvme/src/resolver.rs b/vm/devices/storage/nvme/src/resolver.rs index 3819249dee..9a6cdb4174 100644 --- a/vm/devices/storage/nvme/src/resolver.rs +++ b/vm/devices/storage/nvme/src/resolver.rs @@ -61,6 +61,7 @@ impl AsyncResolveResource for NvmeCon msix_count: resource.msix_count, max_io_queues: resource.max_io_queues, subsystem_id: resource.subsystem_id, + controller_id: resource.controller_id, }, ); for NamespaceDefinition { diff --git a/vm/devices/storage/nvme/src/tests/controller_tests.rs b/vm/devices/storage/nvme/src/tests/controller_tests.rs index 615eafdc4a..e0eac2411c 100644 --- a/vm/devices/storage/nvme/src/tests/controller_tests.rs +++ b/vm/devices/storage/nvme/src/tests/controller_tests.rs @@ -42,6 +42,7 @@ fn instantiate_controller( msix_count: 64, max_io_queues: 64, subsystem_id: Guid::new_random(), + controller_id: 0, }, ); diff --git a/vm/devices/storage/nvme/src/workers/admin.rs b/vm/devices/storage/nvme/src/workers/admin.rs index 2927b0dba8..b2ecce7b33 100644 --- a/vm/devices/storage/nvme/src/workers/admin.rs +++ b/vm/devices/storage/nvme/src/workers/admin.rs @@ -71,6 +71,7 @@ pub struct AdminConfig { pub doorbells: Arc>, #[inspect(display)] pub subsystem_id: Guid, + pub controller_id: u16, pub max_sqs: u16, pub max_cqs: u16, pub qe_sizes: Arc>, @@ -613,6 +614,8 @@ impl AdminHandler { spec::IdentifyController { vid: VENDOR_ID, ssvid: VENDOR_ID, + cntlid: self.config.controller_id, + cmic: 0x02, mdts: (MAX_DATA_TRANSFER_SIZE / PAGE_SIZE).trailing_zeros() as u8, ver: NVME_VERSION, rtd3r: 400000, diff --git a/vm/devices/storage/nvme/src/workers/coordinator.rs b/vm/devices/storage/nvme/src/workers/coordinator.rs index ab67966a5c..9dcf7ef007 100644 --- a/vm/devices/storage/nvme/src/workers/coordinator.rs +++ 
b/vm/devices/storage/nvme/src/workers/coordinator.rs @@ -62,6 +62,7 @@ impl NvmeWorkers { max_cqs: u16, qe_sizes: Arc>, subsystem_id: Guid, + controller_id: u16, ) -> Self { let num_qids = 2 + max_sqs.max(max_cqs) * 2; let doorbells = Arc::new(RwLock::new(DoorbellMemory::new(num_qids))); @@ -74,6 +75,7 @@ impl NvmeWorkers { interrupts, doorbells: doorbells.clone(), subsystem_id, + controller_id, max_sqs, max_cqs, qe_sizes, diff --git a/vm/devices/storage/nvme_resources/src/lib.rs b/vm/devices/storage/nvme_resources/src/lib.rs index 4b7d55616b..ebeaa6e40d 100644 --- a/vm/devices/storage/nvme_resources/src/lib.rs +++ b/vm/devices/storage/nvme_resources/src/lib.rs @@ -20,6 +20,8 @@ pub mod fault; pub struct NvmeControllerHandle { /// The subsystem ID to use when responding to controller identify queries. pub subsystem_id: Guid, + /// The controller ID to use when responding to controller identify queries. + pub controller_id: u16, /// The number of MSI-X interrupts to support. pub msix_count: u16, /// The number of IO queues to support. @@ -37,6 +39,8 @@ impl ResourceId for NvmeControllerHandle { pub struct NvmeFaultControllerHandle { /// The subsystem ID to use when responding to controller identify queries. pub subsystem_id: Guid, + /// The controller ID to use when responding to controller identify queries. + pub controller_id: u16, /// The number of MSI-X interrupts to support. pub msix_count: u16, /// The number of IO queues to support. 
diff --git a/vm/devices/storage/storage_tests/tests/scsidvd_nvme.rs b/vm/devices/storage/storage_tests/tests/scsidvd_nvme.rs index 23879e0dfc..8188cb9e67 100644 --- a/vm/devices/storage/storage_tests/tests/scsidvd_nvme.rs +++ b/vm/devices/storage/storage_tests/tests/scsidvd_nvme.rs @@ -42,6 +42,7 @@ impl ScsiDvdNvmeTest { sector_count: u64, read_only: bool, ) -> Self { + const CNTLID: u16 = 0; const MSIX_COUNT: u16 = 2; const IO_QUEUE_COUNT: u16 = 64; const CPU_COUNT: u32 = 64; @@ -64,6 +65,7 @@ impl ScsiDvdNvmeTest { msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + controller_id: CNTLID, }, ); diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index 08c81a884c..0cf1f83846 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -142,6 +142,7 @@ fn new_test_vtl2_nvme_device( instance_id, resource: NvmeControllerHandle { subsystem_id: instance_id, + controller_id: 0, max_io_queues: 64, msix_count: 64, namespaces: vec![NamespaceDefinition { From 963b65700a86809eb650e1d928acc09c5ca34ef3 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Mon, 8 Sep 2025 10:14:39 -0700 Subject: [PATCH 04/12] openvmm: add CLI and Config types for PCIe root complexes and ports, allocate MMIO --- openvmm/hvlite_core/src/worker/dispatch.rs | 56 +++- .../hvlite_core/src/worker/vm_loaders/uefi.rs | 22 ++ openvmm/hvlite_defs/src/config.rs | 21 ++ openvmm/openvmm_entry/src/cli_args.rs | 264 ++++++++++++++++++ openvmm/openvmm_entry/src/lib.rs | 32 +++ openvmm/openvmm_entry/src/ttrpc/mod.rs | 3 + petri/src/vm/openvmm/construct.rs | 1 + vm/vmcore/vm_topology/src/lib.rs | 1 + vm/vmcore/vm_topology/src/pcie.rs | 24 ++ vmm_core/src/acpi_builder.rs | 45 +++ 10 files changed, 466 insertions(+), 3 deletions(-) create mode 100644 vm/vmcore/vm_topology/src/pcie.rs diff --git 
a/openvmm/hvlite_core/src/worker/dispatch.rs b/openvmm/hvlite_core/src/worker/dispatch.rs index ed1e95df86..9035b32b20 100644 --- a/openvmm/hvlite_core/src/worker/dispatch.rs +++ b/openvmm/hvlite_core/src/worker/dispatch.rs @@ -33,6 +33,7 @@ use hvlite_defs::config::Hypervisor; use hvlite_defs::config::HypervisorConfig; use hvlite_defs::config::LoadMode; use hvlite_defs::config::MemoryConfig; +use hvlite_defs::config::PcieRootComplexConfig; use hvlite_defs::config::PmuGsivConfig; use hvlite_defs::config::ProcessorTopologyConfig; use hvlite_defs::config::SerialPipes; @@ -106,6 +107,7 @@ use vm_resource::kind::MouseInputHandleKind; use vm_resource::kind::VirtioDeviceHandle; use vm_resource::kind::VmbusDeviceHandleKind; use vm_topology::memory::MemoryLayout; +use vm_topology::pcie::PcieHostBridge; use vm_topology::processor::ArchTopology; use vm_topology::processor::ProcessorTopology; use vm_topology::processor::TopologyBuilder; @@ -165,6 +167,7 @@ impl Manifest { load_mode: config.load_mode, floppy_disks: config.floppy_disks, ide_disks: config.ide_disks, + pcie_root_complexes: config.pcie_root_complexes, vpci_devices: config.vpci_devices, hypervisor: config.hypervisor, memory: config.memory, @@ -206,6 +209,7 @@ pub struct Manifest { load_mode: LoadMode, floppy_disks: Vec, ide_disks: Vec, + pcie_root_complexes: Vec, vpci_devices: Vec, memory: MemoryConfig, processor_topology: ProcessorTopologyConfig, @@ -571,6 +575,7 @@ struct LoadedVmInner { client_notify_send: mesh::Sender, /// allow the guest to reset without notifying the client automatic_guest_reset: bool, + pcie_host_bridges: Vec, } fn choose_hypervisor() -> anyhow::Result { @@ -1196,6 +1201,7 @@ impl InitializedVm { processor_topology: &processor_topology, mem_layout: &mem_layout, cache_topology: None, + pcie_host_bridges: &Vec::new(), with_ioapic: cfg.chipset.with_generic_ioapic, with_pic: cfg.chipset.with_generic_pic, with_pit: cfg.chipset.with_generic_pit, @@ -1727,6 +1733,44 @@ impl InitializedVm { let mut 
vtl2_hvsock_relay = None; let mut vmbus_redirect = false; + // PCI Express topology + + let mut pcie_host_bridges = Vec::new(); + { + // ECAM allocation starts at the configured base and grows upwards. + // Low MMIO allocation for PCIe starts just below the low MMIO window for other + // devices and grows downwards. + // High MMIO allocation for PCIe starts just above the high MMIO window for + // other devices and grows upwards. + let mut ecam_address = cfg.memory.pcie_ecam_base; + let mut low_mmio_address = cfg.memory.mmio_gaps[0].start(); + let mut high_mmio_address = cfg.memory.mmio_gaps[1].end(); + + for rc in cfg.pcie_root_complexes { + let bus_count = (rc.end_bus as u16) - (rc.start_bus as u16) + 1; + let ecam_size = (bus_count as u64) * 256 * 4096; + let low_mmio_size = rc.low_mmio_size as u64; + + let host_bridge = PcieHostBridge { + index: rc.index, + segment: rc.segment, + start_bus: rc.start_bus, + end_bus: rc.end_bus, + ecam_range: MemoryRange::new(ecam_address..ecam_address + ecam_size), + low_mmio: MemoryRange::new(low_mmio_address - low_mmio_size..low_mmio_address), + high_mmio: MemoryRange::new( + high_mmio_address..high_mmio_address + rc.high_mmio_size, + ), + }; + + pcie_host_bridges.push(host_bridge); + + ecam_address += ecam_size; + low_mmio_address -= low_mmio_size; + high_mmio_address += rc.high_mmio_size; + } + } + if let Some(vmbus_cfg) = cfg.vmbus { if !cfg.hypervisor.with_hv { anyhow::bail!("vmbus required hypervisor enlightements"); @@ -2354,6 +2398,7 @@ impl InitializedVm { halt_recv, client_notify_send, automatic_guest_reset: cfg.automatic_guest_reset, + pcie_host_bridges, }, }; @@ -2383,6 +2428,7 @@ impl LoadedVmInner { processor_topology: &self.processor_topology, mem_layout: &self.mem_layout, cache_topology: cache_topology.as_ref(), + pcie_host_bridges: &self.pcie_host_bridges, with_ioapic: self.chipset_cfg.with_generic_ioapic, with_psp: self.chipset_cfg.with_generic_psp, with_pic: self.chipset_cfg.with_generic_pic, @@ -2478,6 
+2524,7 @@ impl LoadedVmInner { } => { let madt = acpi_builder.build_madt(); let srat = acpi_builder.build_srat(); + let mcfg = (!self.pcie_host_bridges.is_empty()).then(|| acpi_builder.build_mcfg()); let pptt = cache_topology.is_some().then(|| acpi_builder.build_pptt()); let load_settings = super::vm_loaders::uefi::UefiLoadSettings { debugging: enable_debugging, @@ -2496,9 +2543,11 @@ impl LoadedVmInner { &self.gm, &self.processor_topology, &self.mem_layout, + &self.pcie_host_bridges, load_settings, &madt, &srat, + mcfg.as_deref(), pptt.as_deref(), )?; @@ -2941,9 +2990,10 @@ impl LoadedVm { let manifest = Manifest { load_mode: self.inner.load_mode, - floppy_disks: vec![], // TODO - ide_disks: vec![], // TODO - vpci_devices: vec![], // TODO + floppy_disks: vec![], // TODO + ide_disks: vec![], // TODO + pcie_root_complexes: vec![], // TODO + vpci_devices: vec![], // TODO memory: self.inner.memory_cfg, processor_topology: self.inner.processor_topology.to_config(), chipset: self.inner.chipset_cfg, diff --git a/openvmm/hvlite_core/src/worker/vm_loaders/uefi.rs b/openvmm/hvlite_core/src/worker/vm_loaders/uefi.rs index 90e2acf76a..cfb134621e 100644 --- a/openvmm/hvlite_core/src/worker/vm_loaders/uefi.rs +++ b/openvmm/hvlite_core/src/worker/vm_loaders/uefi.rs @@ -13,6 +13,7 @@ use std::io::Seek; use thiserror::Error; use vm_loader::Loader; use vm_topology::memory::MemoryLayout; +use vm_topology::pcie::PcieHostBridge; use vm_topology::processor::ProcessorTopology; use zerocopy::IntoBytes; @@ -47,9 +48,11 @@ pub fn load_uefi( gm: &GuestMemory, processor_topology: &ProcessorTopology, mem_layout: &MemoryLayout, + pcie_host_bridges: &Vec, load_settings: UefiLoadSettings, madt: &[u8], srat: &[u8], + mcfg: Option<&[u8]>, pptt: Option<&[u8]>, ) -> Result, Error> { if mem_layout.mmio().len() < 2 { @@ -155,10 +158,29 @@ pub fn load_uefi( }); } + if let Some(mcfg) = mcfg { + cfg.add_raw(config::BlobStructureType::Mcfg, mcfg); + } + if let Some(pptt) = pptt { 
cfg.add_raw(config::BlobStructureType::Pptt, pptt); } + if !pcie_host_bridges.is_empty() { + let mut ssdt = acpi::ssdt::Ssdt::new(); + for bridge in pcie_host_bridges { + ssdt.add_pcie( + bridge.index, + bridge.segment, + bridge.start_bus, + bridge.end_bus, + bridge.low_mmio, + bridge.high_mmio, + ); + } + cfg.add_raw(config::BlobStructureType::Ssdt, &ssdt.to_bytes()); + } + let mut loader = Loader::new(gm.clone(), mem_layout, hvdef::Vtl::Vtl0); loader::uefi::load( diff --git a/openvmm/hvlite_defs/src/config.rs b/openvmm/hvlite_defs/src/config.rs index 4aa4ce3448..de2bf6d03c 100644 --- a/openvmm/hvlite_defs/src/config.rs +++ b/openvmm/hvlite_defs/src/config.rs @@ -25,6 +25,7 @@ pub struct Config { pub load_mode: LoadMode, pub floppy_disks: Vec, pub ide_disks: Vec, + pub pcie_root_complexes: Vec, pub vpci_devices: Vec, pub memory: MemoryConfig, pub processor_topology: ProcessorTopologyConfig, @@ -96,6 +97,8 @@ pub const DEFAULT_GIC_REDISTRIBUTORS_BASE: u64 = if cfg!(target_os = "linux") { 0xEFFE_E000 }; +pub const DEFAULT_PCIE_ECAM_BASE: u64 = 0x8_0000_0000; // 32GB, size depends on configuration + #[derive(MeshPayload, Debug)] pub enum LoadMode { Linux { @@ -164,6 +167,23 @@ pub enum Vtl2BaseAddressType { Vtl2Allocate { size: Option }, } +#[derive(Debug, MeshPayload)] +pub struct PcieRootComplexConfig { + pub index: u32, + pub name: String, + pub segment: u16, + pub start_bus: u8, + pub end_bus: u8, + pub low_mmio_size: u32, + pub high_mmio_size: u64, + pub ports: Vec, +} + +#[derive(Debug, MeshPayload)] +pub struct PcieRootPortConfig { + pub name: String, +} + #[derive(Debug, MeshPayload)] pub struct VpciDeviceConfig { pub vtl: DeviceVtl, @@ -234,6 +254,7 @@ pub struct MemoryConfig { pub mem_size: u64, pub mmio_gaps: Vec, pub prefetch_memory: bool, + pub pcie_ecam_base: u64, } #[derive(Debug, MeshPayload, Default)] diff --git a/openvmm/openvmm_entry/src/cli_args.rs b/openvmm/openvmm_entry/src/cli_args.rs index 6720f94518..8c7f6d8d96 100644 --- 
a/openvmm/openvmm_entry/src/cli_args.rs +++ b/openvmm/openvmm_entry/src/cli_args.rs @@ -546,6 +546,31 @@ flags: /// Perform a default boot even if boot entries exist and fail #[clap(long)] pub default_boot_always_attempt: bool, + + /// Attach a PCI Express root complex to the VM + #[clap(long_help = r#" +e.g: --pcie-root-complex rc0,segment=0,start_bus=0,end_bus=255,low_mmio=4M,high_mmio=1G + +syntax: [,opt=arg,...] + +options: + `segment=` configures the PCI Express segment, default 0 + `start_bus=` lowest valid bus number, default 0 + `end_bus=` highest valid bus number, default 255 + `low_mmio=` low MMIO window size, default 4M + `high_mmio=` high MMIO window size, default 1G +"#)] + #[clap(long, conflicts_with("pcat"))] + pub pcie_root_complex: Vec, + + /// Attach a PCI Express root port to the VM + #[clap(long_help = r#" +e.g: --pcie-root-port rc0:rc0rp0 + +syntax: : +"#)] + #[clap(long, conflicts_with("pcat"))] + pub pcie_root_port: Vec, } #[derive(Clone, Debug, PartialEq)] @@ -1358,6 +1383,113 @@ pub enum UefiConsoleModeCli { None, } +#[derive(Clone, Debug, PartialEq)] +pub struct PcieRootComplexCli { + pub name: String, + pub segment: u16, + pub start_bus: u8, + pub end_bus: u8, + pub low_mmio: u32, + pub high_mmio: u64, +} + +impl FromStr for PcieRootComplexCli { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let mut opts = s.split(','); + let name = opts.next().context("expected root complex name")?; + if name.is_empty() { + anyhow::bail!("must provide a root complex name"); + } + + let mut segment = 0; + let mut start_bus = 0; + let mut end_bus = 255; + let mut low_mmio = 4 * 1024 * 1024; + let mut high_mmio = 1024 * 1024 * 1024; + for opt in opts { + let mut s = opt.split('='); + let opt = s.next().context("expected option")?; + match opt { + "segment" => { + let seg_str = s.next().context("expected segment number")?; + segment = u16::from_str(seg_str).context("failed to parse segment number")?; + } + "start_bus" => { + let
bus_str = s.next().context("expected start bus number")?; + start_bus = + u8::from_str(bus_str).context("failed to parse start bus number")?; + } + "end_bus" => { + let bus_str = s.next().context("expected end bus number")?; + end_bus = u8::from_str(bus_str).context("failed to parse end bus number")?; + } + "low_mmio" => { + let low_mmio_str = s.next().context("expected low MMIO size")?; + low_mmio = parse_memory(low_mmio_str) + .context("failed to parse low MMIO size")? + .try_into()?; + } + "high_mmio" => { + let high_mmio_str = s.next().context("expected high MMIO size")?; + high_mmio = + parse_memory(high_mmio_str).context("failed to parse high MMIO size")?; + } + opt => anyhow::bail!("unknown option: '{opt}'"), + } + } + + if start_bus >= end_bus { + anyhow::bail!("start_bus must be below end_bus"); + } + + Ok(PcieRootComplexCli { + name: name.to_string(), + segment, + start_bus, + end_bus, + low_mmio, + high_mmio, + }) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct PcieRootPortCli { + pub root_complex_name: String, + pub name: String, +} + +impl FromStr for PcieRootPortCli { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let mut opts = s.split(','); + let names = opts.next().context("expected root port identifiers")?; + if names.is_empty() { + anyhow::bail!("must provide a root port identifiers"); + } + + let mut s = names.split(':'); + let rc_name = s.next().context("expected name of parent root complex")?; + let rp_name = s.next().context("expect root port name")?; + + if let Some(extra) = s.next() { + anyhow::bail!("unexpected token: '{extra}'") + } + + if let Some(extra) = opts.next() { + anyhow::bail!("unexpected token: '{extra}'") + } + + Ok(PcieRootPortCli { + root_complex_name: rc_name.to_string(), + name: rp_name.to_string(), + }) + } +} + /// Read a environment variable that may / may-not have a target-specific /// prefix. 
e.g: `default_value_from_arch_env("FOO")` would first try and read /// from `FOO`, and if that's not found, it will try `X86_64_FOO`. @@ -1837,4 +1969,136 @@ mod tests { assert!(FloppyDiskCli::from_str("").is_err()); assert!(FloppyDiskCli::from_str("file:/path/to/floppy.img,invalid").is_err()); } + + #[test] + fn test_pcie_root_complex_from_str() { + const ONE_MB: u64 = 1024 * 1024; + const ONE_GB: u64 = 1024 * ONE_MB; + + const DEFAULT_LOW_MMIO: u32 = (4 * ONE_MB) as u32; + const DEFAULT_HIGH_MMIO: u64 = ONE_GB; + + assert_eq!( + PcieRootComplexCli::from_str("rc0").unwrap(), + PcieRootComplexCli { + name: "rc0".to_string(), + segment: 0, + start_bus: 0, + end_bus: 255, + low_mmio: DEFAULT_LOW_MMIO, + high_mmio: DEFAULT_HIGH_MMIO, + } + ); + + assert_eq!( + PcieRootComplexCli::from_str("rc1,segment=1").unwrap(), + PcieRootComplexCli { + name: "rc1".to_string(), + segment: 1, + start_bus: 0, + end_bus: 255, + low_mmio: DEFAULT_LOW_MMIO, + high_mmio: DEFAULT_HIGH_MMIO, + } + ); + + assert_eq!( + PcieRootComplexCli::from_str("rc2,start_bus=32").unwrap(), + PcieRootComplexCli { + name: "rc2".to_string(), + segment: 0, + start_bus: 32, + end_bus: 255, + low_mmio: DEFAULT_LOW_MMIO, + high_mmio: DEFAULT_HIGH_MMIO, + } + ); + + assert_eq!( + PcieRootComplexCli::from_str("rc3,end_bus=31").unwrap(), + PcieRootComplexCli { + name: "rc3".to_string(), + segment: 0, + start_bus: 0, + end_bus: 31, + low_mmio: DEFAULT_LOW_MMIO, + high_mmio: DEFAULT_HIGH_MMIO, + } + ); + + assert_eq!( + PcieRootComplexCli::from_str("rc4,start_bus=32,end_bus=127,high_mmio=2G").unwrap(), + PcieRootComplexCli { + name: "rc4".to_string(), + segment: 0, + start_bus: 32, + end_bus: 127, + low_mmio: DEFAULT_LOW_MMIO, + high_mmio: 2 * ONE_GB, + } + ); + + assert_eq!( + PcieRootComplexCli::from_str("rc5,segment=2,start_bus=32,end_bus=127").unwrap(), + PcieRootComplexCli { + name: "rc5".to_string(), + segment: 2, + start_bus: 32, + end_bus: 127, + low_mmio: DEFAULT_LOW_MMIO, + high_mmio: DEFAULT_HIGH_MMIO, + 
} + ); + + assert_eq!( + PcieRootComplexCli::from_str("rc6,low_mmio=1M,high_mmio=64G").unwrap(), + PcieRootComplexCli { + name: "rc6".to_string(), + segment: 0, + start_bus: 0, + end_bus: 255, + low_mmio: ONE_MB as u32, + high_mmio: 64 * ONE_GB, + } + ); + + // Error cases + assert!(PcieRootComplexCli::from_str("").is_err()); + assert!(PcieRootComplexCli::from_str("poorly,").is_err()); + assert!(PcieRootComplexCli::from_str("configured,complex").is_err()); + assert!(PcieRootComplexCli::from_str("fails,start_bus=foo").is_err()); + assert!(PcieRootComplexCli::from_str("fails,start_bus=32,end_bus=31").is_err()); + assert!(PcieRootComplexCli::from_str("rc,start_bus=256").is_err()); + assert!(PcieRootComplexCli::from_str("rc,end_bus=256").is_err()); + assert!(PcieRootComplexCli::from_str("rc,low_mmio=5G").is_err()); + assert!(PcieRootComplexCli::from_str("rc,low_mmio=aG").is_err()); + assert!(PcieRootComplexCli::from_str("rc,high_mmio=bad").is_err()); + assert!(PcieRootComplexCli::from_str("rc,high_mmio").is_err()); + } + + #[test] + fn test_pcie_root_port_from_str() { + assert_eq!( + PcieRootPortCli::from_str("rc0:rc0rp0").unwrap(), + PcieRootPortCli { + root_complex_name: "rc0".to_string(), + name: "rc0rp0".to_string() + } + ); + + assert_eq!( + PcieRootPortCli::from_str("my_rc:port2").unwrap(), + PcieRootPortCli { + root_complex_name: "my_rc".to_string(), + name: "port2".to_string() + } + ); + + // Error cases + assert!(PcieRootPortCli::from_str("").is_err()); + assert!(PcieRootPortCli::from_str("rp0").is_err()); + assert!(PcieRootPortCli::from_str("rp0,opt").is_err()); + assert!(PcieRootPortCli::from_str("rc0:rp0:rp3").is_err()); + assert!(PcieRootPortCli::from_str("rc0:rp0,rp3").is_err()); + } } diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index 86d3169702..c399c2dada 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -64,11 +64,14 @@ use 
hvlite_defs::config::DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2; use hvlite_defs::config::DEFAULT_MMIO_GAPS_X86; use hvlite_defs::config::DEFAULT_MMIO_GAPS_X86_WITH_VTL2; use hvlite_defs::config::DEFAULT_PCAT_BOOT_ORDER; +use hvlite_defs::config::DEFAULT_PCIE_ECAM_BASE; use hvlite_defs::config::DeviceVtl; use hvlite_defs::config::HypervisorConfig; use hvlite_defs::config::LateMapVtl0MemoryPolicy; use hvlite_defs::config::LoadMode; use hvlite_defs::config::MemoryConfig; +use hvlite_defs::config::PcieRootComplexConfig; +use hvlite_defs::config::PcieRootPortConfig; use hvlite_defs::config::ProcessorTopologyConfig; use hvlite_defs::config::SerialInformation; use hvlite_defs::config::VirtioBus; @@ -678,6 +681,33 @@ fn vm_config_from_command_line( }) })); + let pcie_root_complexes = opt + .pcie_root_complex + .iter() + .enumerate() + .map(|(i, cli)| { + let ports = opt + .pcie_root_port + .iter() + .filter(|port_cli| port_cli.root_complex_name == cli.name) + .map(|port_cli| PcieRootPortConfig { + name: port_cli.name.clone(), + }) + .collect(); + + PcieRootComplexConfig { + index: i as u32, + name: cli.name.clone(), + segment: cli.segment, + start_bus: cli.start_bus, + end_bus: cli.end_bus, + low_mmio_size: cli.low_mmio, + high_mmio_size: cli.high_mmio, + ports, + } + }) + .collect(); + #[cfg(windows)] let vpci_resources: Vec<_> = opt .device @@ -1305,12 +1335,14 @@ fn vm_config_from_command_line( chipset, load_mode, floppy_disks, + pcie_root_complexes, vpci_devices, ide_disks: Vec::new(), memory: MemoryConfig { mem_size: opt.memory, mmio_gaps, prefetch_memory: opt.prefetch, + pcie_ecam_base: DEFAULT_PCIE_ECAM_BASE, }, processor_topology: ProcessorTopologyConfig { proc_count: opt.processors, diff --git a/openvmm/openvmm_entry/src/ttrpc/mod.rs b/openvmm/openvmm_entry/src/ttrpc/mod.rs index f2402581e9..54db5d0ad6 100644 --- a/openvmm/openvmm_entry/src/ttrpc/mod.rs +++ b/openvmm/openvmm_entry/src/ttrpc/mod.rs @@ -14,6 +14,7 @@ use futures::StreamExt; use guid::Guid; use 
hvlite_defs::config::Config; use hvlite_defs::config::DEFAULT_MMIO_GAPS_X86; +use hvlite_defs::config::DEFAULT_PCIE_ECAM_BASE; use hvlite_defs::config::DeviceVtl; use hvlite_defs::config::HypervisorConfig; use hvlite_defs::config::LoadMode; @@ -457,6 +458,7 @@ impl VmService { load_mode, ide_disks: vec![], floppy_disks: vec![], + pcie_root_complexes: vec![], vpci_devices: vec![], memory: MemoryConfig { mem_size: req_config @@ -468,6 +470,7 @@ impl VmService { .context("invalid memory configuration")?, mmio_gaps: DEFAULT_MMIO_GAPS_X86.into(), prefetch_memory: false, + pcie_ecam_base: DEFAULT_PCIE_ECAM_BASE, }, chipset: chipset.chipset, processor_topology: ProcessorTopologyConfig { diff --git a/petri/src/vm/openvmm/construct.rs b/petri/src/vm/openvmm/construct.rs index 3ca22a77b7..2c9bca69f9 100644 --- a/petri/src/vm/openvmm/construct.rs +++ b/petri/src/vm/openvmm/construct.rs @@ -293,6 +293,7 @@ impl PetriVmConfigOpenVmm { } }, prefetch_memory: false, + pcie_ecam_base: DEFAULT_PCIE_ECAM_BASE, } }; diff --git a/vm/vmcore/vm_topology/src/lib.rs b/vm/vmcore/vm_topology/src/lib.rs index 19a86d1f01..c9649cca18 100644 --- a/vm/vmcore/vm_topology/src/lib.rs +++ b/vm/vmcore/vm_topology/src/lib.rs @@ -7,4 +7,5 @@ #![forbid(unsafe_code)] pub mod memory; +pub mod pcie; pub mod processor; diff --git a/vm/vmcore/vm_topology/src/pcie.rs b/vm/vmcore/vm_topology/src/pcie.rs new file mode 100644 index 0000000000..95d164d50d --- /dev/null +++ b/vm/vmcore/vm_topology/src/pcie.rs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! PCI Express topology types. + +use memory_range::MemoryRange; + +/// A description of a PCI Express Root Complex, as visible to the CPU. +pub struct PcieHostBridge { + /// A unique integer index of this host bridge in the VM. + pub index: u32, + /// PCIe segment number. + pub segment: u16, + /// Lowest valid bus number. + pub start_bus: u8, + /// Highest valid bus number. 
+ pub end_bus: u8, + /// Memory range used for configuration space access. + pub ecam_range: MemoryRange, + /// Memory range used for low MMIO. + pub low_mmio: MemoryRange, + /// Memory range used for high MMIO. + pub high_mmio: MemoryRange, +} diff --git a/vmm_core/src/acpi_builder.rs b/vmm_core/src/acpi_builder.rs index fe364cf46a..b6b2cde31f 100644 --- a/vmm_core/src/acpi_builder.rs +++ b/vmm_core/src/acpi_builder.rs @@ -16,6 +16,7 @@ use chipset::psp; use inspect::Inspect; use std::collections::BTreeMap; use vm_topology::memory::MemoryLayout; +use vm_topology::pcie::PcieHostBridge; use vm_topology::processor::ArchTopology; use vm_topology::processor::ProcessorTopology; use vm_topology::processor::aarch64::Aarch64Topology; @@ -44,6 +45,10 @@ pub struct AcpiTablesBuilder<'a, T: AcpiTopology> { /// /// If and only if this is set, then the PPTT table will be generated. pub cache_topology: Option<&'a CacheTopology>, + /// The PCIe topology. + /// + /// If and only if this has root complexes, then an MCFG will be generated. + pub pcie_host_bridges: &'a Vec, /// If an ioapic is present. pub with_ioapic: bool, /// If a PIC is present. @@ -232,6 +237,37 @@ impl AcpiTablesBuilder<'_, T> { )) } + fn with_mcfg(&self, f: F) -> R + where + F: FnOnce(&acpi::builder::Table<'_>) -> R, + { + let mut mcfg_extra: Vec = Vec::new(); + for bridge in self.pcie_host_bridges { + // Note: The topology representation of the host bridge reflects + // the actual MMIO region regardless of starting bus number, but the + // address reported in the MCFG table must reflect wherever bus number + // 0 would be accessible even if the host bridge has a different starting + // bus number. 
+ let ecam_region_offset = (bridge.start_bus as u64) * 256 * 4096; + mcfg_extra.extend_from_slice( + acpi_spec::mcfg::McfgSegmentBusRange::new( + bridge.ecam_range.start() - ecam_region_offset, + bridge.segment, + bridge.start_bus, + bridge.end_bus, + ) + .as_bytes(), + ) + } + + (f)(&acpi::builder::Table::new_dyn( + acpi_spec::mcfg::MCFG_REVISION, + None, + &acpi_spec::mcfg::McfgHeader::new(), + &[mcfg_extra.as_slice()], + )) + } + fn with_pptt(&self, f: F) -> R where F: FnOnce(&acpi::builder::Table<'_>) -> R, @@ -535,6 +571,9 @@ impl AcpiTablesBuilder<'_, T> { self.with_madt(|t| b.append(t)); self.with_srat(|t| b.append(t)); + if !self.pcie_host_bridges.is_empty() { + self.with_mcfg(|t| b.append(t)); + } if self.cache_topology.is_some() { self.with_pptt(|t| b.append(t)); } @@ -556,6 +595,12 @@ impl AcpiTablesBuilder<'_, T> { self.with_srat(|t| t.to_vec(&OEM_INFO)) } + /// Helper method to construct a MCFG without constructing the rest of the + /// ACPI tables. + pub fn build_mcfg(&self) -> Vec { + self.with_mcfg(|t| t.to_vec(&OEM_INFO)) + } + /// Helper method to construct a PPTT without constructing the rest of the /// ACPI tables. 
/// From 9d8bb69e4d6edbd2fe0c78c1aceb33c861848abb Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Mon, 8 Sep 2025 10:25:49 -0700 Subject: [PATCH 05/12] pcie/hvlite_core: introduce and consume a new pcie emulation crate --- Cargo.lock | 14 + Cargo.toml | 1 + openvmm/hvlite_core/Cargo.toml | 1 + openvmm/hvlite_core/src/worker/dispatch.rs | 24 + vm/devices/pci/pcie/Cargo.toml | 19 + vm/devices/pci/pcie/src/lib.rs | 11 + vm/devices/pci/pcie/src/root.rs | 543 +++++++++++++++++++++ vm/devices/pci/pcie/src/test_helpers.rs | 100 ++++ 8 files changed, 713 insertions(+) create mode 100644 vm/devices/pci/pcie/Cargo.toml create mode 100644 vm/devices/pci/pcie/src/lib.rs create mode 100644 vm/devices/pci/pcie/src/root.rs create mode 100644 vm/devices/pci/pcie/src/test_helpers.rs diff --git a/Cargo.lock b/Cargo.lock index 9f998cf310..ab0d07f9c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3089,6 +3089,7 @@ dependencies = [ "pal_async", "pci_bus", "pci_core", + "pcie", "range_map_vec", "scsi_core", "scsidisk", @@ -5543,6 +5544,19 @@ dependencies = [ "vmcore", ] +[[package]] +name = "pcie" +version = "0.0.0" +dependencies = [ + "chipset_device", + "inspect", + "pci_bus", + "tracelimit", + "tracing", + "vmcore", + "zerocopy 0.8.25", +] + [[package]] name = "pem-rfc7468" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index 0bb5847330..20214800fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -254,6 +254,7 @@ vmswitch = { path = "vm/devices/net/vmswitch" } pci_bus = { path = "vm/devices/pci/pci_bus" } pci_core = { path = "vm/devices/pci/pci_core" } pci_resources = { path = "vm/devices/pci/pci_resources" } +pcie = { path = "vm/devices/pci/pcie" } vpci = { path = "vm/devices/pci/vpci" } vpci_client = { path = "vm/devices/pci/vpci_client" } vpci_protocol = { path = "vm/devices/pci/vpci_protocol" } diff --git a/openvmm/hvlite_core/Cargo.toml b/openvmm/hvlite_core/Cargo.toml index aa474a1f85..d034dfb246 100644 --- a/openvmm/hvlite_core/Cargo.toml +++ 
b/openvmm/hvlite_core/Cargo.toml @@ -62,6 +62,7 @@ input_core.workspace = true missing_dev.workspace = true pci_bus.workspace = true pci_core.workspace = true +pcie.workspace = true scsi_core.workspace = true scsidisk.workspace = true serial_16550_resources.workspace = true diff --git a/openvmm/hvlite_core/src/worker/dispatch.rs b/openvmm/hvlite_core/src/worker/dispatch.rs index 9035b32b20..a51d77ada4 100644 --- a/openvmm/hvlite_core/src/worker/dispatch.rs +++ b/openvmm/hvlite_core/src/worker/dispatch.rs @@ -75,6 +75,8 @@ use pal_async::task::Spawn; use pal_async::task::Task; use pci_core::PciInterruptPin; use pci_core::msi::MsiInterruptSet; +use pcie::root::GenericPcieRootComplex; +use pcie::root::GenericPcieRootPortDefinition; use scsi_core::ResolveScsiDeviceHandleParams; use scsidisk::SimpleScsiDisk; use scsidisk::atapi_scsi::AtapiScsiDisk; @@ -1763,6 +1765,28 @@ impl InitializedVm { ), }; + let device_name = format!("pcie-rc{}:{}", host_bridge.index, rc.name); + let _root_complex = + chipset_builder + .arc_mutex_device(device_name) + .add(|services| { + let root_port_definitions = rc + .ports + .into_iter() + .map(|rp_cfg| GenericPcieRootPortDefinition { + name: rp_cfg.name.into(), + }) + .collect(); + + GenericPcieRootComplex::new( + &mut services.register_mmio(), + host_bridge.start_bus, + host_bridge.end_bus, + host_bridge.ecam_range.start(), + root_port_definitions, + ) + })?; + pcie_host_bridges.push(host_bridge); ecam_address += ecam_size; diff --git a/vm/devices/pci/pcie/Cargo.toml b/vm/devices/pci/pcie/Cargo.toml new file mode 100644 index 0000000000..189db4f01f --- /dev/null +++ b/vm/devices/pci/pcie/Cargo.toml @@ -0,0 +1,19 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +[package] +name = "pcie" +rust-version.workspace = true +edition.workspace = true + +[dependencies] +chipset_device.workspace = true +inspect.workspace = true +pci_bus.workspace = true +tracing.workspace = true +tracelimit.workspace = true +vmcore.workspace = true +zerocopy.workspace = true + +[lints] +workspace = true diff --git a/vm/devices/pci/pcie/src/lib.rs b/vm/devices/pci/pcie/src/lib.rs new file mode 100644 index 0000000000..e652bd0a8c --- /dev/null +++ b/vm/devices/pci/pcie/src/lib.rs @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! PCI Express definitions and emulators. + +#![forbid(unsafe_code)] + +pub mod root; + +#[cfg(test)] +mod test_helpers; diff --git a/vm/devices/pci/pcie/src/root.rs b/vm/devices/pci/pcie/src/root.rs new file mode 100644 index 0000000000..2abbe50c0b --- /dev/null +++ b/vm/devices/pci/pcie/src/root.rs @@ -0,0 +1,543 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! PCI Express root complex and root port emulation. + +use chipset_device::ChipsetDevice; +use chipset_device::io::IoError; +use chipset_device::io::IoResult; +use chipset_device::mmio::ControlMmioIntercept; +use chipset_device::mmio::MmioIntercept; +use chipset_device::mmio::RegisterMmioIntercept; +use inspect::Inspect; +use inspect::InspectMut; +use pci_bus::GenericPciBusDevice; +use std::collections::HashMap; +use std::sync::Arc; +use vmcore::device_state::ChangeDeviceState; +use zerocopy::IntoBytes; + +/// A generic PCI Express root complex emulator. +#[derive(InspectMut)] +pub struct GenericPcieRootComplex { + /// The lowest valid bus number under the root complex. + start_bus: u8, + /// The highest valid bus number under the root complex. + end_bus: u8, + /// Intercept control for the ECAM MMIO region. + ecam: Box, + /// Map of root ports attached to the root complex, indexed by combined device and function numbers. 
+ #[inspect(with = "|x| inspect::iter_by_key(x).map_value(|(name, _)| name)")] + ports: HashMap, RootPort)>, +} + +/// A description of a generic PCIe root port. +pub struct GenericPcieRootPortDefinition { + /// The name of the root port. + pub name: Arc, +} + +enum DecodedEcamAccess<'a> { + UnexpectedIntercept(), + Unroutable(), + InternalBus(&'a mut RootPort, u16), + DownstreamPort(&'a mut RootPort, u8, u16), +} + +impl GenericPcieRootComplex { + /// Constructs a new `GenericPcieRootComplex` emulator. + pub fn new( + register_mmio: &mut dyn RegisterMmioIntercept, + start_bus: u8, + end_bus: u8, + ecam_base: u64, + ports: Vec, + ) -> Self { + let bus_count = (end_bus as u16) - (start_bus as u16) + 1; + let ecam_size = (bus_count as u64) * 256 * 4096; + let mut ecam = register_mmio.new_io_region("ecam", ecam_size); + ecam.map(ecam_base); + + let port_map = ports + .into_iter() + .enumerate() + .map(|(i, definition)| { + let device_number = (i << 3) as u8; + let emulator = RootPort::new(); + (device_number, (definition.name, emulator)) + }) + .collect(); + + Self { + start_bus, + end_bus, + ecam, + ports: port_map, + } + } + + /// Attach the provided `GenericPciBusDevice` to the port identified. + pub fn add_pcie_device( + &mut self, + port: u8, + name: impl AsRef, + dev: D, + ) -> Result<(), (D, Arc)> { + let (_, root_port) = self.ports.get_mut(&port).unwrap(); + root_port.connect_device(name, dev)?; + Ok(()) + } + + /// Enumerate the downstream ports of the root complex. 
+ pub fn downstream_ports(&self) -> Vec<(u8, Arc)> { + self.ports + .iter() + .map(|(port, (name, _))| (*port, name.clone())) + .collect() + } + + fn decode_ecam_access<'a>(&'a mut self, addr: u64) -> DecodedEcamAccess<'a> { + let ecam_offset = match self.ecam.offset_of(addr) { + Some(offset) => offset, + None => { + return DecodedEcamAccess::UnexpectedIntercept(); + } + }; + + let cfg_offset_within_function = (ecam_offset % 4096) as u16; + let bdf_offset_within_ecam = (ecam_offset / 4096) & 0xFFFF; + let bus_offset_within_ecam = ((bdf_offset_within_ecam & 0xFF00) >> 8) as u8; + let bus_number = bus_offset_within_ecam + self.start_bus; + let device_function = (bdf_offset_within_ecam & 0xFF) as u8; + + if bus_number == self.start_bus { + match self.ports.get_mut(&device_function) { + Some((_, port)) => { + return DecodedEcamAccess::InternalBus(port, cfg_offset_within_function); + } + None => return DecodedEcamAccess::Unroutable(), + } + } else if bus_number > self.start_bus && bus_number <= self.end_bus { + for (_, port) in self.ports.values_mut() { + if port.assigned_bus_number(bus_number) { + return DecodedEcamAccess::DownstreamPort( + port, + bus_number, + cfg_offset_within_function, + ); + } + } + return DecodedEcamAccess::Unroutable(); + } + + DecodedEcamAccess::UnexpectedIntercept() + } +} + +fn shift_read_value(cfg_offset: u16, len: usize, value: u32) -> u32 { + let shift = (cfg_offset & 0x3) * 8; + match len { + 4 => value, + 2 => value >> shift & 0xFFFF, + 1 => value >> shift & 0xFF, + _ => unreachable!(), + } +} + +fn combine_old_new_values(cfg_offset: u16, old_value: u32, new_value: u32, len: usize) -> u32 { + let shift = (cfg_offset & 0x3) * 8; + let mask = (1 << (len * 8)) - 1; + (old_value & !(mask << shift)) | (new_value << shift) +} + +impl ChangeDeviceState for GenericPcieRootComplex { + fn start(&mut self) {} + + async fn stop(&mut self) {} + + async fn reset(&mut self) {} +} + +impl ChipsetDevice for GenericPcieRootComplex { + fn 
supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> { + Some(self) + } +} + +macro_rules! validate_ecam_intercept { + ($address:ident, $data:ident) => { + if !matches!($data.len(), 1 | 2 | 4) { + return IoResult::Err(IoError::InvalidAccessSize); + } + + if !((($data.len() == 4) && ($address & 3 == 0)) + || (($data.len() == 2) && ($address & 1 == 0)) + || ($data.len() == 1)) + { + return IoResult::Err(IoError::UnalignedAccess); + } + }; +} + +impl MmioIntercept for GenericPcieRootComplex { + fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult { + validate_ecam_intercept!(addr, data); + + let mut value = !0; + match self.decode_ecam_access(addr) { + DecodedEcamAccess::UnexpectedIntercept() => { + tracing::error!("unexpected intercept at address 0x{:16x}", addr); + } + DecodedEcamAccess::Unroutable() => { + tracelimit::warn_ratelimited!("unroutable config space access"); + } + DecodedEcamAccess::InternalBus(port, cfg_offset) => { + let _ = port.pci_cfg_read(cfg_offset & !3, &mut value); + value = shift_read_value(cfg_offset, data.len(), value); + } + DecodedEcamAccess::DownstreamPort(port, bus_number, cfg_offset) => { + let _ = port.forward_cfg_read(&bus_number, cfg_offset & !3, &mut value); + value = shift_read_value(cfg_offset, data.len(), value); + } + } + + data.copy_from_slice(&value.as_bytes()[..data.len()]); + IoResult::Ok + } + + fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult { + validate_ecam_intercept!(addr, data); + + let write_value = { + let mut temp: u32 = 0; + temp.as_mut_bytes()[..data.len()].copy_from_slice(data); + temp + }; + + match self.decode_ecam_access(addr) { + DecodedEcamAccess::UnexpectedIntercept() => { + tracing::error!("unexpected intercept at address 0x{:16x}", addr); + } + DecodedEcamAccess::Unroutable() => { + tracelimit::warn_ratelimited!("unroutable config space access"); + } + DecodedEcamAccess::InternalBus(port, cfg_offset) => { + let rounded_offset = cfg_offset & !3; + let merged_value = if 
data.len() == 4 { + write_value + } else { + let mut temp: u32 = 0; + let _ = port.pci_cfg_read(rounded_offset, &mut temp); + combine_old_new_values(cfg_offset, temp, write_value, data.len()) + }; + + let _ = port.pci_cfg_write(rounded_offset, merged_value); + } + DecodedEcamAccess::DownstreamPort(port, bus_number, cfg_offset) => { + let rounded_offset = cfg_offset & !3; + let merged_value = if data.len() == 4 { + write_value + } else { + let mut temp: u32 = 0; + let _ = port.forward_cfg_read(&bus_number, rounded_offset, &mut temp); + combine_old_new_values(cfg_offset, temp, write_value, data.len()) + }; + + let _ = port.forward_cfg_write(&bus_number, rounded_offset, merged_value); + } + } + + IoResult::Ok + } +} + +#[derive(Inspect)] +struct RootPort { + // Minimal type 1 configuration space emulation for + // Linux and Windows to enumerate the port. This should + // be refactored into a dedicated type 1 emulator. + command_status_register: u32, + bus_number_registers: u32, + memory_limit_registers: u32, + prefetch_limit_registers: u32, + prefetch_base_upper_register: u32, + prefetch_limit_upper_register: u32, + + #[inspect(skip)] + link: Option<(Arc, Box)>, +} + +impl RootPort { + /// Constructs a new `RootPort` emulator. + pub fn new() -> Self { + Self { + command_status_register: 0, + bus_number_registers: 0, + memory_limit_registers: 0, + prefetch_limit_registers: 0, + prefetch_base_upper_register: 0, + prefetch_limit_upper_register: 0, + link: None, + } + } + + /// Try to connect a PCIe device, returning (device, existing_device_name) if the + /// port is already occupied. 
+ pub fn connect_device( + &mut self, + name: impl AsRef, + dev: D, + ) -> Result<(), (D, Arc)> { + if let Some((name, _)) = &self.link { + return Err((dev, name.clone())); + } + + self.link = Some((name.as_ref().into(), Box::new(dev))); + Ok(()) + } + + fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult { + *value = match offset { + 0x00 => 0xF111_1414, // Device and Vendor IDs + 0x04 => self.command_status_register | 0x0010_0000, + 0x08 => 0x0604_0000, // Class code and revision + 0x0C => 0x0001_0000, // Header type 1 + 0x10 => 0x0000_0000, // BAR0 + 0x14 => 0x0000_0000, // BAR1 + 0x18 => self.bus_number_registers, + 0x1C => 0x0000_0000, // Secondary status and I/O range + 0x20 => self.memory_limit_registers, + 0x24 => self.prefetch_limit_registers, + 0x28 => self.prefetch_base_upper_register, + 0x2C => self.prefetch_limit_upper_register, + 0x30 => 0x0000_0000, // I/O base and limit 16 bit + 0x34 => 0x0000_0040, // Reserved and Capability pointer + 0x38 => 0x0000_0000, // Expansion ROM + 0x3C => 0x0000_0000, // Bridge control, interrupt pin/line + + // PCI Express capability structure + 0x40 => 0x0142_0010, // Capability header and PCI Express capabilities register + 0x44 => 0x0000_0000, // Device capabilities register + 0x48 => 0x0000_0000, // Device control and status registers + 0x4C => 0x0000_0000, // Link capabilities register + 0x50 => 0x0011_0000, // Link control and status registers + 0x54 => 0x0000_0000, // Slot capabilities register + 0x58 => 0x0000_0000, // Slot status and control registers + 0x5C => 0x0000_0000, // Root capabilities and control registers + 0x60 => 0x0000_0000, // Root status register + 0x64 => 0x0000_0000, // Device capabilities 2 register + 0x68 => 0x0000_0000, // Device status 2 and control 2 registers + 0x6C => 0x0000_0000, // Link capabilities 2 register + 0x70 => 0x0000_0000, // Link status 2 and control 2 registers + 0x74 => 0x0000_0000, // Slot capabilities 2 register + 0x78 => 0x0000_0000, // Slot status 2 
and control 2 registers + + _ => 0xFFFF, + }; + + IoResult::Ok + } + + fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult { + match offset { + 0x04 => self.command_status_register = value, + 0x18 => self.bus_number_registers = value, + 0x20 => self.memory_limit_registers = value, + 0x24 => self.prefetch_limit_registers = value, + 0x28 => self.prefetch_base_upper_register = value, + 0x2C => self.prefetch_limit_upper_register = value, + _ => {} + }; + + IoResult::Ok + } + + fn assigned_bus_number(&self, bus: u8) -> bool { + let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; + let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; + + bus >= secondary_bus_number && bus <= suboordinate_bus_number + } + + fn forward_cfg_read(&mut self, bus: &u8, cfg_offset: u16, value: &mut u32) -> IoResult { + let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; + let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; + + if *bus == secondary_bus_number { + if let Some((_, device)) = &mut self.link { + let _ = device.pci_cfg_read(cfg_offset, value); + } + } else if *bus > secondary_bus_number && *bus <= suboordinate_bus_number { + tracelimit::warn_ratelimited!("multi-level hierarchies not implemented yet"); + } + + IoResult::Ok + } + + fn forward_cfg_write(&mut self, bus: &u8, cfg_offset: u16, value: u32) -> IoResult { + let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; + let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; + + if *bus == secondary_bus_number { + if let Some((_, device)) = &mut self.link { + let _ = device.pci_cfg_write(cfg_offset, value); + } + } else if *bus > secondary_bus_number && *bus <= suboordinate_bus_number { + tracelimit::warn_ratelimited!("multi-level hierarchies not implemented yet"); + } + + IoResult::Ok + } +} + +mod save_restore { + use super::*; + use vmcore::save_restore::SaveError; + use 
vmcore::save_restore::SaveRestore; + use vmcore::save_restore::SavedStateNotSupported; + + impl SaveRestore for GenericPcieRootComplex { + type SavedState = SavedStateNotSupported; + + fn save(&mut self) -> Result { + Err(SaveError::NotSupported) + } + + fn restore( + &mut self, + state: Self::SavedState, + ) -> Result<(), vmcore::save_restore::RestoreError> { + match state {} + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_helpers::*; + + /// Instantiate a root complex with the provided lowest bus number and port count. + /// ECAM base address is assumed to be 0, and highest bus number is assumed to be 255. + fn instantiate_root_complex(start_bus: u8, port_count: u8) -> GenericPcieRootComplex { + let port_defs = (0..port_count) + .map(|i| GenericPcieRootPortDefinition { + name: format!("test-port-{}", i).into(), + }) + .collect(); + + let mut register_mmio = TestPcieMmioRegistration {}; + GenericPcieRootComplex::new(&mut register_mmio, start_bus, 255, 0, port_defs) + } + + #[test] + fn test_create() { + let rc = instantiate_root_complex(0, 4); + assert_eq!(rc.downstream_ports().len(), 4); + } + + #[test] + fn test_probe_ports_via_config_space() { + let mut rc = instantiate_root_complex(0, 4); + for device_number in 0..4 { + let mut vendor_device: u32 = 0; + rc.mmio_read((device_number << 3) * 4096, vendor_device.as_mut_bytes()) + .unwrap(); + assert_eq!(vendor_device, 0xF111_1414); + + let mut value_16: u16 = 0; + rc.mmio_read((device_number << 3) * 4096, value_16.as_mut_bytes()) + .unwrap(); + assert_eq!(value_16, 0x1414); + + rc.mmio_read((device_number << 3) * 4096 + 2, value_16.as_mut_bytes()) + .unwrap(); + assert_eq!(value_16, 0xF111); + } + + for device_number in 4..10 { + let mut value_32: u32 = 0; + rc.mmio_read((device_number << 3) * 4096, value_32.as_mut_bytes()) + .unwrap(); + assert_eq!(value_32, 0xFFFF_FFFF); + + let mut value_16: u16 = 0; + rc.mmio_read((device_number << 3) * 4096, value_16.as_mut_bytes()) + .unwrap(); + 
assert_eq!(value_16, 0xFFFF); + rc.mmio_read((device_number << 3) * 4096 + 2, value_16.as_mut_bytes()) + .unwrap(); + assert_eq!(value_16, 0xFFFF); + } + } + + #[test] + fn test_root_port_cfg_forwarding() { + const SECONDARY_BUS_NUMBER_ADDRESS: u64 = 0x19; + const SUBOORDINATE_BUS_NUMBER_ADDRESS: u64 = 0x1A; + + let mut rc = instantiate_root_complex(0, 1); + + // Pre-bus number assignment, random accesses don't work. + let mut value_32: u32 = 0; + rc.mmio_read(256 * 4096, value_32.as_mut_bytes()).unwrap(); + assert_eq!(value_32, 0xFFFF_FFFF); + + // Secondary and suboordinate bus number registers are both + // read / write, defaulting to 0. + let mut bus_number: u8 = 0xFF; + rc.mmio_read(SECONDARY_BUS_NUMBER_ADDRESS, bus_number.as_mut_bytes()) + .unwrap(); + assert_eq!(bus_number, 0); + rc.mmio_read(SUBOORDINATE_BUS_NUMBER_ADDRESS, bus_number.as_mut_bytes()) + .unwrap(); + assert_eq!(bus_number, 0); + + rc.mmio_write(SECONDARY_BUS_NUMBER_ADDRESS, &[1]).unwrap(); + rc.mmio_read(SECONDARY_BUS_NUMBER_ADDRESS, bus_number.as_mut_bytes()) + .unwrap(); + assert_eq!(bus_number, 1); + + rc.mmio_write(SUBOORDINATE_BUS_NUMBER_ADDRESS, &[2]) + .unwrap(); + rc.mmio_read(SUBOORDINATE_BUS_NUMBER_ADDRESS, bus_number.as_mut_bytes()) + .unwrap(); + assert_eq!(bus_number, 2); + + // Bus numbers assigned, but no endpoint attached yet. + rc.mmio_read(256 * 4096, value_32.as_mut_bytes()).unwrap(); + assert_eq!(value_32, 0xFFFF_FFFF); + + let endpoint = TestPcieEndpoint::new( + |offset, value| match offset { + 0x0 => { + *value = 0xDEAD_BEEF; + Some(IoResult::Ok) + } + _ => Some(IoResult::Err(IoError::InvalidRegister)), + }, + |_, _| Some(IoResult::Err(IoError::InvalidRegister)), + ); + + let _ = rc.add_pcie_device(0, "test-ep", endpoint); + + // The secondary bus behind root port 0 has been assigned bus number + // 1, so now the attached endpoint is accessible. 
+ rc.mmio_read(256 * 4096, value_32.as_mut_bytes()).unwrap(); + assert_eq!(value_32, 0xDEAD_BEEF); + + // Reassign the secondary bus number to 2. + rc.mmio_write(SECONDARY_BUS_NUMBER_ADDRESS, &[2]).unwrap(); + rc.mmio_read(SECONDARY_BUS_NUMBER_ADDRESS, bus_number.as_mut_bytes()) + .unwrap(); + assert_eq!(bus_number, 2); + + // The endpoint is no longer accessible at bus number 1, and is now + // accessible at bus number 2. + rc.mmio_read(256 * 4096, value_32.as_mut_bytes()).unwrap(); + assert_eq!(value_32, 0xFFFF_FFFF); + rc.mmio_read(2 * 256 * 4096, value_32.as_mut_bytes()) + .unwrap(); + assert_eq!(value_32, 0xDEAD_BEEF); + } +} diff --git a/vm/devices/pci/pcie/src/test_helpers.rs b/vm/devices/pci/pcie/src/test_helpers.rs new file mode 100644 index 0000000000..b64369c469 --- /dev/null +++ b/vm/devices/pci/pcie/src/test_helpers.rs @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use chipset_device::io::IoResult; +use chipset_device::mmio::ControlMmioIntercept; +use chipset_device::mmio::RegisterMmioIntercept; +use pci_bus::GenericPciBusDevice; + +pub struct TestPcieMmioRegistration {} + +impl RegisterMmioIntercept for TestPcieMmioRegistration { + fn new_io_region(&mut self, _debug_name: &str, len: u64) -> Box { + Box::new(TestPcieControlMmioIntercept { mapping: None, len }) + } +} + +pub struct TestPcieControlMmioIntercept { + pub mapping: Option, + pub len: u64, +} + +impl ControlMmioIntercept for TestPcieControlMmioIntercept { + /// Enables the IO region. + fn map(&mut self, addr: u64) { + match self.mapping { + Some(_) => panic!("already mapped"), + None => self.mapping = Some(addr), + } + } + + /// Disables the IO region. + fn unmap(&mut self) { + match self.mapping { + Some(_) => self.mapping = None, + None => panic!("not mapped"), + } + } + + /// Return the currently mapped address. + /// + /// Returns `None` if the region is currently unmapped. 
+ fn addr(&self) -> Option { + self.mapping + } + + fn len(&self) -> u64 { + self.len + } + + /// Return the offset of `addr` from the region's base address. + /// + /// Returns `None` if the provided `addr` is outside of the memory + /// region, or the region is currently unmapped. + fn offset_of(&self, addr: u64) -> Option { + match self.mapping { + Some(base_address) => Some(addr - base_address), + None => None, + } + } + + fn region_name(&self) -> &str { + "???" + } +} + +pub struct TestPcieEndpoint +where + R: Fn(u16, &mut u32) -> Option + 'static + Send, + W: FnMut(u16, u32) -> Option + 'static + Send, +{ + cfg_read_closure: R, + cfg_write_closure: W, +} + +impl TestPcieEndpoint +where + R: Fn(u16, &mut u32) -> Option + 'static + Send, + W: FnMut(u16, u32) -> Option + 'static + Send, +{ + pub fn new(cfg_read_closure: R, cfg_write_closure: W) -> Self { + Self { + cfg_read_closure, + cfg_write_closure, + } + } +} + +impl GenericPciBusDevice for TestPcieEndpoint +where + R: Fn(u16, &mut u32) -> Option + 'static + Send, + W: FnMut(u16, u32) -> Option + 'static + Send, +{ + fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> Option { + (self.cfg_read_closure)(offset, value) + } + + fn pci_cfg_write(&mut self, offset: u16, value: u32) -> Option { + (self.cfg_write_closure)(offset, value) + } +} From 521b80a5aea9ef09553c8d4e2c6f8e7d20ea789b Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Mon, 8 Sep 2025 10:47:40 -0700 Subject: [PATCH 06/12] vmm_core/vmotherboard: chipset bus resolution for pcie enumerators and downstream ports --- Cargo.lock | 1 + openvmm/hvlite_core/src/worker/dispatch.rs | 4 +- vmm_core/vmotherboard/Cargo.toml | 1 + vmm_core/vmotherboard/src/base_chipset.rs | 24 ++++++ .../src/chipset/backing/arc_mutex/device.rs | 79 +++++++++++-------- .../src/chipset/backing/arc_mutex/pci.rs | 79 +++++++++++++++++++ .../src/chipset/backing/arc_mutex/services.rs | 9 +++ .../src/chipset/builder/errors.rs | 4 + .../vmotherboard/src/chipset/builder/mod.rs 
| 67 +++++++++++++++- vmm_core/vmotherboard/src/chipset/mod.rs | 41 ++++++++++ vmm_core/vmotherboard/src/lib.rs | 11 +++ 11 files changed, 285 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab0d07f9c9..7c6e8731c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9370,6 +9370,7 @@ dependencies = [ "parking_lot", "paste", "pci_bus", + "pcie", "range_map_vec", "state_unit", "thiserror 2.0.16", diff --git a/openvmm/hvlite_core/src/worker/dispatch.rs b/openvmm/hvlite_core/src/worker/dispatch.rs index a51d77ada4..fc81d44dbf 100644 --- a/openvmm/hvlite_core/src/worker/dispatch.rs +++ b/openvmm/hvlite_core/src/worker/dispatch.rs @@ -1766,7 +1766,7 @@ impl InitializedVm { }; let device_name = format!("pcie-rc{}:{}", host_bridge.index, rc.name); - let _root_complex = + let root_complex = chipset_builder .arc_mutex_device(device_name) .add(|services| { @@ -1787,6 +1787,8 @@ impl InitializedVm { ) })?; + let bus_id = vmotherboard::BusId::new(&rc.name); + chipset_builder.register_weak_mutex_pcie_enumerator(bus_id, Box::new(root_complex)); pcie_host_bridges.push(host_bridge); ecam_address += ecam_size; diff --git a/vmm_core/vmotherboard/Cargo.toml b/vmm_core/vmotherboard/Cargo.toml index 6b5f6c7cae..ce1bc8d2cd 100644 --- a/vmm_core/vmotherboard/Cargo.toml +++ b/vmm_core/vmotherboard/Cargo.toml @@ -39,6 +39,7 @@ guest_watchdog.workspace = true ide.workspace = true missing_dev.workspace = true pci_bus.workspace = true +pcie.workspace = true vga_proxy = { optional = true, workspace = true } vga = { optional = true, workspace = true } watchdog_core.workspace = true diff --git a/vmm_core/vmotherboard/src/base_chipset.rs b/vmm_core/vmotherboard/src/base_chipset.rs index 96ac2f979b..947299c0a5 100644 --- a/vmm_core/vmotherboard/src/base_chipset.rs +++ b/vmm_core/vmotherboard/src/base_chipset.rs @@ -797,7 +797,10 @@ impl ConfigureChipsetDevice for ArcMutexChipsetServices<'_, '_> { mod weak_mutex_pci { use crate::chipset::PciConflict; use 
crate::chipset::PciConflictReason; + use crate::chipset::PcieConflict; + use crate::chipset::PcieConflictReason; use crate::chipset::backing::arc_mutex::pci::RegisterWeakMutexPci; + use crate::chipset::backing::arc_mutex::pci::RegisterWeakMutexPcie; use chipset_device::ChipsetDevice; use chipset_device::io::IoResult; use closeable_mutex::CloseableMutex; @@ -885,6 +888,27 @@ mod weak_mutex_pci { }) } } + + // wiring to enable using the generic PCIe root port alongside the Arc+CloseableMutex device infra + impl RegisterWeakMutexPcie for Arc> { + fn add_pcie_device( + &mut self, + port: u8, + name: Arc, + dev: Weak>, + ) -> Result<(), PcieConflict> { + self.lock() + .add_pcie_device(port, name.clone(), WeakMutexPciDeviceWrapper(dev)) + .map_err(|(_, existing_dev)| PcieConflict { + reason: PcieConflictReason::ExistingDev(existing_dev), + conflict_dev: name, + }) + } + + fn downstream_ports(&self) -> Vec<(u8, Arc)> { + self.lock().downstream_ports() + } + } } pub struct ArcMutexIsaDmaChannel { diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs index 47728ba1e9..a75e76f2b1 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs @@ -6,6 +6,7 @@ use super::services::ArcMutexChipsetServices; use crate::BusIdPci; +use crate::BusIdPcieDownstreamPort; use crate::VmmChipsetDevice; use arc_cyclic_builder::ArcCyclicBuilder; use arc_cyclic_builder::ArcCyclicBuilderExt; @@ -71,6 +72,7 @@ pub struct ArcMutexChipsetDeviceBuilder<'a, 'b, T> { pci_addr: Option<(u8, u8, u8)>, pci_bus_id: Option, + pcie_port: Option, external_pci: bool, } @@ -97,6 +99,7 @@ where pci_addr: None, pci_bus_id: None, + pcie_port: None, external_pci: false, } } @@ -120,6 +123,12 @@ where self } + /// For PCIe devices: place the device on the specified downstream port + pub fn on_pcie_port(mut self, id: BusIdPcieDownstreamPort) -> Self { + 
self.pcie_port = Some(id); + self + } + /// For PCI devices: do not register the device with any PCI bus. This is /// used when the device is hooked up to a bus (such as a VPCI bus) outside /// of the vmotherboard infrastructure. @@ -156,39 +165,43 @@ where if !self.external_pci { if let Some(dev) = typed_dev.supports_pci() { - // static pci registration - let bdf = match (self.pci_addr, dev.suggested_bdf()) { - (Some(override_bdf), Some(suggested_bdf)) => { - let (ob, od, of) = override_bdf; - let (sb, sd, sf) = suggested_bdf; - tracing::info!( - "overriding suggested bdf: using {:02x}:{:02x}:{} instead of {:02x}:{:02x}:{}", - ob, - od, - of, - sb, - sd, - sf - ); - override_bdf - } - (None, Some(bdf)) | (Some(bdf), None) => bdf, - (None, None) => { - return Err( - AddDeviceErrorKind::NoPciBusAddress.with_dev_name(self.dev_name) - ); - } - }; - - let bus_id = match self.pci_bus_id.take() { - Some(bus_id) => bus_id, - None => panic!( - "wiring error: did not invoke `on_pci_bus` for `{}`", - self.dev_name - ), - }; - - self.services.register_static_pci(bus_id, bdf); + if let Some(bus_id_port) = self.pcie_port { + self.services.register_static_pcie(bus_id_port); + } else { + // static pci registration + let bdf = match (self.pci_addr, dev.suggested_bdf()) { + (Some(override_bdf), Some(suggested_bdf)) => { + let (ob, od, of) = override_bdf; + let (sb, sd, sf) = suggested_bdf; + tracing::info!( + "overriding suggested bdf: using {:02x}:{:02x}:{} instead of {:02x}:{:02x}:{}", + ob, + od, + of, + sb, + sd, + sf + ); + override_bdf + } + (None, Some(bdf)) | (Some(bdf), None) => bdf, + (None, None) => { + return Err( + AddDeviceErrorKind::NoPciBusAddress.with_dev_name(self.dev_name) + ); + } + }; + + let bus_id = match self.pci_bus_id.take() { + Some(bus_id) => bus_id, + None => panic!( + "wiring error: did not invoke `on_pci_bus` for `{}`", + self.dev_name + ), + }; + + self.services.register_static_pci(bus_id, bdf); + } } } diff --git 
a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs index 7f514ca0c5..482bbdedb9 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs @@ -2,8 +2,12 @@ // Licensed under the MIT License. use crate::BusIdPci; +use crate::BusIdPcieDownstreamPort; +use crate::BusIdPcieEnumerator; use crate::chipset::PciConflict; use crate::chipset::PciConflictReason; +use crate::chipset::PcieConflict; +use crate::chipset::PcieConflictReason; use chipset_device::ChipsetDevice; use closeable_mutex::CloseableMutex; use std::collections::HashMap; @@ -68,3 +72,78 @@ impl BusResolverWeakMutexPci { if !errs.is_empty() { Err(errs) } else { Ok(()) } } } + +/// An abstraction over an upstream PCIe enumerator implementation that +/// is able to route accesses to `Weak>` +/// devices via downstream ports. +pub trait RegisterWeakMutexPcie: Send { + /// Try to add a PCIe device to the enumerator at the sepcified port, + /// reporting any conflicts. + fn add_pcie_device( + &mut self, + port: u8, + name: Arc, + device: Weak>, + ) -> Result<(), PcieConflict>; + + /// Enumerate the downstream ports. 
+ fn downstream_ports(&self) -> Vec<(u8, Arc)>; +} + +pub struct WeakMutexPcieDeviceEntry { + pub bus_id_port: BusIdPcieDownstreamPort, + pub name: Arc, + pub dev: Weak>, +} + +#[derive(Default)] +pub struct BusResolverWeakMutexPcie { + pub enumerators: HashMap>, + pub ports: HashMap, + pub devices: Vec, +} + +impl BusResolverWeakMutexPcie { + pub fn resolve(mut self) -> Result<(), Vec> { + let mut errs = Vec::new(); + + for WeakMutexPcieDeviceEntry { + bus_id_port, + name, + dev, + } in self.devices + { + let (port_number, bus_id_enumerator) = match self.ports.get(&bus_id_port) { + Some(v) => v, + None => { + errs.push(PcieConflict { + conflict_dev: name.clone(), + reason: PcieConflictReason::MissingDownstreamPort, + }); + continue; + } + }; + + let enumerator = match self.enumerators.get_mut(bus_id_enumerator) { + Some(enumerator) => enumerator, + None => { + errs.push(PcieConflict { + conflict_dev: name.clone(), + reason: PcieConflictReason::MissingEnumerator, + }); + continue; + } + }; + + match enumerator.add_pcie_device(*port_number, name, dev) { + Ok(()) => {} + Err(conflict) => { + errs.push(conflict); + continue; + } + }; + } + + if !errs.is_empty() { Err(errs) } else { Ok(()) } + } +} diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs index 04393233de..a6a578d08e 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs @@ -7,6 +7,7 @@ use self::device_range::DeviceRangeMapper; use super::device::ArcMutexChipsetServicesFinalize; use super::state_unit::ArcMutexChipsetDeviceUnit; use crate::BusIdPci; +use crate::BusIdPcieDownstreamPort; use crate::ChipsetBuilder; use crate::VmmChipsetDevice; use crate::chipset::io_ranges::IoRanges; @@ -182,6 +183,14 @@ impl<'a, 'b> ArcMutexChipsetServices<'a, 'b> { ); } + pub fn register_static_pcie(&mut self, bus_id: BusIdPcieDownstreamPort) { + 
self.builder.register_weak_mutex_pcie_device( + bus_id, + self.dev_name.clone(), + self.dev.clone(), + ); + } + pub fn new_line(&mut self, id: LineSetId, name: &str, vector: u32) -> LineInterrupt { let (line_set, _) = self.builder.line_set(id.clone()); match line_set.new_line(vector, format!("{}:{}", self.dev_name, name)) { diff --git a/vmm_core/vmotherboard/src/chipset/builder/errors.rs b/vmm_core/vmotherboard/src/chipset/builder/errors.rs index 83b16d9cfd..4f0bbafa2d 100644 --- a/vmm_core/vmotherboard/src/chipset/builder/errors.rs +++ b/vmm_core/vmotherboard/src/chipset/builder/errors.rs @@ -2,6 +2,7 @@ // Licensed under the MIT License. use crate::chipset::PciConflict; +use crate::chipset::PcieConflict; use crate::chipset::io_ranges::IoRangeConflict; use std::fmt::Debug; use thiserror::Error; @@ -21,6 +22,9 @@ pub enum ChipsetBuilderError { /// detected static pci address conflict #[error("static pci conflict: {0}")] PciConflict(PciConflict), + /// detected static pcie port conflict + #[error("static pcie port conflict: {0}")] + PcieConflict(PcieConflict), } #[derive(Debug, Error)] diff --git a/vmm_core/vmotherboard/src/chipset/builder/mod.rs b/vmm_core/vmotherboard/src/chipset/builder/mod.rs index 399299b6da..ba8daa909d 100644 --- a/vmm_core/vmotherboard/src/chipset/builder/mod.rs +++ b/vmm_core/vmotherboard/src/chipset/builder/mod.rs @@ -10,11 +10,17 @@ use self::errors::ErrorListExt; use self::errors::FinalChipsetBuilderError; use super::backing::arc_mutex::device::ArcMutexChipsetDeviceBuilder; use super::backing::arc_mutex::pci::BusResolverWeakMutexPci; +use super::backing::arc_mutex::pci::BusResolverWeakMutexPcie; use super::backing::arc_mutex::pci::RegisterWeakMutexPci; +use super::backing::arc_mutex::pci::RegisterWeakMutexPcie; use super::backing::arc_mutex::pci::WeakMutexPciEntry; +use super::backing::arc_mutex::pci::WeakMutexPcieDeviceEntry; use super::backing::arc_mutex::services::ArcMutexChipsetServices; use 
super::backing::arc_mutex::state_unit::ArcMutexChipsetDeviceUnit; +use crate::BusId; use crate::BusIdPci; +use crate::BusIdPcieDownstreamPort; +use crate::BusIdPcieEnumerator; use crate::DebugEventHandler; use crate::VmmChipsetDevice; use crate::chipset::Chipset; @@ -97,6 +103,7 @@ impl DynamicDeviceUnit { #[derive(Default)] pub(crate) struct BusResolver { pci: BusResolverWeakMutexPci, + pcie: BusResolverWeakMutexPcie, } /// A builder for [`Chipset`] @@ -198,6 +205,55 @@ impl<'a> ChipsetBuilder<'a> { .push(WeakMutexPciEntry { bdf, name, dev }); } + /// Register a PCIe enumerator (ex. root complex or switch), and all of + /// it's downstream ports. + pub fn register_weak_mutex_pcie_enumerator( + &mut self, + bus_id: BusIdPcieEnumerator, + enumerator: Box, + ) { + let downstream_ports = enumerator.downstream_ports(); + let existing = self + .bus_resolver + .pcie + .enumerators + .insert(bus_id.clone(), enumerator); + assert!( + existing.is_none(), + "duplicate pcie enumerator ID: {:?}", + bus_id + ); + + for (port_number, port_name) in downstream_ports { + let existing = self + .bus_resolver + .pcie + .ports + .insert(BusId::new(&port_name), (port_number, bus_id.clone())); + assert!( + existing.is_none(), + "duplicate pcie port ID: {:?}", + port_name + ); + } + } + + pub(crate) fn register_weak_mutex_pcie_device( + &mut self, + bus_id_port: BusIdPcieDownstreamPort, + name: Arc, + dev: Weak>, + ) { + self.bus_resolver + .pcie + .devices + .push(WeakMutexPcieDeviceEntry { + bus_id_port, + name, + dev, + }); + } + pub(crate) fn line_set( &mut self, id: LineSetId, @@ -253,7 +309,7 @@ impl<'a> ChipsetBuilder<'a> { } { - let BusResolver { pci } = self.bus_resolver; + let BusResolver { pci, pcie } = self.bus_resolver; match pci.resolve() { Ok(()) => {} @@ -263,6 +319,15 @@ impl<'a> ChipsetBuilder<'a> { } } } + + match pcie.resolve() { + Ok(()) => {} + Err(conflicts) => { + for conflict in conflicts { + errs.append(ChipsetBuilderError::PcieConflict(conflict)); + } + } + } } 
if let Some(err) = errs { diff --git a/vmm_core/vmotherboard/src/chipset/mod.rs b/vmm_core/vmotherboard/src/chipset/mod.rs index e5009474d7..89b332e9bc 100644 --- a/vmm_core/vmotherboard/src/chipset/mod.rs +++ b/vmm_core/vmotherboard/src/chipset/mod.rs @@ -323,3 +323,44 @@ impl std::fmt::Display for PciConflict { } } } + +#[derive(Debug)] +pub enum PcieConflictReason { + ExistingDev(Arc), + MissingDownstreamPort, + MissingEnumerator, +} + +#[derive(Debug)] +pub struct PcieConflict { + pub conflict_dev: Arc, + pub reason: PcieConflictReason, +} + +impl std::fmt::Display for PcieConflict { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.reason { + PcieConflictReason::ExistingDev(existing_dev) => { + write!( + fmt, + "cannot attach {}, port already occupied by {}", + self.conflict_dev, existing_dev + ) + } + PcieConflictReason::MissingDownstreamPort => { + write!( + fmt, + "cannot attach {}, no valid pcie downstream port", + self.conflict_dev + ) + } + PcieConflictReason::MissingEnumerator => { + write!( + fmt, + "cannot attach {}, no valid pcie enumerator", + self.conflict_dev + ) + } + } + } +} diff --git a/vmm_core/vmotherboard/src/lib.rs b/vmm_core/vmotherboard/src/lib.rs index 3639f5a769..b2c1a68480 100644 --- a/vmm_core/vmotherboard/src/lib.rs +++ b/vmm_core/vmotherboard/src/lib.rs @@ -95,11 +95,22 @@ impl BusId { pub mod bus_kind { #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Pci {} + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum PcieEnumerator {} + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum PcieDownstreamPort {} } /// Type-safe PCI bus ID. pub type BusIdPci = BusId; +/// Type-safe ID for the internal "bus" of a PCIe root +/// complex or switch. +pub type BusIdPcieEnumerator = BusId; + +/// Type-safe ID for a downstream PCIe port. +pub type BusIdPcieDownstreamPort = BusId; + /// A handle to instantiate a chipset device. 
#[derive(MeshPayload, Debug)] pub struct ChipsetDeviceHandle { From 789894d1ed21a6e2ec36df51ac340bd29d45d4e5 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Mon, 8 Sep 2025 10:55:25 -0700 Subject: [PATCH 07/12] openvmm_entry/hvlite_core: support for configuration of pcie endpoints --- Cargo.lock | 1 + openvmm/hvlite_core/Cargo.toml | 1 + openvmm/hvlite_core/src/worker/dispatch.rs | 31 ++++++++++++++++++++++ openvmm/hvlite_defs/src/config.rs | 7 +++++ openvmm/openvmm_entry/src/lib.rs | 1 + openvmm/openvmm_entry/src/ttrpc/mod.rs | 1 + 6 files changed, 42 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 7c6e8731c5..173a97d289 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3089,6 +3089,7 @@ dependencies = [ "pal_async", "pci_bus", "pci_core", + "pci_resources", "pcie", "range_map_vec", "scsi_core", diff --git a/openvmm/hvlite_core/Cargo.toml b/openvmm/hvlite_core/Cargo.toml index d034dfb246..3e738a3d0f 100644 --- a/openvmm/hvlite_core/Cargo.toml +++ b/openvmm/hvlite_core/Cargo.toml @@ -62,6 +62,7 @@ input_core.workspace = true missing_dev.workspace = true pci_bus.workspace = true pci_core.workspace = true +pci_resources.workspace = true pcie.workspace = true scsi_core.workspace = true scsidisk.workspace = true diff --git a/openvmm/hvlite_core/src/worker/dispatch.rs b/openvmm/hvlite_core/src/worker/dispatch.rs index fc81d44dbf..d4ae37b899 100644 --- a/openvmm/hvlite_core/src/worker/dispatch.rs +++ b/openvmm/hvlite_core/src/worker/dispatch.rs @@ -33,6 +33,7 @@ use hvlite_defs::config::Hypervisor; use hvlite_defs::config::HypervisorConfig; use hvlite_defs::config::LoadMode; use hvlite_defs::config::MemoryConfig; +use hvlite_defs::config::PcieEndpointConfig; use hvlite_defs::config::PcieRootComplexConfig; use hvlite_defs::config::PmuGsivConfig; use hvlite_defs::config::ProcessorTopologyConfig; @@ -170,6 +171,7 @@ impl Manifest { floppy_disks: config.floppy_disks, ide_disks: config.ide_disks, pcie_root_complexes: config.pcie_root_complexes, + pcie_endpoints: 
config.pcie_endpoints, vpci_devices: config.vpci_devices, hypervisor: config.hypervisor, memory: config.memory, @@ -212,6 +214,7 @@ pub struct Manifest { floppy_disks: Vec, ide_disks: Vec, pcie_root_complexes: Vec, + pcie_endpoints: Vec, vpci_devices: Vec, memory: MemoryConfig, processor_topology: ProcessorTopologyConfig, @@ -1797,6 +1800,33 @@ impl InitializedVm { } } + for dev_cfg in cfg.pcie_endpoints { + let dev_name = format!("pcie:{}-{}", dev_cfg.port_name, dev_cfg.resource.id()); + let mut msi_set = MsiInterruptSet::new(); + chipset_builder + .arc_mutex_device(dev_name) + .on_pcie_port(vmotherboard::BusId::new(&dev_cfg.port_name)) + .try_add_async(async |services| { + resolver + .resolve( + dev_cfg.resource, + pci_resources::ResolvePciDeviceHandleParams { + register_msi: &mut msi_set, + register_mmio: &mut services.register_mmio(), + driver_source: &driver_source, + guest_memory: &gm, + doorbell_registration: partition + .clone() + .into_doorbell_registration(Vtl::Vtl0), + shared_mem_mapper: Some(&mapper), + }, + ) + .await + .map(|r| r.0) + }) + .await?; + } + if let Some(vmbus_cfg) = cfg.vmbus { if !cfg.hypervisor.with_hv { anyhow::bail!("vmbus required hypervisor enlightements"); @@ -3019,6 +3049,7 @@ impl LoadedVm { floppy_disks: vec![], // TODO ide_disks: vec![], // TODO pcie_root_complexes: vec![], // TODO + pcie_endpoints: vec![], // TODO vpci_devices: vec![], // TODO memory: self.inner.memory_cfg, processor_topology: self.inner.processor_topology.to_config(), diff --git a/openvmm/hvlite_defs/src/config.rs b/openvmm/hvlite_defs/src/config.rs index de2bf6d03c..8ca920b290 100644 --- a/openvmm/hvlite_defs/src/config.rs +++ b/openvmm/hvlite_defs/src/config.rs @@ -26,6 +26,7 @@ pub struct Config { pub floppy_disks: Vec, pub ide_disks: Vec, pub pcie_root_complexes: Vec, + pub pcie_endpoints: Vec, pub vpci_devices: Vec, pub memory: MemoryConfig, pub processor_topology: ProcessorTopologyConfig, @@ -184,6 +185,12 @@ pub struct PcieRootPortConfig { pub name: 
String, } +#[derive(Debug, MeshPayload)] +pub struct PcieEndpointConfig { + pub port_name: String, + pub resource: Resource, +} + #[derive(Debug, MeshPayload)] pub struct VpciDeviceConfig { pub vtl: DeviceVtl, diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index c399c2dada..677fdd1c33 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -1336,6 +1336,7 @@ fn vm_config_from_command_line( load_mode, floppy_disks, pcie_root_complexes, + pcie_endpoints: Vec::new(), vpci_devices, ide_disks: Vec::new(), memory: MemoryConfig { diff --git a/openvmm/openvmm_entry/src/ttrpc/mod.rs b/openvmm/openvmm_entry/src/ttrpc/mod.rs index 54db5d0ad6..39c2dd9f08 100644 --- a/openvmm/openvmm_entry/src/ttrpc/mod.rs +++ b/openvmm/openvmm_entry/src/ttrpc/mod.rs @@ -459,6 +459,7 @@ impl VmService { ide_disks: vec![], floppy_disks: vec![], pcie_root_complexes: vec![], + pcie_endpoints: vec![], vpci_devices: vec![], memory: MemoryConfig { mem_size: req_config From 2bf852992d17f42c44700118959759a24dec14f6 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Mon, 8 Sep 2025 10:56:07 -0700 Subject: [PATCH 08/12] openvmm_entry/nvme: configuration support for enumerating nvme emulator over pcie --- openvmm/openvmm_entry/src/cli_args.rs | 10 ++++ openvmm/openvmm_entry/src/lib.rs | 4 +- openvmm/openvmm_entry/src/storage_builder.rs | 49 +++++++++++++++----- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/openvmm/openvmm_entry/src/cli_args.rs b/openvmm/openvmm_entry/src/cli_args.rs index 8c7f6d8d96..0fb0ad55fe 100644 --- a/openvmm/openvmm_entry/src/cli_args.rs +++ b/openvmm/openvmm_entry/src/cli_args.rs @@ -165,6 +165,9 @@ valid disk kinds: flags: `ro` open disk as read-only `vtl2` assign this disk to VTL2 + +options: + `pcie_port=` present the disk using pcie under the specified port "#)] #[clap(long)] pub nvme: Vec, @@ -897,6 +900,7 @@ pub struct DiskCli { pub read_only: bool, pub is_dvd: bool, pub underhill: Option, + 
pub pcie_port: Option, } #[derive(Copy, Clone)] @@ -916,6 +920,7 @@ impl FromStr for DiskCli { let mut is_dvd = false; let mut underhill = None; let mut vtl = DeviceVtl::Vtl0; + let mut pcie_port = None; for opt in opts { let mut s = opt.split('='); let opt = s.next().unwrap(); @@ -930,6 +935,10 @@ impl FromStr for DiskCli { } "uh" => underhill = Some(UnderhillDiskSource::Scsi), "uh-nvme" => underhill = Some(UnderhillDiskSource::Nvme), + "pcie_port" => { + let port = s.next().context("pcie_port requires port name")?; + pcie_port = Some(String::from(port)); + } opt => anyhow::bail!("unknown option: '{opt}'"), } } @@ -944,6 +953,7 @@ impl FromStr for DiskCli { read_only, is_dvd, underhill, + pcie_port, }) } } diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index 677fdd1c33..c70470b5d7 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -504,6 +504,7 @@ fn vm_config_from_command_line( read_only, is_dvd, underhill, + pcie_port: _, } in &opt.disk { storage.add( @@ -540,12 +541,13 @@ fn vm_config_from_command_line( read_only, is_dvd, underhill, + ref pcie_port, } in &opt.nvme { storage.add( vtl, underhill, - storage_builder::DiskLocation::Nvme(None), + storage_builder::DiskLocation::Nvme(None, pcie_port.clone()), kind, is_dvd, read_only, diff --git a/openvmm/openvmm_entry/src/storage_builder.rs b/openvmm/openvmm_entry/src/storage_builder.rs index 2444873b18..124c1635a1 100644 --- a/openvmm/openvmm_entry/src/storage_builder.rs +++ b/openvmm/openvmm_entry/src/storage_builder.rs @@ -12,6 +12,7 @@ use guid::Guid; use hvlite_defs::config::Config; use hvlite_defs::config::DeviceVtl; use hvlite_defs::config::LoadMode; +use hvlite_defs::config::PcieEndpointConfig; use hvlite_defs::config::VpciDeviceConfig; use ide_resources::GuestMedia; use ide_resources::IdeDeviceConfig; @@ -20,6 +21,7 @@ use nvme_resources::NamespaceDefinition; use nvme_resources::NvmeControllerHandle; use 
scsidisk_resources::SimpleScsiDiskHandle; use scsidisk_resources::SimpleScsiDvdHandle; +use std::collections::HashMap; use storvsp_resources::ScsiControllerHandle; use storvsp_resources::ScsiDeviceAndPath; use storvsp_resources::ScsiPath; @@ -34,23 +36,24 @@ pub(super) struct StorageBuilder { vtl2_scsi_devices: Vec, vtl0_nvme_namespaces: Vec, vtl2_nvme_namespaces: Vec, + pcie_nvme_controllers: HashMap>, underhill_scsi_luns: Vec, underhill_nvme_luns: Vec, openhcl_vtl: Option, } -#[derive(Copy, Clone)] +#[derive(Clone)] pub enum DiskLocation { Ide(Option, Option), Scsi(Option), - Nvme(Option), + Nvme(Option, Option), } impl From for DiskLocation { fn from(value: UnderhillDiskSource) -> Self { match value { UnderhillDiskSource::Scsi => Self::Scsi(None), - UnderhillDiskSource::Nvme => Self::Nvme(None), + UnderhillDiskSource::Nvme => Self::Nvme(None, None), } } } @@ -63,6 +66,7 @@ const SCSI_VTL0_INSTANCE_ID: Guid = guid::guid!("ba6163d9-04a1-4d29-b605-72e2ffb const SCSI_VTL2_INSTANCE_ID: Guid = guid::guid!("73d3aa59-b82b-4fe7-9e15-e2b0b5575cf8"); const UNDERHILL_VTL0_SCSI_INSTANCE: Guid = guid::guid!("e1c5bd94-d0d6-41d4-a2b0-88095a16ded7"); const UNDERHILL_VTL0_NVME_INSTANCE: Guid = guid::guid!("09a59b81-2bf6-4164-81d7-3a0dc977ba65"); +const PCIE_NVME_SUBSYSTEM_ID: Guid = guid::guid!("9672d1ac-4402-4fbe-a355-5b64400ff13d"); impl StorageBuilder { pub fn new(openhcl_vtl: Option) -> Self { @@ -72,6 +76,7 @@ impl StorageBuilder { vtl2_scsi_devices: Vec::new(), vtl0_nvme_namespaces: Vec::new(), vtl2_nvme_namespaces: Vec::new(), + pcie_nvme_controllers: HashMap::new(), underhill_scsi_luns: Vec::new(), underhill_nvme_luns: Vec::new(), openhcl_vtl, @@ -188,11 +193,18 @@ impl StorageBuilder { }); Some(lun.into()) } - DiskLocation::Nvme(nsid) => { - let namespaces = match vtl { - DeviceVtl::Vtl0 => &mut self.vtl0_nvme_namespaces, - DeviceVtl::Vtl1 => anyhow::bail!("vtl1 unsupported"), - DeviceVtl::Vtl2 => &mut self.vtl2_nvme_namespaces, + DiskLocation::Nvme(nsid, pcie_port) => { 
+ let namespaces = match (vtl, pcie_port) { + // VPCI + (DeviceVtl::Vtl0, None) => &mut self.vtl0_nvme_namespaces, + (DeviceVtl::Vtl1, None) => anyhow::bail!("vtl1 vpci unsupported"), + (DeviceVtl::Vtl2, None) => &mut self.vtl2_nvme_namespaces, + // PCIe + (DeviceVtl::Vtl0, Some(port)) => { + self.pcie_nvme_controllers.entry(port).or_default() + } + (DeviceVtl::Vtl1, Some(_)) => anyhow::bail!("vtl1 pcie unsupported"), + (DeviceVtl::Vtl2, Some(_)) => anyhow::bail!("vtl2 pcie unsupported"), }; if is_dvd { anyhow::bail!("dvd not supported with nvme"); @@ -219,7 +231,7 @@ impl StorageBuilder { ) -> anyhow::Result<()> { let vtl = self.openhcl_vtl.context("openhcl not configured")?; let sub_device_path = self - .add_inner(vtl, source, kind, is_dvd, read_only)? + .add_inner(vtl, source.clone(), kind, is_dvd, read_only)? .context("source device not supported by underhill")?; let (device_type, device_path) = match source { @@ -232,7 +244,7 @@ impl StorageBuilder { SCSI_VTL0_INSTANCE_ID }, ), - DiskLocation::Nvme(_) => ( + DiskLocation::Nvme(_, _) => ( vtl2_settings_proto::physical_device::DeviceType::Nvme, if vtl == DeviceVtl::Vtl2 { NVME_VTL2_INSTANCE_ID @@ -251,7 +263,7 @@ impl StorageBuilder { let lun = lun.unwrap_or(self.underhill_scsi_luns.len() as u8); (&mut self.underhill_scsi_luns, lun.into()) } - DiskLocation::Nvme(nsid) => { + DiskLocation::Nvme(nsid, _) => { let nsid = nsid.unwrap_or(self.underhill_nvme_luns.len() as u32 + 1); (&mut self.underhill_nvme_luns, nsid) } @@ -376,6 +388,21 @@ impl StorageBuilder { }); } + let owned_pcie_controllers = std::mem::take(&mut self.pcie_nvme_controllers); + for (index, (port_name, namespaces)) in owned_pcie_controllers.into_iter().enumerate() { + config.pcie_endpoints.push(PcieEndpointConfig { + port_name, + resource: NvmeControllerHandle { + subsystem_id: PCIE_NVME_SUBSYSTEM_ID, + controller_id: index as u16, + namespaces, + max_io_queues: 64, + msix_count: 64, + } + .into_resource(), + }); + } + Ok(()) } From 
0ce4649948804ec1d58f4f88989bfb55788f9032 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Tue, 9 Sep 2025 16:18:01 -0700 Subject: [PATCH 09/12] hvlite_core: connect pcie interrupt target to kvm if supported --- openvmm/hvlite_core/src/partition.rs | 13 +++++++++++++ openvmm/hvlite_core/src/worker/dispatch.rs | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/openvmm/hvlite_core/src/partition.rs b/openvmm/hvlite_core/src/partition.rs index 96f805a0a3..8592e4bd44 100644 --- a/openvmm/hvlite_core/src/partition.rs +++ b/openvmm/hvlite_core/src/partition.rs @@ -90,6 +90,12 @@ pub trait HvlitePartition: Inspect + Send + Sync + RequestYield + Synic { minimum_vtl: Vtl, ) -> Option>; + /// Gets the [`MsiInterruptTarget`] interface for a particular VTL. + fn into_msi_target( + self: Arc, + minimum_vtl: Vtl, + ) -> Option>; + /// Returns whether virtual devices are supported. fn supports_virtual_devices(&self) -> bool; @@ -204,6 +210,13 @@ where self.doorbell_registration(minimum_vtl) } + fn into_msi_target( + self: Arc, + minimum_vtl: Vtl, + ) -> Option> { + self.msi_interrupt_target(minimum_vtl) + } + fn supports_virtual_devices(&self) -> bool { self.new_virtual_device().is_some() } diff --git a/openvmm/hvlite_core/src/worker/dispatch.rs b/openvmm/hvlite_core/src/worker/dispatch.rs index d4ae37b899..35907eecdc 100644 --- a/openvmm/hvlite_core/src/worker/dispatch.rs +++ b/openvmm/hvlite_core/src/worker/dispatch.rs @@ -1825,6 +1825,10 @@ impl InitializedVm { .map(|r| r.0) }) .await?; + + if let Some(target) = partition.clone().into_msi_target(Vtl::Vtl0) { + msi_set.connect(target.as_ref()); + } } if let Some(vmbus_cfg) = cfg.vmbus { From a9b84718597567ac59d4ac1e6144790c820a3821 Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Wed, 10 Sep 2025 08:50:15 -0700 Subject: [PATCH 10/12] pcie: disable multi-function root port forwarding because it doesn't work --- vm/devices/pci/pcie/src/root.rs | 41 +++++++++++++++++---------------- 1 file changed, 21 insertions(+), 20 
deletions(-) diff --git a/vm/devices/pci/pcie/src/root.rs b/vm/devices/pci/pcie/src/root.rs index 2abbe50c0b..d09e51cf8e 100644 --- a/vm/devices/pci/pcie/src/root.rs +++ b/vm/devices/pci/pcie/src/root.rs @@ -38,10 +38,10 @@ pub struct GenericPcieRootPortDefinition { } enum DecodedEcamAccess<'a> { - UnexpectedIntercept(), - Unroutable(), + UnexpectedIntercept, + Unroutable, InternalBus(&'a mut RootPort, u16), - DownstreamPort(&'a mut RootPort, u8, u16), + DownstreamPort(&'a mut RootPort, u8, u8, u16), } impl GenericPcieRootComplex { @@ -100,7 +100,7 @@ impl GenericPcieRootComplex { let ecam_offset = match self.ecam.offset_of(addr) { Some(offset) => offset, None => { - return DecodedEcamAccess::UnexpectedIntercept(); + return DecodedEcamAccess::UnexpectedIntercept; } }; @@ -115,7 +115,7 @@ impl GenericPcieRootComplex { Some((_, port)) => { return DecodedEcamAccess::InternalBus(port, cfg_offset_within_function); } - None => return DecodedEcamAccess::Unroutable(), + None => return DecodedEcamAccess::Unroutable, } } else if bus_number > self.start_bus && bus_number <= self.end_bus { for (_, port) in self.ports.values_mut() { @@ -123,14 +123,15 @@ impl GenericPcieRootComplex { return DecodedEcamAccess::DownstreamPort( port, bus_number, + device_function, cfg_offset_within_function, ); } } - return DecodedEcamAccess::Unroutable(); + return DecodedEcamAccess::Unroutable; } - DecodedEcamAccess::UnexpectedIntercept() + DecodedEcamAccess::UnexpectedIntercept } } @@ -185,18 +186,18 @@ impl MmioIntercept for GenericPcieRootComplex { let mut value = !0; match self.decode_ecam_access(addr) { - DecodedEcamAccess::UnexpectedIntercept() => { + DecodedEcamAccess::UnexpectedIntercept => { tracing::error!("unexpected intercept at address 0x{:16x}", addr); } - DecodedEcamAccess::Unroutable() => { + DecodedEcamAccess::Unroutable => { tracelimit::warn_ratelimited!("unroutable config space access"); } DecodedEcamAccess::InternalBus(port, cfg_offset) => { let _ = 
port.pci_cfg_read(cfg_offset & !3, &mut value); value = shift_read_value(cfg_offset, data.len(), value); } - DecodedEcamAccess::DownstreamPort(port, bus_number, cfg_offset) => { - let _ = port.forward_cfg_read(&bus_number, cfg_offset & !3, &mut value); + DecodedEcamAccess::DownstreamPort(port, bus_number, device_function, cfg_offset) => { + let _ = port.forward_cfg_read(&bus_number, &device_function, cfg_offset & !3, &mut value); value = shift_read_value(cfg_offset, data.len(), value); } } @@ -215,10 +216,10 @@ impl MmioIntercept for GenericPcieRootComplex { }; match self.decode_ecam_access(addr) { - DecodedEcamAccess::UnexpectedIntercept() => { + DecodedEcamAccess::UnexpectedIntercept => { tracing::error!("unexpected intercept at address 0x{:16x}", addr); } - DecodedEcamAccess::Unroutable() => { + DecodedEcamAccess::Unroutable => { tracelimit::warn_ratelimited!("unroutable config space access"); } DecodedEcamAccess::InternalBus(port, cfg_offset) => { @@ -233,17 +234,17 @@ impl MmioIntercept for GenericPcieRootComplex { let _ = port.pci_cfg_write(rounded_offset, merged_value); } - DecodedEcamAccess::DownstreamPort(port, bus_number, cfg_offset) => { + DecodedEcamAccess::DownstreamPort(port, bus_number, device_function, cfg_offset) => { let rounded_offset = cfg_offset & !3; let merged_value = if data.len() == 4 { write_value } else { let mut temp: u32 = 0; - let _ = port.forward_cfg_read(&bus_number, rounded_offset, &mut temp); + let _ = port.forward_cfg_read(&bus_number, &device_function, rounded_offset, &mut temp); combine_old_new_values(cfg_offset, temp, write_value, data.len()) }; - let _ = port.forward_cfg_write(&bus_number, rounded_offset, merged_value); + let _ = port.forward_cfg_write(&bus_number, &device_function, rounded_offset, merged_value); } } @@ -359,11 +360,11 @@ impl RootPort { bus >= secondary_bus_number && bus <= suboordinate_bus_number } - fn forward_cfg_read(&mut self, bus: &u8, cfg_offset: u16, value: &mut u32) -> IoResult { + fn 
forward_cfg_read(&mut self, bus: &u8, device_function: &u8, cfg_offset: u16, value: &mut u32) -> IoResult { let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; - if *bus == secondary_bus_number { + if *bus == secondary_bus_number && *device_function == 0 { if let Some((_, device)) = &mut self.link { let _ = device.pci_cfg_read(cfg_offset, value); } @@ -374,11 +375,11 @@ impl RootPort { IoResult::Ok } - fn forward_cfg_write(&mut self, bus: &u8, cfg_offset: u16, value: u32) -> IoResult { + fn forward_cfg_write(&mut self, bus: &u8, device_function: &u8, cfg_offset: u16, value: u32) -> IoResult { let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; - if *bus == secondary_bus_number { + if *bus == secondary_bus_number && *device_function == 0 { if let Some((_, device)) = &mut self.link { let _ = device.pci_cfg_write(cfg_offset, value); } From 120d0cf9af049cf398c16dab922ea9b98f377a1d Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Thu, 25 Sep 2025 12:59:04 -0700 Subject: [PATCH 11/12] pci_core: header and capability updates for root ports --- .../pci_core/src/capabilities/pci_express.rs | 31 +- vm/devices/pci/pci_core/src/cfg_space_emu.rs | 496 +++++++++++++++++- vm/devices/pci/pci_core/src/spec.rs | 84 ++- 3 files changed, 595 insertions(+), 16 deletions(-) diff --git a/vm/devices/pci/pci_core/src/capabilities/pci_express.rs b/vm/devices/pci/pci_core/src/capabilities/pci_express.rs index ba506323a9..ab27086f7e 100644 --- a/vm/devices/pci/pci_core/src/capabilities/pci_express.rs +++ b/vm/devices/pci/pci_core/src/capabilities/pci_express.rs @@ -38,6 +38,7 @@ impl PciExpressState { #[derive(Inspect)] /// Configurable PCI Express capability. 
pub struct PciExpressCapability { + pcie_capabilites: pci_express::PciExpressCapabilities, device_capabilities: pci_express::DeviceCapabilities, state: Arc>, #[inspect(skip)] @@ -48,9 +49,13 @@ impl PciExpressCapability { /// Creates a new PCI Express capability with FLR support. /// /// # Arguments + /// * `typ` - The spec-defined device or port type. /// * `flr_handler` - Optional handler to be called when FLR is initiated. This emulator will report that FLR is supported if flr_handler = Some(_) - pub fn new(flr_handler: Option>) -> Self { + pub fn new(typ: pci_express::DevicePortType, flr_handler: Option>) -> Self { Self { + pcie_capabilites: pci_express::PciExpressCapabilities::new() + .with_capability_version(2) + .with_device_port_type(typ), device_capabilities: pci_express::DeviceCapabilities::new() .with_function_level_reset(flr_handler.is_some()), state: Arc::new(Mutex::new(PciExpressState::new())), @@ -98,9 +103,8 @@ impl PciCapability for PciExpressCapability { match PciExpressCapabilityHeader(offset) { PciExpressCapabilityHeader::PCIE_CAPS => { // PCIe Capabilities Register (16 bits) + Next Pointer (8 bits) + Capability ID (8 bits) - // For basic endpoint: Version=2, Device/Port Type=0 (PCI Express Endpoint) - let pcie_caps: u16 = 0x0002; // Version 2, Device/Port Type 0 - (pcie_caps as u32) << 16 | CapabilityId::PCI_EXPRESS.0 as u32 + (self.pcie_capabilites.into_bits() as u32) << 16 + | CapabilityId::PCI_EXPRESS.0 as u32 } PciExpressCapabilityHeader::DEVICE_CAPS => self.device_capabilities.into_bits(), PciExpressCapabilityHeader::DEVICE_CTL_STS => { @@ -223,6 +227,7 @@ mod save_restore { #[cfg(test)] mod tests { use super::*; + use crate::spec::caps::pci_express::DevicePortType; use std::sync::atomic::{AtomicBool, Ordering}; #[derive(Debug)] @@ -262,7 +267,7 @@ mod tests { #[test] fn test_pci_express_capability_read_u32() { let flr_handler = TestFlrHandler::new(); - let cap = PciExpressCapability::new(Some(flr_handler)); + let cap = 
PciExpressCapability::new(DevicePortType::Endpoint, Some(flr_handler)); // Test PCIe Capabilities Register (offset 0x00) let caps_val = cap.read_u32(0x00); @@ -288,7 +293,7 @@ mod tests { #[test] fn test_pci_express_capability_read_u32_no_flr() { - let cap = PciExpressCapability::new(None); + let cap = PciExpressCapability::new(DevicePortType::Endpoint, None); // Test Device Capabilities Register (offset 0x04) - FLR should not be set let device_caps_val = cap.read_u32(0x04); @@ -297,7 +302,7 @@ mod tests { #[test] fn test_pci_express_capability_write_u32_readonly_registers() { - let mut cap = PciExpressCapability::new(None); + let mut cap = PciExpressCapability::new(DevicePortType::Endpoint, None); // Try to write to read-only PCIe Capabilities Register (offset 0x00) let original_caps = cap.read_u32(0x00); @@ -313,7 +318,7 @@ mod tests { #[test] fn test_pci_express_capability_write_u32_device_control() { let flr_handler = TestFlrHandler::new(); - let mut cap = PciExpressCapability::new(Some(flr_handler.clone())); + let mut cap = PciExpressCapability::new(DevicePortType::Endpoint, Some(flr_handler.clone())); // Initial state should have FLR bit clear let initial_ctl_sts = cap.read_u32(0x08); @@ -345,7 +350,7 @@ mod tests { #[test] fn test_pci_express_capability_write_u32_device_status() { - let mut cap = PciExpressCapability::new(None); + let mut cap = PciExpressCapability::new(DevicePortType::Endpoint, None); // Manually set some status bits to test write-1-to-clear behavior { @@ -376,7 +381,7 @@ mod tests { #[test] fn test_pci_express_capability_write_u32_unhandled_offset() { - let mut cap = PciExpressCapability::new(None); + let mut cap = PciExpressCapability::new(DevicePortType::Endpoint, None); // Writing to unhandled offset should not panic cap.write_u32(0x10, 0xFFFFFFFF); @@ -387,7 +392,7 @@ mod tests { #[test] fn test_pci_express_capability_reset() { let flr_handler = TestFlrHandler::new(); - let mut cap = 
PciExpressCapability::new(Some(flr_handler.clone())); + let mut cap = PciExpressCapability::new(DevicePortType::Endpoint, Some(flr_handler.clone())); // Set some state cap.write_u32(0x08, 0x0001); // Set some device control bits @@ -412,13 +417,13 @@ mod tests { #[test] fn test_pci_express_capability_length() { - let cap = PciExpressCapability::new(None); + let cap = PciExpressCapability::new(DevicePortType::Endpoint, None); assert_eq!(cap.len(), 0x0C); // Should be 12 bytes } #[test] fn test_pci_express_capability_label() { - let cap = PciExpressCapability::new(None); + let cap = PciExpressCapability::new(DevicePortType::Endpoint, None); assert_eq!(cap.label(), "pci-express"); } } diff --git a/vm/devices/pci/pci_core/src/cfg_space_emu.rs b/vm/devices/pci/pci_core/src/cfg_space_emu.rs index e358d82fa8..35729ad9e3 100644 --- a/vm/devices/pci/pci_core/src/cfg_space_emu.rs +++ b/vm/devices/pci/pci_core/src/cfg_space_emu.rs @@ -16,6 +16,7 @@ use chipset_device::io::IoResult; use chipset_device::mmio::ControlMmioIntercept; use guestmem::MappableGuestMemory; use inspect::Inspect; +use std::ops::RangeInclusive; use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; @@ -118,8 +119,8 @@ impl ConfigSpaceType0EmulatorState { /// Emulator for the standard Type 0 PCI configuration space header. // -// TODO: split + share shared registers with other (yet unimplemented) -// header types +// TODO: Figure out how to split this up and share the handling of common +// registers (hardware IDs, command, status, etc.) with the type 1 emulator. #[derive(Inspect)] pub struct ConfigSpaceType0Emulator { // Fixed configuration @@ -571,6 +572,322 @@ impl ConfigSpaceType0Emulator { } } +#[derive(Debug, Inspect)] +struct ConfigSpaceType1EmulatorState { + /// The command register + command: cfg_space::Command, + /// The subordinate bus number register. Software programs + /// this register with the highest bus number below the bridge. 
+ subordinate_bus_number: u8, + /// The secondary bus number register. Software programs + /// this register with the bus number assigned to the secondary + /// side of the bridge. + secondary_bus_number: u8, + /// The primary bus number register. This is unused for PCI Express but + /// is supposed to be read/write for compatibility with legacy software. + primary_bus_number: u8, + /// The memory base register. Software programs the upper 12 bits of this + /// register with the upper 12 bits of a 32-bit base address of MMIO assigned + /// to the hierarchy under the bridge (the lower 20 bits are assumed to be 0s). + memory_base: u16, + /// The memory limit register. Software programs the upper 12 bits of this + /// register with the upper 12 bits of a 32-bit limit address of MMIO assigned + /// to the hierarchy under the bridge (the lower 20 bits are assumed to be 1s). + memory_limit: u16, + /// The prefetchable memory base register. Software programs the upper 12 bits of + /// this register with bits 20:31 of the base address of the prefetchable MMIO + /// window assigned to the hierarchy under the bridge. Bits 0:19 are assumed to + /// be 0s. + prefetch_base: u16, + /// The prefetchable memory limit register. Software programs the upper 12 bits of + /// this register with bits 20:31 of the limit address of the prefetchable MMIO + /// window assigned to the hierarchy under the bridge. Bits 0:19 are assumed to + /// be 1s. + prefetch_limit: u16, + /// The prefetchable memory base upper 32 bits register. When the bridge supports + /// 64-bit addressing for prefetchable memory, software programs this register + /// with the upper 32 bits of the base address of the prefetchable MMIO window + /// assigned to the hierarchy under the bridge. + prefetch_base_upper: u32, + /// The prefetchable memory limit upper 32 bits register. 
When the bridge supports + /// 64-bit addressing for prefetchable memory, software programs this register + /// with the upper 32 bits of the limit address of the prefetchable MMIO window + /// assigned to the hierarchy under the bridge. + prefetch_limit_upper: u32, +} + +impl ConfigSpaceType1EmulatorState { + fn new() -> Self { + Self { + command: cfg_space::Command::new(), + subordinate_bus_number: 0, + secondary_bus_number: 0, + primary_bus_number: 0, + memory_base: 0, + memory_limit: 0, + prefetch_base: 0, + prefetch_limit: 0, + prefetch_base_upper: 0, + prefetch_limit_upper: 0, + } + } +} + +/// Emulator for the standard Type 1 PCI configuration space header. +// +// TODO: Figure out how to split this up and share the handling of common +// registers (hardware IDs, command, status, etc.) with the type 0 emulator. +// TODO: Support type 1 BARs (only two) +#[derive(Inspect)] +pub struct ConfigSpaceType1Emulator { + hardware_ids: HardwareIds, + #[inspect(with = "|x| inspect::iter_by_key(x.iter().map(|cap| (cap.label(), cap)))")] + capabilities: Vec>, + state: ConfigSpaceType1EmulatorState, +} + +impl ConfigSpaceType1Emulator { + /// Create a new [`ConfigSpaceType1Emulator`] + pub fn new(hardware_ids: HardwareIds, capabilities: Vec>) -> Self { + Self { + hardware_ids, + capabilities, + state: ConfigSpaceType1EmulatorState::new(), + } + } + + /// Resets the configuration space state. + pub fn reset(&mut self) { + self.state = ConfigSpaceType1EmulatorState::new(); + + for cap in &mut self.capabilities { + cap.reset(); + } + } + + /// Returns the range of bus numbers the bridge is programmed to decode. 
+ pub fn assigned_bus_range(&self) -> RangeInclusive { + let secondary = self.state.secondary_bus_number; + let subordinate = self.state.subordinate_bus_number; + if secondary <= subordinate { + secondary..=subordinate + } else { + 0..=0 + } + } + + fn decode_memory_range(&self, base_register: u16, limit_register: u16) -> (u32, u32) { + let base_addr = ((base_register & !0b1111) as u32) << 16; + let limit_addr = ((limit_register & !0b1111) as u32) << 16 | 0xF_FFFF; + (base_addr, limit_addr) + } + + /// If memory decoding is currently enabled, and the memory window assignment is valid, + /// returns the 32-bit memory addresses the bridge is programmed to decode. + pub fn assigned_memory_range(&self) -> Option> { + let (base_addr, limit_addr) = + self.decode_memory_range(self.state.memory_base, self.state.memory_limit); + if self.state.command.mmio_enabled() && base_addr <= limit_addr { + Some(base_addr..=limit_addr) + } else { + None + } + } + + /// If memory decoding is currently enabled, and the prefetchable memory window assignment + /// is valid, returns the 64-bit prefetchable memory addresses the bridge is programmed to decode. + pub fn assigned_prefetch_range(&self) -> Option> { + let (base_low, limit_low) = + self.decode_memory_range(self.state.prefetch_base, self.state.prefetch_limit); + let base_addr = (self.state.prefetch_base_upper as u64) << 32 | base_low as u64; + let limit_addr = (self.state.prefetch_limit_upper as u64) << 32 | limit_low as u64; + if self.state.command.mmio_enabled() && base_addr <= limit_addr { + Some(base_addr..=limit_addr) + } else { + None + } + } + + fn get_capability_index_and_offset(&self, offset: u16) -> Option<(usize, u16)> { + let mut cap_offset = 0; + for i in 0..self.capabilities.len() { + let cap_size = self.capabilities[i].len() as u16; + if offset < cap_offset + cap_size { + return Some((i, offset - cap_offset)); + } + cap_offset += cap_size; + } + None + } + + /// Read from the config space. 
`offset` must be 32-bit aligned. + pub fn read_u32(&self, offset: u16, value: &mut u32) -> IoResult { + use cfg_space::HeaderType01; + + *value = match HeaderType01(offset) { + HeaderType01::DEVICE_VENDOR => { + (self.hardware_ids.device_id as u32) << 16 | self.hardware_ids.vendor_id as u32 + } + HeaderType01::STATUS_COMMAND => { + let status = + cfg_space::Status::new().with_capabilities_list(!self.capabilities.is_empty()); + + (status.into_bits() as u32) << 16 | self.state.command.into_bits() as u32 + } + HeaderType01::CLASS_REVISION => { + (u8::from(self.hardware_ids.base_class) as u32) << 24 + | (u8::from(self.hardware_ids.sub_class) as u32) << 16 + | (u8::from(self.hardware_ids.prog_if) as u32) << 8 + | self.hardware_ids.revision_id as u32 + } + HeaderType01::BIST_HEADER => { + // Header type 01 + 0x00010000 + } + HeaderType01::BAR0 => 0, + HeaderType01::BAR1 => 0, + HeaderType01::LATENCY_BUS_NUMBERS => { + (self.state.subordinate_bus_number as u32) << 16 + | (self.state.secondary_bus_number as u32) << 8 + | self.state.primary_bus_number as u32 + } + HeaderType01::SEC_STATUS_IO_RANGE => 0, + HeaderType01::MEMORY_RANGE => { + (self.state.memory_limit as u32) << 16 | self.state.memory_base as u32 + } + HeaderType01::PREFETCH_RANGE => { + // Set the low bit in both the limit and base registers to indicate + // support for 64-bit addressing. 
+ ((self.state.prefetch_limit | 0b0001) as u32) << 16 + | (self.state.prefetch_base | 0b0001) as u32 + } + HeaderType01::PREFETCH_BASE_UPPER => self.state.prefetch_base_upper, + HeaderType01::PREFETCH_LIMIT_UPPER => self.state.prefetch_limit_upper, + HeaderType01::IO_RANGE_UPPER => 0, + HeaderType01::RESERVED_CAP_PTR => { + if self.capabilities.is_empty() { + 0 + } else { + 0x40 + } + } + HeaderType01::EXPANSION_ROM_BASE => 0, + HeaderType01::BRDIGE_CTRL_INTERRUPT => 0, + // rest of the range is reserved for device capabilities + _ if (0x40..0x100).contains(&offset) => { + if let Some((cap_index, cap_offset)) = + self.get_capability_index_and_offset(offset - 0x40) + { + let mut value = self.capabilities[cap_index].read_u32(cap_offset); + if cap_offset == 0 { + let next = if cap_index < self.capabilities.len() - 1 { + offset as u32 + self.capabilities[cap_index].len() as u32 + } else { + 0 + }; + assert!(value & 0xff00 == 0); + value |= next << 8; + } + value + } else { + tracelimit::warn_ratelimited!(offset, "unhandled config space read"); + return IoResult::Err(IoError::InvalidRegister); + } + } + _ if (0x100..0x1000).contains(&offset) => { + // TODO: properly support extended pci express configuration space + if offset == 0x100 { + tracelimit::warn_ratelimited!(offset, "unexpected pci express probe"); + 0x000ffff + } else { + tracelimit::warn_ratelimited!(offset, "unhandled extended config space read"); + return IoResult::Err(IoError::InvalidRegister); + } + } + _ => { + tracelimit::warn_ratelimited!(offset, "unexpected config space read"); + return IoResult::Err(IoError::InvalidRegister); + } + }; + + IoResult::Ok + } + + /// Write to the config space. `offset` must be 32-bit aligned. 
+ pub fn write_u32(&mut self, offset: u16, val: u32) -> IoResult { + use cfg_space::HeaderType01; + + match HeaderType01(offset) { + HeaderType01::STATUS_COMMAND => { + let mut command = cfg_space::Command::from_bits(val as u16); + if command.into_bits() & !SUPPORTED_COMMAND_BITS != 0 { + tracelimit::warn_ratelimited!(offset, val, "setting invalid command bits"); + // still do our best + command = + cfg_space::Command::from_bits(command.into_bits() & SUPPORTED_COMMAND_BITS); + }; + + // TODO: on MSE, sanity check the programmed memory and prefetch ranges... + + self.state.command = command; + } + //HeaderType01::BAR0 => () + //HeaderType01::BAR1 => () + HeaderType01::LATENCY_BUS_NUMBERS => { + self.state.subordinate_bus_number = (val >> 16) as u8; + self.state.secondary_bus_number = (val >> 8) as u8; + self.state.primary_bus_number = val as u8; + } + HeaderType01::MEMORY_RANGE => { + self.state.memory_base = val as u16; + self.state.memory_limit = (val >> 16) as u16; + } + HeaderType01::PREFETCH_RANGE => { + self.state.prefetch_base = val as u16; + self.state.prefetch_limit = (val >> 16) as u16; + } + HeaderType01::PREFETCH_BASE_UPPER => { + self.state.prefetch_base_upper = val; + } + HeaderType01::PREFETCH_LIMIT_UPPER => { + self.state.prefetch_limit_upper = val; + } + // all other base regs are noops + _ if offset < 0x40 && offset % 4 == 0 => (), + // rest of the range is reserved for extended device capabilities + _ if (0x40..0x100).contains(&offset) => { + if let Some((cap_index, cap_offset)) = + self.get_capability_index_and_offset(offset - 0x40) + { + self.capabilities[cap_index].write_u32(cap_offset, val); + } else { + tracelimit::warn_ratelimited!( + offset, + value = val, + "unhandled config space write" + ); + return IoResult::Err(IoError::InvalidRegister); + } + } + _ if (0x100..0x1000).contains(&offset) => { + // TODO: properly support extended pci express configuration space + tracelimit::warn_ratelimited!( + offset, + value = val, + "unhandled 
extended config space write" + ); + return IoResult::Err(IoError::InvalidRegister); + } + _ => { + tracelimit::warn_ratelimited!(offset, value = val, "unexpected config space write"); + return IoResult::Err(IoError::InvalidRegister); + } + } + + IoResult::Ok + } +} + mod save_restore { use super::*; use thiserror::Error; @@ -684,3 +1001,178 @@ mod save_restore { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::capabilities::read_only::ReadOnlyCapability; + use crate::spec::hwid::ClassCode; + use crate::spec::hwid::ProgrammingInterface; + use crate::spec::hwid::Subclass; + + fn create_type1_emulator(caps: Vec>) -> ConfigSpaceType1Emulator { + ConfigSpaceType1Emulator::new( + HardwareIds { + vendor_id: 0x1111, + device_id: 0x2222, + revision_id: 1, + prog_if: ProgrammingInterface::NONE, + sub_class: Subclass::BRIDGE_PCI_TO_PCI, + base_class: ClassCode::BRIDGE, + type0_sub_vendor_id: 0, + type0_sub_system_id: 0, + }, + caps, + ) + } + + fn read_cfg(emulator: &ConfigSpaceType1Emulator, offset: u16) -> u32 { + let mut val = 0; + emulator.read_u32(offset, &mut val).unwrap(); + val + } + + #[test] + fn test_type1_probe() { + let emu = create_type1_emulator(vec![]); + assert_eq!(read_cfg(&emu, 0), 0x2222_1111); + assert_eq!(read_cfg(&emu, 4) & 0x10_0000, 0); // Capabilities pointer + + let emu = create_type1_emulator(vec![Box::new(ReadOnlyCapability::new("foo", 0))]); + assert_eq!(read_cfg(&emu, 0), 0x2222_1111); + assert_eq!(read_cfg(&emu, 4) & 0x10_0000, 0x10_0000); // Capabilities pointer + } + + #[test] + fn test_type1_bus_number_assignment() { + let mut emu = create_type1_emulator(vec![]); + + // The bus number (and latency timer) registers are + // all default 0. + assert_eq!(read_cfg(&emu, 0x18), 0); + assert_eq!(emu.assigned_bus_range(), 0..=0); + + // The bus numbers can be programmed one by one, + // and the range may not be valid during the middle + // of allocation. 
+ emu.write_u32(0x18, 0x0000_1000).unwrap(); + assert_eq!(read_cfg(&emu, 0x18), 0x0000_1000); + assert_eq!(emu.assigned_bus_range(), 0..=0); + emu.write_u32(0x18, 0x0012_1000).unwrap(); + assert_eq!(read_cfg(&emu, 0x18), 0x0012_1000); + assert_eq!(emu.assigned_bus_range(), 0x10..=0x12); + + // The primary bus number register is read/write for compatibility + // but unused. + emu.write_u32(0x18, 0x0012_1033).unwrap(); + assert_eq!(read_cfg(&emu, 0x18), 0x0012_1033); + assert_eq!(emu.assigned_bus_range(), 0x10..=0x12); + + // Software can also just write the entire 4-byte value at once + emu.write_u32(0x18, 0x0047_4411).unwrap(); + assert_eq!(read_cfg(&emu, 0x18), 0x0047_4411); + assert_eq!(emu.assigned_bus_range(), 0x44..=0x47); + + // The subordinate bus number can equal the secondary bus number... + emu.write_u32(0x18, 0x0088_8800).unwrap(); + assert_eq!(emu.assigned_bus_range(), 0x88..=0x88); + + // ... but it cannot be less, that's a confused guest OS. + emu.write_u32(0x18, 0x0087_8800).unwrap(); + assert_eq!(emu.assigned_bus_range(), 0..=0); + } + + #[test] + fn test_type1_memory_assignment() { + const MMIO_ENABLED: u32 = 0x0000_0002; + const MMIO_DISABLED: u32 = 0x0000_0000; + + let mut emu = create_type1_emulator(vec![]); + assert!(emu.assigned_memory_range().is_none()); + + // The guest can write whatever it wants while MMIO + // is disabled. + emu.write_u32(0x20, 0xDEAD_BEEF).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + + // The guest can program a valid resource assignment... + emu.write_u32(0x20, 0xFFF0_FF00).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + // ... enable memory decoding... + emu.write_u32(0x4, MMIO_ENABLED).unwrap(); + assert_eq!(emu.assigned_memory_range(), Some(0xFF00_0000..=0xFFFF_FFFF)); + // ... then disable memory decoding. + emu.write_u32(0x4, MMIO_DISABLED).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + + // Setting memory base equal to memory limit is a valid 1MB range.
+ emu.write_u32(0x20, 0xBBB0_BBB0).unwrap(); + emu.write_u32(0x4, MMIO_ENABLED).unwrap(); + assert_eq!(emu.assigned_memory_range(), Some(0xBBB0_0000..=0xBBBF_FFFF)); + emu.write_u32(0x4, MMIO_DISABLED).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + + // The guest can try to program an invalid assignment (base > limit), we + // just won't decode it. + emu.write_u32(0x20, 0xAA00_BB00).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + emu.write_u32(0x4, MMIO_ENABLED).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + emu.write_u32(0x4, MMIO_DISABLED).unwrap(); + assert!(emu.assigned_memory_range().is_none()); + } + + #[test] + fn test_type1_prefetch_assignment() { + const MMIO_ENABLED: u32 = 0x0000_0002; + const MMIO_DISABLED: u32 = 0x0000_0000; + + let mut emu = create_type1_emulator(vec![]); + assert!(emu.assigned_prefetch_range().is_none()); + + // The guest can program a valid prefetch range... + emu.write_u32(0x24, 0xFFF0_FF00).unwrap(); // limit + base + emu.write_u32(0x28, 0x00AA_BBCC).unwrap(); // base upper + emu.write_u32(0x2C, 0x00DD_EEFF).unwrap(); // limit upper + assert!(emu.assigned_prefetch_range().is_none()); + // ... enable memory decoding... + emu.write_u32(0x4, MMIO_ENABLED).unwrap(); + assert_eq!( + emu.assigned_prefetch_range(), + Some(0x00AA_BBCC_FF00_0000..=0x00DD_EEFF_FFFF_FFFF) + ); + // ... then disable memory decoding. + emu.write_u32(0x4, MMIO_DISABLED).unwrap(); + assert!(emu.assigned_prefetch_range().is_none()); + + // The validity of the assignment is determined using the combined 64-bit + // address, not the lower bits or the upper bits in isolation. + + // Lower bits of the base are greater than the lower bits of the + // limit, but the upper bits make that valid.
+ emu.write_u32(0x24, 0xFF00_FFF0).unwrap(); // limit + base + emu.write_u32(0x28, 0x00AA_BBCC).unwrap(); // base upper + emu.write_u32(0x2C, 0x00DD_EEFF).unwrap(); // limit upper + assert!(emu.assigned_prefetch_range().is_none()); + emu.write_u32(0x4, MMIO_ENABLED).unwrap(); + assert_eq!( + emu.assigned_prefetch_range(), + Some(0x00AA_BBCC_FFF0_0000..=0x00DD_EEFF_FF0F_FFFF) + ); + emu.write_u32(0x4, MMIO_DISABLED).unwrap(); + assert!(emu.assigned_prefetch_range().is_none()); + + // The base can equal the limit, which is a valid 1MB range. + emu.write_u32(0x24, 0xDD00_DD00).unwrap(); // limit + base + emu.write_u32(0x28, 0x00AA_BBCC).unwrap(); // base upper + emu.write_u32(0x2C, 0x00AA_BBCC).unwrap(); // limit upper + assert!(emu.assigned_prefetch_range().is_none()); + emu.write_u32(0x4, MMIO_ENABLED).unwrap(); + assert_eq!( + emu.assigned_prefetch_range(), + Some(0x00AA_BBCC_DD00_0000..=0x00AA_BBCC_DD0F_FFFF) + ); + emu.write_u32(0x4, MMIO_DISABLED).unwrap(); + assert!(emu.assigned_prefetch_range().is_none()); + } +} diff --git a/vm/devices/pci/pci_core/src/spec.rs b/vm/devices/pci/pci_core/src/spec.rs index 68285415c5..cb8f90d39e 100644 --- a/vm/devices/pci/pci_core/src/spec.rs +++ b/vm/devices/pci/pci_core/src/spec.rs @@ -127,6 +127,7 @@ pub mod hwid { // Other values: 0x02 - 0x0A BRIDGE_HOST = 0x00, BRIDGE_ISA = 0x01, + BRIDGE_PCI_TO_PCI = 0x04, BRIDGE_OTHER = 0x80, // Base System Peripheral (Class code: 0x08) @@ -238,6 +239,51 @@ pub mod cfg_space { pub const HEADER_TYPE_00_SIZE: u16 = 0x40; + open_enum::open_enum! { + /// Offsets into the type 01h configuration space header. 
+ /// + /// Table pulled from + /// + /// | Offset | Bits 31-24 | Bits 23-16 | Bits 15-8 | Bits 7-0 | + /// |--------|----------------------------------|------------------------|--------------------------|--------------------- | + /// | 0x0 | Device ID | | Vendor ID | | + /// | 0x4 | Status | | Command | | + /// | 0x8 | Class code | | | Revision ID | + /// | 0xC | BIST | Header Type | Latency Timer | Cache Line Size | + /// | 0x10 | Base address #0 (BAR0) | | | | + /// | 0x14 | Base address #1 (BAR1) | | | | + /// | 0x18 | Secondary Latency Timer | Subordinate Bus Number | Secondary Bus Number | Primary Bus Number | + /// | 0x1C | Secondary Status | | I/O Limit | I/O Base | + /// | 0x20 | Memory Limit | | Memory Base | | + /// | 0x24 | Prefetchable Memory Limit | | Prefetchable Memory Base | | + /// | 0x28 | Prefetchable Base Upper 32 Bits | | | | + /// | 0x2C | Prefetchable Limit Upper 32 Bits | | | | + /// | 0x30 | I/O Limit Upper 16 Bits | | I/O Base Upper 16 Bits | | + /// | 0x34 | Reserved | | | Capabilities Pointer | + /// | 0x38 | Expansion ROM Base Address | | | | + /// | 0x3C | Bridge Control | | Interrupt PIN | Interrupt Line | + pub enum HeaderType01: u16 { + DEVICE_VENDOR = 0x00, + STATUS_COMMAND = 0x04, + CLASS_REVISION = 0x08, + BIST_HEADER = 0x0C, + BAR0 = 0x10, + BAR1 = 0x14, + LATENCY_BUS_NUMBERS = 0x18, + SEC_STATUS_IO_RANGE = 0x1C, + MEMORY_RANGE = 0x20, + PREFETCH_RANGE = 0x24, + PREFETCH_BASE_UPPER = 0x28, + PREFETCH_LIMIT_UPPER = 0x2C, + IO_RANGE_UPPER = 0x30, + RESERVED_CAP_PTR = 0x34, + EXPANSION_ROM_BASE = 0x38, + BRDIGE_CTRL_INTERRUPT = 0x3C, + } + } + + pub const HEADER_TYPE_01_SIZE: u16 = 0x40; + /// BAR in-band encoding bits. /// /// The low bits of the BAR are not actually part of the address. 
@@ -403,6 +449,42 @@ pub mod caps { } } + /// PCI Express Capabilities Register + #[bitfield(u16)] + #[derive(IntoBytes, Immutable, KnownLayout, FromBytes, Inspect)] + pub struct PciExpressCapabilities { + #[bits(4)] + pub capability_version: u16, + #[bits(4)] + pub device_port_type: DevicePortType, + pub slot_implemented: bool, + #[bits(5)] + pub interrupt_message_number: u16, + pub _undefined: bool, + pub flit_mode_supported: bool, + } + + #[derive(Debug)] + #[repr(u16)] + pub enum DevicePortType { + Endpoint = 0b0000, + RootPort = 0b0100, + } + + impl DevicePortType { + const fn from_bits(bits: u16) -> Self { + match bits { + 0b0000 => DevicePortType::Endpoint, + 0b0100 => DevicePortType::RootPort, + _ => unreachable!(), + } + } + + const fn into_bits(self) -> u16 { + self as u16 + } + } + /// Device Capabilities Register (From the 6.4 spec) #[bitfield(u32)] #[derive(IntoBytes, Immutable, KnownLayout, FromBytes, Inspect)] @@ -451,7 +533,7 @@ pub mod caps { pub initiate_function_level_reset: bool, } - /// Device Status Register + /// Device Status Register #[bitfield(u16)] #[derive(IntoBytes, Immutable, KnownLayout, FromBytes, Inspect)] pub struct DeviceStatus { From b8c5b27533691eb52b951e6572019b41095185cb Mon Sep 17 00:00:00 2001 From: Jack Schefer Date: Thu, 25 Sep 2025 13:00:45 -0700 Subject: [PATCH 12/12] pcie: replace hardcoded config space layout with type1 emulator --- Cargo.lock | 1 + vm/devices/pci/pcie/Cargo.toml | 1 + vm/devices/pci/pcie/src/root.rs | 152 ++++++++++++++------------------ 3 files changed, 69 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 173a97d289..42e2d6d95a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5552,6 +5552,7 @@ dependencies = [ "chipset_device", "inspect", "pci_bus", + "pci_core", "tracelimit", "tracing", "vmcore", diff --git a/vm/devices/pci/pcie/Cargo.toml b/vm/devices/pci/pcie/Cargo.toml index 189db4f01f..3188e093d8 100644 --- a/vm/devices/pci/pcie/Cargo.toml +++ b/vm/devices/pci/pcie/Cargo.toml
@@ -10,6 +10,7 @@ edition.workspace = true chipset_device.workspace = true inspect.workspace = true pci_bus.workspace = true +pci_core.workspace = true tracing.workspace = true tracelimit.workspace = true vmcore.workspace = true diff --git a/vm/devices/pci/pcie/src/root.rs b/vm/devices/pci/pcie/src/root.rs index d09e51cf8e..cbe3829b0a 100644 --- a/vm/devices/pci/pcie/src/root.rs +++ b/vm/devices/pci/pcie/src/root.rs @@ -12,6 +12,13 @@ use chipset_device::mmio::RegisterMmioIntercept; use inspect::Inspect; use inspect::InspectMut; use pci_bus::GenericPciBusDevice; +use pci_core::capabilities::pci_express::PciExpressCapability; +use pci_core::cfg_space_emu::ConfigSpaceType1Emulator; +use pci_core::spec::caps::pci_express::DevicePortType; +use pci_core::spec::hwid::ClassCode; +use pci_core::spec::hwid::HardwareIds; +use pci_core::spec::hwid::ProgrammingInterface; +use pci_core::spec::hwid::Subclass; use std::collections::HashMap; use std::sync::Arc; use vmcore::device_state::ChangeDeviceState; @@ -197,7 +204,12 @@ impl MmioIntercept for GenericPcieRootComplex { value = shift_read_value(cfg_offset, data.len(), value); } DecodedEcamAccess::DownstreamPort(port, bus_number, device_function, cfg_offset) => { - let _ = port.forward_cfg_read(&bus_number, &device_function, cfg_offset & !3, &mut value); + let _ = port.forward_cfg_read( + &bus_number, + &device_function, + cfg_offset & !3, + &mut value, + ); value = shift_read_value(cfg_offset, data.len(), value); } } @@ -240,11 +252,21 @@ impl MmioIntercept for GenericPcieRootComplex { write_value } else { let mut temp: u32 = 0; - let _ = port.forward_cfg_read(&bus_number, &device_function, rounded_offset, &mut temp); + let _ = port.forward_cfg_read( + &bus_number, + &device_function, + rounded_offset, + &mut temp, + ); combine_old_new_values(cfg_offset, temp, write_value, data.len()) }; - let _ = port.forward_cfg_write(&bus_number, &device_function, rounded_offset, merged_value); + let _ = port.forward_cfg_write( + 
&bus_number, + &device_function, + rounded_offset, + merged_value, + ); } } @@ -254,30 +276,33 @@ impl MmioIntercept for GenericPcieRootComplex { #[derive(Inspect)] struct RootPort { - // Minimal type 1 configuration space emulation for - // Linux and Windows to enumerate the port. This should - // be refactored into a dedicated type 1 emulator. - command_status_register: u32, - bus_number_registers: u32, - memory_limit_registers: u32, - prefetch_limit_registers: u32, - prefetch_base_upper_register: u32, - prefetch_limit_upper_register: u32, + cfg_space: ConfigSpaceType1Emulator, #[inspect(skip)] link: Option<(Arc, Box)>, } impl RootPort { - /// Constructs a new `RootPort` emulator. + /// Constructs a new [`RootPort`] emulator. pub fn new() -> Self { + let cfg_space = ConfigSpaceType1Emulator::new( + HardwareIds { + vendor_id: 0x1414, + device_id: 0xF111, + revision_id: 0, + prog_if: ProgrammingInterface::NONE, + sub_class: Subclass::BRIDGE_PCI_TO_PCI, + base_class: ClassCode::BRIDGE, + type0_sub_vendor_id: 0, + type0_sub_system_id: 0, + }, + vec![Box::new(PciExpressCapability::new( + DevicePortType::RootPort, + None, + ))], + ); Self { - command_status_register: 0, - bus_number_registers: 0, - memory_limit_registers: 0, - prefetch_limit_registers: 0, - prefetch_base_upper_register: 0, - prefetch_limit_upper_register: 0, + cfg_space, link: None, } } @@ -298,92 +323,49 @@ impl RootPort { } fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult { - *value = match offset { - 0x00 => 0xF111_1414, // Device and Vendor IDs - 0x04 => self.command_status_register | 0x0010_0000, - 0x08 => 0x0604_0000, // Class code and revision - 0x0C => 0x0001_0000, // Header type 1 - 0x10 => 0x0000_0000, // BAR0 - 0x14 => 0x0000_0000, // BAR1 - 0x18 => self.bus_number_registers, - 0x1C => 0x0000_0000, // Secondary status and I/O range - 0x20 => self.memory_limit_registers, - 0x24 => self.prefetch_limit_registers, - 0x28 => self.prefetch_base_upper_register, - 0x2C => 
self.prefetch_limit_upper_register, - 0x30 => 0x0000_0000, // I/O base and limit 16 bit - 0x34 => 0x0000_0040, // Reserved and Capability pointer - 0x38 => 0x0000_0000, // Expansion ROM - 0x3C => 0x0000_0000, // Bridge control, interrupt pin/line - - // PCI Express capability structure - 0x40 => 0x0142_0010, // Capability header and PCI Express capabilities register - 0x44 => 0x0000_0000, // Device capabilities register - 0x48 => 0x0000_0000, // Device control and status registers - 0x4C => 0x0000_0000, // Link capabilities register - 0x50 => 0x0011_0000, // Link control and status registers - 0x54 => 0x0000_0000, // Slot capabilities register - 0x58 => 0x0000_0000, // Slot status and control registers - 0x5C => 0x0000_0000, // Root capabilities and control registers - 0x60 => 0x0000_0000, // Root status register - 0x64 => 0x0000_0000, // Device capabilities 2 register - 0x68 => 0x0000_0000, // Device status 2 and control 2 registers - 0x6C => 0x0000_0000, // Link capabilities 2 register - 0x70 => 0x0000_0000, // Link status 2 and control 2 registers - 0x74 => 0x0000_0000, // Slot capabilities 2 register - 0x78 => 0x0000_0000, // Slot status 2 and control 2 registers - - _ => 0xFFFF, - }; - - IoResult::Ok + self.cfg_space.read_u32(offset, value) } fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult { - match offset { - 0x04 => self.command_status_register = value, - 0x18 => self.bus_number_registers = value, - 0x20 => self.memory_limit_registers = value, - 0x24 => self.prefetch_limit_registers = value, - 0x28 => self.prefetch_base_upper_register = value, - 0x2C => self.prefetch_limit_upper_register = value, - _ => {} - }; - - IoResult::Ok + self.cfg_space.write_u32(offset, value) } fn assigned_bus_number(&self, bus: u8) -> bool { - let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; - let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; - - bus >= secondary_bus_number && bus <= suboordinate_bus_number 
+ self.cfg_space.assigned_bus_range().contains(&bus) } - fn forward_cfg_read(&mut self, bus: &u8, device_function: &u8, cfg_offset: u16, value: &mut u32) -> IoResult { - let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; - let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; - - if *bus == secondary_bus_number && *device_function == 0 { + fn forward_cfg_read( + &mut self, + bus: &u8, + device_function: &u8, + cfg_offset: u16, + value: &mut u32, + ) -> IoResult { + let bus_range = self.cfg_space.assigned_bus_range(); + if *bus == *bus_range.start() && *device_function == 0 { if let Some((_, device)) = &mut self.link { let _ = device.pci_cfg_read(cfg_offset, value); } - } else if *bus > secondary_bus_number && *bus <= suboordinate_bus_number { + } else if bus_range.contains(bus) { tracelimit::warn_ratelimited!("multi-level hierarchies not implemented yet"); } IoResult::Ok } - fn forward_cfg_write(&mut self, bus: &u8, device_function: &u8, cfg_offset: u16, value: u32) -> IoResult { - let secondary_bus_number = ((self.bus_number_registers >> 8) & 0xFF) as u8; - let suboordinate_bus_number = ((self.bus_number_registers >> 16) & 0xFF) as u8; - - if *bus == secondary_bus_number && *device_function == 0 { + fn forward_cfg_write( + &mut self, + bus: &u8, + device_function: &u8, + cfg_offset: u16, + value: u32, + ) -> IoResult { + let bus_range = self.cfg_space.assigned_bus_range(); + if *bus == *bus_range.start() && *device_function == 0 { if let Some((_, device)) = &mut self.link { let _ = device.pci_cfg_write(cfg_offset, value); } - } else if *bus > secondary_bus_number && *bus <= suboordinate_bus_number { + } else if bus_range.contains(bus) { tracelimit::warn_ratelimited!("multi-level hierarchies not implemented yet"); }