From 7be4000cbe285b75c921d3142f5ce8499236f6da Mon Sep 17 00:00:00 2001 From: Solicey Date: Wed, 4 Dec 2024 19:08:47 +0800 Subject: [PATCH 01/29] create arch/x86_64 and pass compilation --- .cargo/config | 11 +++ .vscode/settings.json | 6 +- Cargo.lock | 46 +++++++++ Cargo.toml | 8 +- Makefile | 7 +- scripts/qemu-x86_64.ld | 56 +++++++++++ scripts/qemu-x86_64.mk | 11 +++ src/arch/mod.rs | 8 +- src/arch/x86_64/cpu.rs | 31 ++++++ src/arch/x86_64/entry.rs | 43 +++++++++ src/arch/x86_64/ipi.rs | 1 + src/arch/x86_64/mm.rs | 14 +++ src/arch/x86_64/mod.rs | 13 +++ src/arch/x86_64/multiboot.S | 120 +++++++++++++++++++++++ src/arch/x86_64/paging.rs | 165 ++++++++++++++++++++++++++++++++ src/arch/x86_64/s1pt.rs | 60 ++++++++++++ src/arch/x86_64/s2pt.rs | 53 ++++++++++ src/arch/x86_64/trap.rs | 1 + src/arch/x86_64/zone.rs | 25 +++++ src/consts.rs | 2 + src/device/irqchip/i8259/mod.rs | 13 +++ src/device/irqchip/mod.rs | 8 +- src/device/uart/mod.rs | 6 ++ src/device/uart/uart16550.rs | 5 + src/main.rs | 8 +- src/memory/mm.rs | 2 + src/platform/mod.rs | 9 +- src/platform/qemu_x86_64.rs | 13 +++ 28 files changed, 733 insertions(+), 12 deletions(-) create mode 100644 scripts/qemu-x86_64.ld create mode 100644 scripts/qemu-x86_64.mk create mode 100644 src/arch/x86_64/cpu.rs create mode 100644 src/arch/x86_64/entry.rs create mode 100644 src/arch/x86_64/ipi.rs create mode 100644 src/arch/x86_64/mm.rs create mode 100644 src/arch/x86_64/mod.rs create mode 100644 src/arch/x86_64/multiboot.S create mode 100644 src/arch/x86_64/paging.rs create mode 100644 src/arch/x86_64/s1pt.rs create mode 100644 src/arch/x86_64/s2pt.rs create mode 100644 src/arch/x86_64/trap.rs create mode 100644 src/arch/x86_64/zone.rs create mode 100644 src/device/irqchip/i8259/mod.rs create mode 100644 src/device/uart/uart16550.rs create mode 100644 src/platform/qemu_x86_64.rs diff --git a/.cargo/config b/.cargo/config index 473bf208..594b0862 100644 --- a/.cargo/config +++ b/.cargo/config @@ -16,4 +16,15 @@ linker = 
"loongarch64-unknown-linux-gnu-gcc" rustflags = [ "-Clink-arg=-Tscripts/3a5000-loongarch64.ld", "-Cforce-frame-pointers=yes", +] + +[target.x86_64-unknown-none] +linker = "rust-lld" +rustflags = [ + "-Clink-arg=-Tscripts/qemu-x86_64.ld", + "-Clink-arg=-no-pie", + "-Clinker-flavor=ld.lld", + "-Cforce-frame-pointers=yes", + "-Ctarget-feature=-mmx,-sse,+soft-float", + "-Cno-redzone=yes", ] \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index f2ff01e8..decd56f1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -67,14 +67,16 @@ // Prevent "can't find crate for `test`" error on no_std // Ref: https://github.com/rust-lang/vscode-rust/issues/729 // For vscode-rust plugin users: - "rust.target": "aarch64-unknown-none", + // "rust.target": "aarch64-unknown-none", // "rust.target": "riscv64gc-unknown-none-elf", // "rust.target": "loongarch64-unknown-none", + "rust.target": "x86_64-unknown-none", "rust.all_targets": false, // For Rust Analyzer plugin users: - "rust-analyzer.cargo.target": "aarch64-unknown-none", + // "rust-analyzer.cargo.target": "aarch64-unknown-none", // "rust-analyzer.cargo.target": "riscv64gc-unknown-none-elf", // "rust-analyzer.cargo.target": "loongarch64-unknown-none", + "rust-analyzer.cargo.target": "x86_64-unknown-none", "rust-analyzer.checkOnSave.allTargets": false, // "rust-analyzer.cargo.features": [ // "board_qemu" diff --git a/Cargo.lock b/Cargo.lock index 5756202a..b81bf8aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,6 +100,8 @@ dependencies = [ "sbi-rt", "spin 0.9.8", "tock-registers", + "x86", + "x86_64", ] [[package]] @@ -155,6 +157,15 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b6fb7be1022539b1ea394ff4bcbad807a55c93841bb12c733f0be1048ea3e53" +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "regex" version = "1.10.4" @@ -221,6 +232,12 @@ dependencies = [ "semver", ] +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + [[package]] name = "sbi-rt" version = "0.0.2" @@ -292,3 +309,32 @@ name = "tock-registers" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "696941a0aee7e276a165a978b37918fd5d22c55c3d6bda197813070ca9c0f21c" + +[[package]] +name = "volatile" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442887c63f2c839b346c192d047a7c87e73d0689c9157b00b53dcc27dd5ea793" + +[[package]] +name = "x86" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2781db97787217ad2a2845c396a5efe286f87467a5810836db6d74926e94a385" +dependencies = [ + "bit_field 0.10.2", + "bitflags 1.3.2", + "raw-cpuid", +] + +[[package]] +name = "x86_64" +version = "0.14.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96cb6fd45bfeab6a5055c5bffdb08768bd0c069f1d946debe585bbb380a7c062" +dependencies = [ + "bit_field 0.10.2", + "bitflags 2.5.0", + "rustversion", + "volatile", +] diff --git a/Cargo.toml b/Cargo.toml index 2a1ccea4..7f552493 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ fdt = { path = "./vendor/fdt" } [target.'cfg(target_arch = "aarch64")'.dependencies] aarch64-cpu = "9.4.0" -psci = { version = "0.1.0", default-features = false, features = ["smc"]} +psci = { version = "0.1.0", default-features = false, features = ["smc"] } [target.'cfg(target_arch = "riscv64")'.dependencies] sbi-rt = { version = "0.0.2", features = ["legacy"] } @@ -29,6 +29,10 @@ riscv-decode = "0.2.1" [target.'cfg(target_arch = 
"loongarch64")'.dependencies] loongArch64 = "0.2.4" +[target.'cfg(target_arch = "x86_64")'.dependencies] +x86 = "0.52" +x86_64 = "0.14" + [features] platform_qemu = [] platform_imx8mp = [] @@ -38,4 +42,4 @@ panic = "abort" debug = 2 [profile.release] -panic = "abort" \ No newline at end of file +panic = "abort" diff --git a/Makefile b/Makefile index c1c319e2..8696938f 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ # Basic settings -ARCH ?= aarch64 +ARCH ?= x86_64 LOG ?= info STATS ?= off PORT ?= 2333 MODE ?= debug OBJCOPY ?= rust-objcopy --binary-architecture=$(ARCH) KDIR ?= ../../linux -FEATURES ?= platform_imx8mp +FEATURES ?= platform_qemu ifeq ($(ARCH),aarch64) RUSTC_TARGET := aarch64-unknown-none @@ -17,6 +17,9 @@ else ifeq ($(ARCH),riscv64) else ifeq ($(ARCH),loongarch64) RUSTC_TARGET := loongarch64-unknown-none GDB_ARCH := loongarch64 +else ifeq ($(ARCH),x86_64) + RUSTC_TARGET := x86_64-unknown-none + GDB_ARCH := i386:x86-64 else $(error Unsupported ARCH value: $(ARCH)) endif diff --git a/scripts/qemu-x86_64.ld b/scripts/qemu-x86_64.ld new file mode 100644 index 00000000..f0709e4e --- /dev/null +++ b/scripts/qemu-x86_64.ld @@ -0,0 +1,56 @@ +ENTRY(arch_entry) +BASE_ADDRESS = 0xffffff8000200000; + +SECTIONS +{ + . = BASE_ADDRESS; + skernel = .; + + stext = .; + .text : { + *(.text.entry) + *(.text.entry32) + *(.text.entry64) + *(.text .text.*) + } + + . = ALIGN(4K); + etext = .; + srodata = .; + .rodata : { + *(.rodata .rodata.*) + *(.srodata .srodata.*) + } + + . = ALIGN(4K); + erodata = .; + sdata = .; + .data : { + *(.data.entry_page_table) + *(.data .data.*) + *(.sdata .sdata.*) + } + + . = ALIGN(4K); + edata = .; + .bss : { + boot_stack = .; + *(.bss.stack) + . = ALIGN(4K); + boot_stack_top = .; + + sbss = .; + *(.bss .bss.*) + *(.sbss .sbss.*) + } + + . = ALIGN(4K); + ebss = .; + ekernel = .; + + /DISCARD/ : { + *(.eh_frame) + } + . 
= ALIGN(4K); + __core_end = .; +} diff --git a/scripts/qemu-x86_64.mk b/scripts/qemu-x86_64.mk new file mode 100644 index 00000000..f18e751b --- /dev/null +++ b/scripts/qemu-x86_64.mk @@ -0,0 +1,11 @@ +QEMU := qemu-system-x86_64 + +QEMU_ARGS := -machine q35 +QEMU_ARGS += -cpu host -accel kvm +# QEMU_ARGS += -smp 1 +QEMU_ARGS += -m 2G +QEMU_ARGS += -nographic +QEMU_ARGS += -kernel $(hvisor_bin) + +$(hvisor_bin): elf + $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ \ No newline at end of file diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 5b5c19b5..a33091cd 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -7,6 +7,9 @@ pub mod riscv64; #[cfg(target_arch = "loongarch64")] pub mod loongarch64; +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + // export modules for external use #[cfg(target_arch = "aarch64")] pub use aarch64::*; @@ -15,4 +18,7 @@ pub use aarch64::*; pub use riscv64::*; #[cfg(target_arch = "loongarch64")] -pub use loongarch64::*; \ No newline at end of file +pub use loongarch64::*; + +#[cfg(target_arch = "x86_64")] +pub use x86_64::*; diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs new file mode 100644 index 00000000..d629b9a4 --- /dev/null +++ b/src/arch/x86_64/cpu.rs @@ -0,0 +1,31 @@ +pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) {} + +#[repr(C)] +#[derive(Debug)] +pub struct ArchCpu { + pub cpuid: usize, + pub power_on: bool, +} + +impl ArchCpu { + pub fn new(cpuid: usize) -> Self { + Self { + cpuid, + power_on: false, + } + } + + pub fn reset(&mut self, entry: usize, dtb: usize) {} + + pub fn run(&mut self) -> ! { + loop {} + } + + pub fn idle(&mut self) -> ! 
{ + loop {} + } +} + +pub fn this_cpu_id() -> usize { + 0 +} diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs new file mode 100644 index 00000000..e964a8c6 --- /dev/null +++ b/src/arch/x86_64/entry.rs @@ -0,0 +1,43 @@ +use core::arch::global_asm; + +use x86::msr::IA32_EFER; +use x86_64::registers::control::{Cr0Flags, Cr4Flags}; +use x86_64::registers::model_specific::EferFlags; + +const PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; +const BOOT_KERNEL_STACK_SIZE: usize = 4096 * 4; + +const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits() + | Cr0Flags::MONITOR_COPROCESSOR.bits() + | Cr0Flags::TASK_SWITCHED.bits() + | Cr0Flags::NUMERIC_ERROR.bits() + | Cr0Flags::WRITE_PROTECT.bits() + | Cr0Flags::PAGING.bits(); +const CR4: u64 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION.bits() | Cr4Flags::PAGE_GLOBAL.bits(); +const EFER: u64 = EferFlags::LONG_MODE_ENABLE.bits() | EferFlags::NO_EXECUTE_ENABLE.bits(); + +global_asm!( + include_str!("multiboot.S"), + main_entry = sym crate::rust_main, + offset = const PHYS_VIRT_OFFSET, + boot_stack_size = const BOOT_KERNEL_STACK_SIZE, + cr0 = const CR0, + cr4 = const CR4, + efer_msr = const IA32_EFER, + efer = const EFER, +); + +#[naked] +#[no_mangle] +#[link_section = ".text.entry"] +pub unsafe extern "C" fn arch_entry() -> i32 { + core::arch::asm!( + " + .code32 + mov edi, eax // magic + mov esi, ebx // multiboot info + jmp entry32 + ", + options(noreturn), + ); +} diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs new file mode 100644 index 00000000..0bb0769d --- /dev/null +++ b/src/arch/x86_64/ipi.rs @@ -0,0 +1 @@ +pub fn arch_send_event(cpu_id: u64, sgi_num: u64) {} diff --git a/src/arch/x86_64/mm.rs b/src/arch/x86_64/mm.rs new file mode 100644 index 00000000..8e27f3eb --- /dev/null +++ b/src/arch/x86_64/mm.rs @@ -0,0 +1,14 @@ +use crate::{ + arch::s1pt::Stage1PageTable, + arch::s2pt::Stage2PageTable, + consts::PAGE_SIZE, + error::HvResult, + memory::{ + addr::{align_down, align_up}, + GuestPhysAddr, 
HostPhysAddr, MemFlags, MemoryRegion, MemorySet, HV_PT, + }, +}; + +pub fn new_s2_memory_set() -> MemorySet { + MemorySet::new(4) +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs new file mode 100644 index 00000000..35fd6cf0 --- /dev/null +++ b/src/arch/x86_64/mod.rs @@ -0,0 +1,13 @@ +#![allow(unused)] +pub mod cpu; +pub mod entry; +pub mod ipi; +pub mod mm; +pub mod paging; +pub mod s1pt; +pub mod s2pt; +pub mod trap; +pub mod zone; + +pub use s1pt::Stage1PageTable; +pub use s2pt::Stage2PageTable; diff --git a/src/arch/x86_64/multiboot.S b/src/arch/x86_64/multiboot.S new file mode 100644 index 00000000..41c1e412 --- /dev/null +++ b/src/arch/x86_64/multiboot.S @@ -0,0 +1,120 @@ +.equ MULTIBOOT_HEADER_MAGIC, 0x1BADB002 +.equ MULTIBOOT_HEADER_FLAGS, 0x00010002 +.equ MULTIBOOT_CHECKSUM, -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) + +.section .text.entry + +.section .text.entry32 +.code32 + +.balign 4 +.type multiboot_header, STT_OBJECT +multiboot_header: + .int MULTIBOOT_HEADER_MAGIC + .int MULTIBOOT_HEADER_FLAGS + .int MULTIBOOT_CHECKSUM + .int multiboot_header - {offset} // header_addr + .int skernel - {offset} // load_addr + .int edata - {offset} // load_end + .int ebss - {offset} // bss_end_addr + .int arch_entry - {offset} // entry_addrs + +entry32: + // load the temporary GDT + lgdt [.Ltmp_gdt_desc_phys - {offset}] + mov ax, 0x18 // data segment selector + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + + // set PAE, PGE bit in CR4 + mov eax, {cr4} + mov cr4, eax + + // load the temporary page table + lea eax, [.Ltmp_pml4 - {offset}] + mov cr3, eax + + // set LME, NXE bit in IA32_EFER + mov ecx, {efer_msr} + mov edx, 0 + mov eax, {efer} + wrmsr + + // set protected mode, write protect, paging bit in CR0 + mov eax, {cr0} + mov cr0, eax + + // long return to the 64-bit entry + push 0x10 // code64 segment selector + lea eax, [entry64 - {offset}] + push eax + retf + +.section .text.entry64 +.code64 + +entry64: + // reload GDT 
by high address + movabs rax, offset .Ltmp_gdt_desc + lgdt [rax] + + // clear segment selectors + xor ax, ax + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + + // set stack and jump to rust_main + movabs rsp, offset boot_stack_top + movabs rax, offset {main_entry} + call rax +1: jmp 1b + +.section .rodata +.balign 8 +.Ltmp_gdt_desc_phys: + .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit + .long .Ltmp_gdt - {offset} // base + +.balign 8 +.Ltmp_gdt_desc: + .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit + .quad .Ltmp_gdt // base + +.section .data +.balign 16 +.Ltmp_gdt: + .quad 0x0000000000000000 // 0x00: null + .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) +.Ltmp_gdt_end: + +.balign 4096 +.Ltmp_pml4: + // 0x0000_0000 ~ 0x8000_0000 + .quad .Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) + .zero 8 * 510 + // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_8000_0000 + .quad .Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) + +.Ltmp_pdpt_low: + .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .zero 8 * 510 + +.Ltmp_pdpt_high: + .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .zero 8 * 510 + +.section .bss.stack +.balign 4096 +boot_stack: + .space {boot_stack_size} +boot_stack_top: diff --git a/src/arch/x86_64/paging.rs b/src/arch/x86_64/paging.rs new file mode 100644 index 00000000..bdf89c37 --- /dev/null +++ b/src/arch/x86_64/paging.rs @@ -0,0 +1,165 @@ +#![allow(unused)] +use crate::error::{HvError, HvResult}; +use crate::memory::{Frame, MemFlags, 
MemoryRegion, PhysAddr, VirtAddr}; +use core::{fmt::Debug, marker::PhantomData}; + +#[derive(Debug)] +pub enum PagingError { + NoMemory, + NotMapped, + AlreadyMapped, + MappedToHugePage, +} + +pub type PagingResult = Result; + +#[repr(usize)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum PageSize { + Size4K = 0x1000, + Size2M = 0x20_0000, + Size1G = 0x4000_0000, +} + +impl PageSize { + pub const fn is_aligned(self, addr: usize) -> bool { + false + } + + pub const fn align_down(self, addr: usize) -> usize { + 0 + } + + pub const fn page_offset(self, addr: usize) -> usize { + 0 + } + + pub const fn is_huge(self) -> bool { + false + } +} + +pub trait GenericPTE: Debug + Clone { + /// Returns the physical address mapped by this entry. + fn addr(&self) -> PhysAddr; + /// Returns the flags of this entry. + fn flags(&self) -> MemFlags; + /// Returns whether this entry is zero. + fn is_unused(&self) -> bool; + /// Returns whether this entry flag indicates present. + fn is_present(&self) -> bool; + /// Returns whether this entry maps to a huge frame. + fn is_huge(&self) -> bool; + /// Set physical address for terminal entries. + fn set_addr(&mut self, paddr: PhysAddr); + /// Set flags for terminal entries. + fn set_flags(&mut self, flags: MemFlags, is_huge: bool); + /// Set physical address and flags for intermediate table entries. + fn set_table(&mut self, paddr: PhysAddr); + /// Set this entry to zero. + fn clear(&mut self); +} + +pub trait PagingInstr { + unsafe fn activate(root_paddr: PhysAddr); + fn flush(vaddr: Option); +} + +/// A basic read-only page table for address query only. +pub trait GenericPageTableImmut: Sized { + type VA: From + Into + Copy; + + unsafe fn from_root(root_paddr: PhysAddr) -> Self; + fn root_paddr(&self) -> PhysAddr; + fn query(&self, vaddr: Self::VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)>; +} + +/// A extended mutable page table can change mappings. 
+pub trait GenericPageTable: GenericPageTableImmut { + fn new() -> Self; + + fn map(&mut self, region: &MemoryRegion) -> HvResult; + fn unmap(&mut self, region: &MemoryRegion) -> HvResult; + fn update( + &mut self, + vaddr: Self::VA, + paddr: PhysAddr, + flags: MemFlags, + ) -> PagingResult; + + fn clone(&self) -> Self; + + unsafe fn activate(&self); + fn flush(&self, vaddr: Option); +} + +pub struct HvPageTable { + _phantom: PhantomData<(VA, PTE, I)>, +} + +impl HvPageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + #[allow(dead_code)] + pub fn dump(&self, limit: usize) {} + + /// Clone only the top level page table mapping from `src`. + pub fn clone_from(src: &impl GenericPageTableImmut) -> Self { + Self::new() + } +} + +impl GenericPageTableImmut for HvPageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + type VA = VA; + + unsafe fn from_root(root_paddr: PhysAddr) -> Self { + Self::new() + } + + fn root_paddr(&self) -> PhysAddr { + 0 + } + + fn query(&self, vaddr: VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)> { + Ok((0, MemFlags::READ, PageSize::Size4K)) + } +} + +impl GenericPageTable for HvPageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + fn new() -> Self { + Self::new() + } + + fn map(&mut self, region: &MemoryRegion) -> HvResult { + Ok(()) + } + + fn unmap(&mut self, region: &MemoryRegion) -> HvResult { + Ok(()) + } + + fn update(&mut self, vaddr: VA, paddr: PhysAddr, flags: MemFlags) -> PagingResult { + Ok(PageSize::Size4K) + } + + fn clone(&self) -> Self { + Self::clone_from(self) + } + + unsafe fn activate(&self) {} + + fn flush(&self, vaddr: Option) {} +} diff --git a/src/arch/x86_64/s1pt.rs b/src/arch/x86_64/s1pt.rs new file mode 100644 index 00000000..48216489 --- /dev/null +++ b/src/arch/x86_64/s1pt.rs @@ -0,0 +1,60 @@ +use super::paging::{GenericPTE, HvPageTable, PagingInstr}; +use crate::{ + consts::PAGE_SIZE, + memory::{ + addr::{GuestPhysAddr, 
HostPhysAddr, PhysAddr}, + MemFlags, + }, +}; +use core::fmt; +use numeric_enum_macro::numeric_enum; + +numeric_enum! { + #[repr(u64)] + #[derive(Debug, Clone, Copy, Eq, PartialEq)] + enum MemType { + Normal = 0, + Device = 1, + } +} + +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct PageTableEntry(pub u64); + +impl GenericPTE for PageTableEntry { + fn addr(&self) -> HostPhysAddr { + 0 + } + fn flags(&self) -> MemFlags { + MemFlags::READ + } + fn is_unused(&self) -> bool { + false + } + fn is_present(&self) -> bool { + false + } + fn set_addr(&mut self, addr: HostPhysAddr) {} + fn set_flags(&mut self, flags: MemFlags, is_huge: bool) {} + fn set_table(&mut self, pa: HostPhysAddr) {} + fn clear(&mut self) {} + fn is_huge(&self) -> bool { + false + } +} + +impl fmt::Debug for PageTableEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +pub struct S1PTInstr; + +impl PagingInstr for S1PTInstr { + unsafe fn activate(root_paddr: HostPhysAddr) {} + fn flush(_vaddr: Option) {} +} + +pub type Stage1PageTable = HvPageTable; diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs new file mode 100644 index 00000000..f4e677ab --- /dev/null +++ b/src/arch/x86_64/s2pt.rs @@ -0,0 +1,53 @@ +use super::paging::{GenericPTE, HvPageTable, PagingInstr}; +use crate::memory::addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}; +use crate::memory::MemFlags; +use core::fmt; + +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct PageTableEntry(u64); + +impl GenericPTE for PageTableEntry { + fn addr(&self) -> HostPhysAddr { + 0 + } + + fn flags(&self) -> MemFlags { + MemFlags::READ + } + + fn is_unused(&self) -> bool { + false + } + + fn is_present(&self) -> bool { + false + } + + fn is_huge(&self) -> bool { + false + } + + fn set_addr(&mut self, paddr: HostPhysAddr) {} + + fn set_flags(&mut self, flags: MemFlags, is_huge: bool) {} + + fn set_table(&mut self, paddr: HostPhysAddr) {} + + fn clear(&mut self) {} +} + +impl fmt::Debug for 
PageTableEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +pub struct S2PTInstr; + +impl PagingInstr for S2PTInstr { + unsafe fn activate(root_paddr: HostPhysAddr) {} + fn flush(_vaddr: Option) {} +} + +pub type Stage2PageTable = HvPageTable; diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs new file mode 100644 index 00000000..1b09da50 --- /dev/null +++ b/src/arch/x86_64/trap.rs @@ -0,0 +1 @@ +pub fn install_trap_vector() {} diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs new file mode 100644 index 00000000..d828da9e --- /dev/null +++ b/src/arch/x86_64/zone.rs @@ -0,0 +1,25 @@ +use crate::{ + config::*, + error::HvResult, + memory::{ + addr::align_down, addr::align_up, mmio_generic_handler, GuestPhysAddr, HostPhysAddr, + MemFlags, MemoryRegion, + }, + zone::Zone, +}; + +#[repr(C)] +#[derive(Debug, Clone)] +pub struct HvArchZoneConfig {} + +impl Zone { + pub fn pt_init(&mut self, mem_regions: &[HvConfigMemoryRegion]) -> HvResult { + Ok(()) + } + + pub fn mmio_init(&mut self, hv_config: &HvArchZoneConfig) {} + + pub fn isa_init(&mut self, fdt: &fdt::Fdt) {} + + pub fn irq_bitmap_init(&mut self, irqs: &[u32]) {} +} diff --git a/src/consts.rs b/src/consts.rs index 7a8565a3..227c9918 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -19,6 +19,8 @@ pub const MAX_CPU_NUM: usize = 4; pub const MAX_CPU_NUM: usize = 4; #[cfg(target_arch = "riscv64")] pub const MAX_CPU_NUM: usize = 4; +#[cfg(target_arch = "x86_64")] +pub const MAX_CPU_NUM: usize = 4; pub fn core_end() -> VirtAddr { __core_end as _ diff --git a/src/device/irqchip/i8259/mod.rs b/src/device/irqchip/i8259/mod.rs new file mode 100644 index 00000000..5d1ce2bf --- /dev/null +++ b/src/device/irqchip/i8259/mod.rs @@ -0,0 +1,13 @@ +use crate::zone::Zone; + +pub fn inject_irq(_irq: usize, _is_hardware: bool) {} + +pub fn percpu_init() {} + +pub fn primary_init_early() {} + +pub fn primary_init_late() {} + +impl Zone { + pub fn arch_irqchip_reset(&self) {} +} 
diff --git a/src/device/irqchip/mod.rs b/src/device/irqchip/mod.rs index 7d746ed5..9e6d1d2f 100644 --- a/src/device/irqchip/mod.rs +++ b/src/device/irqchip/mod.rs @@ -7,6 +7,9 @@ pub mod plic; #[cfg(target_arch = "loongarch64")] pub mod ls7a2000; +#[cfg(target_arch = "x86_64")] +pub mod i8259; + #[cfg(target_arch = "aarch64")] pub use gicv3::{inject_irq, percpu_init, primary_init_early, primary_init_late}; @@ -14,4 +17,7 @@ pub use gicv3::{inject_irq, percpu_init, primary_init_early, primary_init_late}; pub use plic::{inject_irq, percpu_init, primary_init_early, primary_init_late}; #[cfg(target_arch = "loongarch64")] -pub use ls7a2000::{inject_irq, percpu_init, primary_init_early, primary_init_late}; \ No newline at end of file +pub use ls7a2000::{inject_irq, percpu_init, primary_init_early, primary_init_late}; + +#[cfg(target_arch = "x86_64")] +pub use i8259::{inject_irq, percpu_init, primary_init_early, primary_init_late}; diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index 67f11061..778ee124 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -18,3 +18,9 @@ mod ns16440a; #[cfg(target_arch = "loongarch64")] pub use ns16440a::{console_getchar, console_putchar}; + +#[cfg(target_arch = "x86_64")] +mod uart16550; + +#[cfg(target_arch = "x86_64")] +pub use uart16550::{console_getchar, console_putchar}; diff --git a/src/device/uart/uart16550.rs b/src/device/uart/uart16550.rs new file mode 100644 index 00000000..5d534e04 --- /dev/null +++ b/src/device/uart/uart16550.rs @@ -0,0 +1,5 @@ +pub fn console_putchar(c: u8) {} + +pub fn console_getchar() -> Option { + None +} diff --git a/src/main.rs b/src/main.rs index c7fe3db6..bd6b014b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,6 +28,7 @@ extern crate lazy_static; #[macro_use] mod logging; mod arch; +mod config; mod consts; mod device; mod event; @@ -37,16 +38,15 @@ mod panic; mod percpu; mod platform; mod zone; -mod config; #[cfg(target_arch = "aarch64")] use 
crate::arch::mm::setup_parange; use crate::consts::MAX_CPU_NUM; use arch::{cpu::cpu_start, entry::arch_entry}; use config::root_zone_config; -use zone::zone_create; use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use percpu::PerCpu; +use zone::zone_create; static INITED_CPUS: AtomicU32 = AtomicU32::new(0); static ENTERED_CPUS: AtomicU32 = AtomicU32::new(0); @@ -133,6 +133,9 @@ fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { } fn rust_main(cpuid: usize, host_dtb: usize) { + #[cfg(target_arch = "x86_64")] + loop {} + arch::trap::install_trap_vector(); let mut is_primary = false; @@ -144,7 +147,6 @@ fn rust_main(cpuid: usize, host_dtb: usize) { clear_bss(); memory::heap::init(); memory::heap::test(); - } let cpu = PerCpu::new(cpuid); diff --git a/src/memory/mm.rs b/src/memory/mm.rs index 25360457..7ada9192 100644 --- a/src/memory/mm.rs +++ b/src/memory/mm.rs @@ -64,6 +64,8 @@ where pt: PT::new(), #[cfg(target_arch = "loongarch64")] pt: PT::new(), + #[cfg(target_arch = "x86_64")] + pt: PT::new(), } } diff --git a/src/platform/mod.rs b/src/platform/mod.rs index 56bd3c92..cdd8915a 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -1,6 +1,7 @@ use crate::{ config::{ - HvConfigMemoryRegion, HvZoneConfig, CONFIG_MAX_INTERRUPTS, CONFIG_MAX_MEMORY_REGIONS, CONFIG_NAME_MAXLEN, + HvConfigMemoryRegion, HvZoneConfig, CONFIG_MAX_INTERRUPTS, CONFIG_MAX_MEMORY_REGIONS, + CONFIG_NAME_MAXLEN, }, consts::INVALID_ADDRESS, }; @@ -29,6 +30,12 @@ pub mod ls3a5000_loongarch64; #[cfg(target_arch = "loongarch64")] pub use ls3a5000_loongarch64::*; +#[cfg(all(target_arch = "x86_64"))] +pub mod qemu_x86_64; + +#[cfg(all(target_arch = "x86_64"))] +use qemu_x86_64::*; + pub fn platform_root_zone_config() -> HvZoneConfig { // fill zero for memory regions and interrupts diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs new file mode 100644 index 00000000..52f3d284 --- /dev/null +++ b/src/platform/qemu_x86_64.rs @@ -0,0 +1,13 @@ +use 
crate::{arch::zone::HvArchZoneConfig, config::*}; + +pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x00000000; +pub const ROOT_ZONE_ENTRY: u64 = 0x00000000; +pub const ROOT_ZONE_CPUS: u64 = 0; + +pub const ROOT_ZONE_NAME: &str = "root-linux"; + +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 0] = []; + +pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; +pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig {}; From 90f14cfd59ede10eae72a16092080b4e78718cd4 Mon Sep 17 00:00:00 2001 From: Solicey Date: Thu, 19 Dec 2024 18:41:42 +0800 Subject: [PATCH 02/29] init UART, handle traps, enable APIC timer, BSP wakes up AP --- Cargo.lock | 83 +++++++++++++++++----------- Cargo.toml | 6 ++- scripts/qemu-x86_64.ld | 4 -- scripts/qemu-x86_64.mk | 5 +- src/arch/x86_64/ap_start.S | 57 ++++++++++++++++++++ src/arch/x86_64/cpu.rs | 95 ++++++++++++++++++++++++++++++++- src/arch/x86_64/entry.rs | 28 ++++++++-- src/arch/x86_64/gdt.rs | 79 +++++++++++++++++++++++++++ src/arch/x86_64/idt.rs | 33 ++++++++++++ src/arch/x86_64/lapic.rs | 90 +++++++++++++++++++++++++++++++ src/arch/x86_64/mod.rs | 3 ++ src/arch/x86_64/multiboot.S | 91 ++++++++++++++++++++----------- src/arch/x86_64/trap.S | 75 ++++++++++++++++++++++++++ src/arch/x86_64/trap.rs | 73 ++++++++++++++++++++++++- src/device/irqchip/i8259/mod.rs | 9 ++++ src/device/uart/uart16550.rs | 86 ++++++++++++++++++++++++++++- src/main.rs | 41 +++++++++++++- 17 files changed, 775 insertions(+), 83 deletions(-) create mode 100644 src/arch/x86_64/ap_start.S create mode 100644 src/arch/x86_64/gdt.rs create mode 100644 src/arch/x86_64/idt.rs create mode 100644 src/arch/x86_64/lapic.rs create mode 100644 src/arch/x86_64/trap.S diff --git a/Cargo.lock b/Cargo.lock index b81bf8aa..8f3f0f3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,9 +22,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.2.0" +version = "1.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bare-metal" @@ -35,6 +35,12 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "bit" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b645c5c09a7d4035949cfce1a915785aaad6f17800c35fda8a8c311c491f284" + [[package]] name = "bit_field" version = "0.9.0" @@ -55,9 +61,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bitmap-allocator" @@ -86,7 +92,7 @@ version = "0.1.0" dependencies = [ "aarch64-cpu", "bit_field 0.10.2", - "bitflags 2.5.0", + "bitflags 2.6.0", "bitmap-allocator", "buddy_system_allocator", "fdt", @@ -95,29 +101,31 @@ dependencies = [ "loongArch64", "numeric-enum-macro", "psci", + "raw-cpuid", "riscv", "riscv-decode", "sbi-rt", "spin 0.9.8", "tock-registers", + "x2apic", "x86", "x86_64", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin 0.5.2", + "spin 0.9.8", ] [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ 
"autocfg", "scopeguard", @@ -125,9 +133,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "loongArch64" @@ -141,9 +149,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "numeric-enum-macro" @@ -151,6 +159,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "300e4bdb6b46b592948e700ea1ef24a4296491f6a0ee722b258040abd15a3714" +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "psci" version = "0.1.3" @@ -168,9 +182,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -180,9 +194,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -191,9 +205,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = 
"0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "riscv" @@ -209,9 +223,9 @@ dependencies = [ [[package]] name = "riscv-decode" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec7a6dc0b0bb96a4d23271864a45c0d24dcd9dde2a1b630a35f79fa29c588bf" +checksum = "cf8b4cfb0da0528321d22daee4299a23a8c5ac8848623d716e898d2a9eec0694" [[package]] name = "riscv-target" @@ -277,12 +291,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.7.1" @@ -316,6 +324,19 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "442887c63f2c839b346c192d047a7c87e73d0689c9157b00b53dcc27dd5ea793" +[[package]] +name = "x2apic" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbcd582541cbb8ef1dfc24a3c849a64ff074b1b512af723ad90056558d424602" +dependencies = [ + "bit", + "bitflags 1.3.2", + "paste", + "raw-cpuid", + "x86_64", +] + [[package]] name = "x86" version = "0.52.0" @@ -329,12 +350,12 @@ dependencies = [ [[package]] name = "x86_64" -version = "0.14.12" +version = "0.14.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96cb6fd45bfeab6a5055c5bffdb08768bd0c069f1d946debe585bbb380a7c062" +checksum = "100555a863c0092238c2e0e814c1096c1e5cf066a309c696a87e907b5f8c5d69" dependencies = [ "bit_field 0.10.2", - "bitflags 2.5.0", + "bitflags 1.3.2", "rustversion", 
"volatile", ] diff --git a/Cargo.toml b/Cargo.toml index 7f552493..11e03933 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,8 +30,10 @@ riscv-decode = "0.2.1" loongArch64 = "0.2.4" [target.'cfg(target_arch = "x86_64")'.dependencies] -x86 = "0.52" -x86_64 = "0.14" +x86 = "0.52.0" +x86_64 = "=0.14.10" +x2apic = "0.4.3" +raw-cpuid = "10.7.0" [features] platform_qemu = [] diff --git a/scripts/qemu-x86_64.ld b/scripts/qemu-x86_64.ld index f0709e4e..4758d25a 100644 --- a/scripts/qemu-x86_64.ld +++ b/scripts/qemu-x86_64.ld @@ -34,11 +34,7 @@ SECTIONS . = ALIGN(4K); edata = .; .bss : { - boot_stack = .; *(.bss.stack) - . = ALIGN(4K); - boot_stack_top = .; - sbss = .; *(.bss .bss.*) *(.sbss .sbss.*) diff --git a/scripts/qemu-x86_64.mk b/scripts/qemu-x86_64.mk index f18e751b..dae152be 100644 --- a/scripts/qemu-x86_64.mk +++ b/scripts/qemu-x86_64.mk @@ -2,10 +2,11 @@ QEMU := qemu-system-x86_64 QEMU_ARGS := -machine q35 QEMU_ARGS += -cpu host -accel kvm -# QEMU_ARGS += -smp 1 +QEMU_ARGS += -smp 4 +QEMU_ARGS += -serial mon:stdio QEMU_ARGS += -m 2G QEMU_ARGS += -nographic -QEMU_ARGS += -kernel $(hvisor_bin) +QEMU_ARGS += -kernel $(hvisor_elf) $(hvisor_bin): elf $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ \ No newline at end of file diff --git a/src/arch/x86_64/ap_start.S b/src/arch/x86_64/ap_start.S new file mode 100644 index 00000000..ffdbaf30 --- /dev/null +++ b/src/arch/x86_64/ap_start.S @@ -0,0 +1,57 @@ +.equ pa_ap_start32, ap_start32 - ap_start16 + {ap_start_page_paddr} +.equ pa_ap_gdt, .Lap_tmp_gdt - ap_start16 + {ap_start_page_paddr} +.equ pa_ap_gdt_desc, .Lap_tmp_gdt_desc - ap_start16 + {ap_start_page_paddr} +.equ stack_ptr, {ap_start_page_paddr} + 0xff0 +.equ entry_ptr, {ap_start_page_paddr} + 0xff8 + +.section .text +.code16 +// 0x6000 +.p2align 12 +.global ap_start16 +ap_start16: + cli + // clear cache + wbinvd + + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + mov fs, ax + mov gs, ax + + // load the 64-bit GDT + lgdt [pa_ap_gdt_desc] + + // switch to 
protected-mode + mov eax, cr0 + or eax, (1 << 0) + mov cr0, eax + + // far jump to 32-bit code. 0x8 is code32 segment selector + ljmp 0x8, offset pa_ap_start32 + +.code32 +ap_start32: + mov esp, [stack_ptr] + mov eax, [entry_ptr] + jmp eax + +.balign 8 +.Lap_tmp_gdt_desc: + .short .Lap_tmp_gdt_end - .Lap_tmp_gdt - 1 // limit + .long pa_ap_gdt // base + +.balign 16 +.Lap_tmp_gdt: + .quad 0x0000000000000000 // 0x00: null + .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) +.Lap_tmp_gdt_end: + +// 0x7000 +.p2align 12 +.global ap_end +ap_end: \ No newline at end of file diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index d629b9a4..861f6e54 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,20 +1,108 @@ -pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) {} +use crate::arch::gdt::GdtStruct; +use crate::arch::lapic::{busy_wait, local_apic}; +use crate::consts::{core_end, PER_CPU_SIZE}; +use crate::memory::{addr::phys_to_virt, PhysAddr, PAGE_SIZE}; +use alloc::boxed::Box; +use core::arch::global_asm; +use core::time::Duration; +use raw_cpuid::CpuId; +use x86_64::structures::tss::TaskStateSegment; + +const AP_START_PAGE_IDX: u8 = 6; +const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; + +global_asm!( + include_str!("ap_start.S"), + ap_start_page_paddr = const AP_START_PAGE_PADDR, +); + +unsafe fn setup_ap_start_page(cpuid: usize) { + extern "C" { + fn ap_start16(); + fn ap_end(); + fn ap_entry32(); + } + const U64_PER_PAGE: usize = PAGE_SIZE / 8; + + let ap_start_page_ptr = phys_to_virt(AP_START_PAGE_PADDR) as *mut usize; + let ap_start_page = core::slice::from_raw_parts_mut(ap_start_page_ptr, U64_PER_PAGE); + 
core::ptr::copy_nonoverlapping( + ap_start16 as *const usize, + ap_start_page_ptr, + (ap_end as usize - ap_start16 as usize) / 8, + ); + ap_start_page[U64_PER_PAGE - 2] = core_end() as usize + (cpuid + 1) * PER_CPU_SIZE; + ap_start_page[U64_PER_PAGE - 1] = ap_entry32 as usize; +} + +pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { + unsafe { setup_ap_start_page(cpuid) }; + + let lapic = local_apic(); + + // Intel SDM Vol 3C, Section 8.4.4, MP Initialization Example + unsafe { lapic.send_init_ipi(cpuid as u32) }; + busy_wait(Duration::from_millis(10)); // 10ms + unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; + busy_wait(Duration::from_micros(200)); // 200us + unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; +} + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct TrapFrame { + pub rax: u64, + pub rcx: u64, + pub rdx: u64, + pub rbx: u64, + pub rbp: u64, + pub rsi: u64, + pub rdi: u64, + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, + + // pushed by 'trap.S' + pub vector: u64, + pub error_code: u64, + + // pushed by CPU + pub rip: u64, + pub cs: u64, + pub rflags: u64, + pub rsp: u64, + pub ss: u64, +} #[repr(C)] #[derive(Debug)] pub struct ArchCpu { pub cpuid: usize, pub power_on: bool, + pub gdt: GdtStruct, } impl ArchCpu { pub fn new(cpuid: usize) -> Self { + let boxed = Box::new(TaskStateSegment::new()); + let tss = Box::leak(boxed); Self { cpuid, power_on: false, + gdt: GdtStruct::new(tss), } } + pub fn per_cpu_init(&'static self) { + self.gdt.load(); + self.gdt.load_tss(); + } + pub fn reset(&mut self, entry: usize, dtb: usize) {} pub fn run(&mut self) -> ! 
{ @@ -27,5 +115,8 @@ impl ArchCpu { } pub fn this_cpu_id() -> usize { - 0 + match CpuId::new().get_feature_info() { + Some(info) => info.initial_local_apic_id() as usize, + None => 0, + } } diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index e964a8c6..5e98da95 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,11 +1,15 @@ +use super::cpu::this_cpu_id; +use crate::arch::cpu; +use crate::consts::PER_CPU_SIZE; +use crate::rust_main; use core::arch::global_asm; - use x86::msr::IA32_EFER; use x86_64::registers::control::{Cr0Flags, Cr4Flags}; use x86_64::registers::model_specific::EferFlags; +const MULTIBOOT_HEADER_MAGIC: i32 = 0x1BADB002; +const MULTIBOOT_HEADER_FLAGS: i32 = 0x00010002; const PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; -const BOOT_KERNEL_STACK_SIZE: usize = 4096 * 4; const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits() | Cr0Flags::MONITOR_COPROCESSOR.bits() @@ -18,9 +22,12 @@ const EFER: u64 = EferFlags::LONG_MODE_ENABLE.bits() | EferFlags::NO_EXECUTE_ENA global_asm!( include_str!("multiboot.S"), - main_entry = sym crate::rust_main, + multiboot_header_magic = const MULTIBOOT_HEADER_MAGIC, + multiboot_header_flags = const MULTIBOOT_HEADER_FLAGS, + rust_entry = sym rust_entry, + rust_entry_secondary = sym rust_entry_secondary, offset = const PHYS_VIRT_OFFSET, - boot_stack_size = const BOOT_KERNEL_STACK_SIZE, + per_cpu_size = const PER_CPU_SIZE, cr0 = const CR0, cr4 = const CR4, efer_msr = const IA32_EFER, @@ -36,8 +43,19 @@ pub unsafe extern "C" fn arch_entry() -> i32 { .code32 mov edi, eax // magic mov esi, ebx // multiboot info - jmp entry32 + jmp bsp_entry32 ", options(noreturn), ); } + +fn rust_entry() { + crate::clear_bss(); + println!(""); + rust_main(this_cpu_id(), 0); +} + +fn rust_entry_secondary() { + println!("CPUID: {}", this_cpu_id()); + loop {} +} diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs new file mode 100644 index 00000000..762c4482 --- /dev/null +++ 
b/src/arch/x86_64/gdt.rs @@ -0,0 +1,79 @@ +use crate::arch::cpu::{self, this_cpu_id}; +use alloc::boxed::Box; +use alloc::collections::btree_map::BTreeMap; +use spin::Mutex; +use x86_64::instructions::tables::{lgdt, load_tss}; +use x86_64::registers::segmentation::{Segment, SegmentSelector, CS}; +use x86_64::structures::gdt::{Descriptor, DescriptorFlags}; +use x86_64::structures::{tss::TaskStateSegment, DescriptorTablePointer}; +use x86_64::{addr::VirtAddr, PrivilegeLevel}; + +#[repr(align(16))] +#[derive(Debug)] +pub struct GdtStruct { + table: [u64; 16], + tss: &'static TaskStateSegment, +} + +/*lazy_static! { + static ref TSS: Mutex> = Mutex::new(BTreeMap::new()); + static ref GDT: Mutex> = Mutex::new(BTreeMap::new()); +}*/ + +impl GdtStruct { + pub const KCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0); + pub const KCODE64_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0); + pub const KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring0); + pub const TSS_SELECTOR: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring0); + + pub fn new(tss: &'static TaskStateSegment) -> Self { + let mut table: [u64; 16] = [0; 16]; + table[1] = DescriptorFlags::KERNEL_CODE32.bits(); + table[2] = DescriptorFlags::KERNEL_CODE64.bits(); + table[3] = DescriptorFlags::KERNEL_DATA.bits(); + if let Descriptor::SystemSegment(low, high) = Descriptor::tss_segment(&tss) { + table[4] = low; + table[5] = high; + } + Self { table, tss } + } + + fn pointer(&self) -> DescriptorTablePointer { + DescriptorTablePointer { + base: VirtAddr::new(self.table.as_ptr() as u64), + limit: (core::mem::size_of_val(&self.table) - 1) as u16, + } + } + + pub fn load(&'static self) { + unsafe { + lgdt(&self.pointer()); + CS::set_reg(GdtStruct::KCODE64_SELECTOR); + } + } + + pub fn load_tss(&'static self) { + unsafe { + load_tss(GdtStruct::TSS_SELECTOR); + } + } +} + +/*fn new_static_tss() -> &'static TaskStateSegment { 
+ let mut boxed = Box::new(TaskStateSegment::new()); + Box::leak(boxed) +} + +pub fn load_gdt_tss() { + println!("Initializing GDT and TSS..."); + + let tss = &mut TSS.lock(); + let gdt = &mut GDT.lock(); + let cpuid = this_cpu_id(); + + tss.insert(cpuid, TaskStateSegment::new()); + gdt.insert(cpuid, GdtStruct::new(tss.get(&cpuid).unwrap())); + if let Some(tss) = tss.get(&cpuid) { + gdt.insert(cpuid, GdtStruct::new(tss)); + } +}*/ diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs new file mode 100644 index 00000000..d67f937b --- /dev/null +++ b/src/arch/x86_64/idt.rs @@ -0,0 +1,33 @@ +use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; + +const NUM_INT: usize = 256; + +pub struct IdtStruct { + table: InterruptDescriptorTable, +} + +impl IdtStruct { + pub fn new() -> Self { + extern "C" { + #[link_name = "_hyp_trap_vector"] + static ENTRIES: [extern "C" fn(); NUM_INT]; + } + let mut idt = Self { + table: InterruptDescriptorTable::new(), + }; + let entries = unsafe { + core::slice::from_raw_parts_mut( + &mut idt.table as *mut _ as *mut Entry, + NUM_INT, + ) + }; + for i in 0..NUM_INT { + entries[i].set_handler_fn(unsafe { core::mem::transmute(ENTRIES[i]) }); + } + idt + } + + pub fn load(&'static self) { + self.table.load(); + } +} diff --git a/src/arch/x86_64/lapic.rs b/src/arch/x86_64/lapic.rs new file mode 100644 index 00000000..32e58a6f --- /dev/null +++ b/src/arch/x86_64/lapic.rs @@ -0,0 +1,90 @@ +use self::vectors::*; +use crate::device::irqchip::i8259::enable_irq; +use core::time::Duration; +use raw_cpuid::CpuId; +use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}; +use x86_64::instructions::port::Port; + +type TimeValue = Duration; + +pub mod vectors { + pub const APIC_TIMER_VECTOR: u8 = 0xf0; + pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; + pub const APIC_ERROR_VECTOR: u8 = 0xf2; +} + +static mut LOCAL_APIC: Option = None; +static mut CPU_FREQ_MHZ: u64 = 4_000; +const LAPIC_TICKS_PER_SEC: u64 = 
1_000_000_000; // TODO: need to calibrate +const TICKS_PER_SEC: u64 = 1; + +pub fn local_apic<'a>() -> &'a mut LocalApic { + // It's safe as LAPIC is per-cpu. + unsafe { LOCAL_APIC.as_mut().unwrap() } +} + +pub fn current_ticks() -> u64 { + unsafe { core::arch::x86_64::_rdtsc() } +} + +pub fn ticks_to_nanos(ticks: u64) -> u64 { + ticks * 1_000 / unsafe { CPU_FREQ_MHZ } +} + +pub fn current_time() -> TimeValue { + TimeValue::from_nanos(ticks_to_nanos(current_ticks())) +} + +pub fn busy_wait(duration: Duration) { + busy_wait_until(current_time() + duration); +} + +fn busy_wait_until(deadline: TimeValue) { + while current_time() < deadline { + core::hint::spin_loop(); + } +} + +pub fn init_primary() { + println!("Initializing Local APIC..."); + + unsafe { + // Disable 8259A interrupt controllers + Port::::new(0x20).write(0xff); + Port::::new(0xA0).write(0xff); + } + + let mut lapic = LocalApicBuilder::new() + .timer_vector(APIC_TIMER_VECTOR as _) + .error_vector(APIC_ERROR_VECTOR as _) + .spurious_vector(APIC_SPURIOUS_VECTOR as _) + .build() + .unwrap(); + + if let Some(freq) = CpuId::new() + .get_processor_frequency_info() + .map(|info| info.processor_max_frequency()) + { + if freq > 0 { + println!("Got TSC frequency by CPUID: {} MHz", freq); + unsafe { CPU_FREQ_MHZ = freq as u64 } + } + } + + /*if let Some(sth) = CpuId::new().get_processor_brand_string() { + println!("{:?}", sth); + }*/ + + unsafe { + lapic.enable(); + lapic.set_timer_mode(TimerMode::Periodic); + lapic.set_timer_divide(TimerDivide::Div256); + lapic.set_timer_initial((LAPIC_TICKS_PER_SEC / TICKS_PER_SEC) as u32); + } + + unsafe { + LOCAL_APIC = Some(lapic); + } + + enable_irq(); +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 35fd6cf0..ee5f4f31 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,7 +1,10 @@ #![allow(unused)] pub mod cpu; pub mod entry; +pub mod gdt; +pub mod idt; pub mod ipi; +pub mod lapic; pub mod mm; pub mod paging; pub mod s1pt; diff --git 
a/src/arch/x86_64/multiboot.S b/src/arch/x86_64/multiboot.S index 41c1e412..47623cb8 100644 --- a/src/arch/x86_64/multiboot.S +++ b/src/arch/x86_64/multiboot.S @@ -1,6 +1,4 @@ -.equ MULTIBOOT_HEADER_MAGIC, 0x1BADB002 -.equ MULTIBOOT_HEADER_FLAGS, 0x00010002 -.equ MULTIBOOT_CHECKSUM, -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) +.equ boot_stack_top, __core_end + {per_cpu_size} .section .text.entry @@ -10,19 +8,18 @@ .balign 4 .type multiboot_header, STT_OBJECT multiboot_header: - .int MULTIBOOT_HEADER_MAGIC - .int MULTIBOOT_HEADER_FLAGS - .int MULTIBOOT_CHECKSUM + .int {multiboot_header_magic} + .int {multiboot_header_flags} + .int -({multiboot_header_magic} + {multiboot_header_flags}) .int multiboot_header - {offset} // header_addr .int skernel - {offset} // load_addr .int edata - {offset} // load_end .int ebss - {offset} // bss_end_addr .int arch_entry - {offset} // entry_addrs -entry32: - // load the temporary GDT - lgdt [.Ltmp_gdt_desc_phys - {offset}] - mov ax, 0x18 // data segment selector +.macro ENTRY32_COMMON + // set data segment selectors + mov ax, 0x18 mov ss, ax mov ds, ax mov es, ax @@ -46,34 +43,70 @@ entry32: // set protected mode, write protect, paging bit in CR0 mov eax, {cr0} mov cr0, eax +.endm + +.macro ENTRY64_COMMON + // clear segment selectors + xor ax, ax + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax +.endm + +bsp_entry32: + // load the temporary GDT + lgdt [.Ltmp_gdt_desc_phys - {offset}] + ENTRY32_COMMON // long return to the 64-bit entry push 0x10 // code64 segment selector - lea eax, [entry64 - {offset}] + lea eax, [bsp_entry64 - {offset}] + push eax + retf + +.global ap_entry32 +ap_entry32: + ENTRY32_COMMON + + // long return to the 64-bit entry + push 0x10 // code64 segment selector + lea eax, [ap_entry64 - {offset}] push eax retf .section .text.entry64 .code64 -entry64: +bsp_entry64: // reload GDT by high address movabs rax, offset .Ltmp_gdt_desc lgdt [rax] - // clear segment selectors - xor ax, ax - mov ss, 
ax - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax + ENTRY64_COMMON - // set stack and jump to rust_main + // set stack and jump to rust_entry movabs rsp, offset boot_stack_top - movabs rax, offset {main_entry} + movabs rax, offset {rust_entry} + call rax + jmp .Lhlt + +ap_entry64: + ENTRY64_COMMON + + // set rsp to high address + mov rax, {offset} + add rsp, rax + + // jump to rust_entry_secondary + movabs rax, offset {rust_entry_secondary} call rax -1: jmp 1b + jmp .Lhlt + +.Lhlt: + hlt + jmp .Lhlt .section .rodata .balign 8 @@ -104,17 +137,11 @@ entry64: .quad .Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) .Ltmp_pdpt_low: - .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) - .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) .zero 8 * 510 .Ltmp_pdpt_high: - .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) - .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) .zero 8 * 510 - -.section .bss.stack -.balign 4096 -boot_stack: - .space {boot_stack_size} -boot_stack_top: diff --git a/src/arch/x86_64/trap.S b/src/arch/x86_64/trap.S new file mode 100644 index 00000000..613f1a15 --- /dev/null +++ b/src/arch/x86_64/trap.S @@ -0,0 +1,75 @@ +.equ NUM_INT, 256 + +.altmacro +.macro DEF_HANDLER, i +.Ltrap_handler_\i: +.if \i == 8 || (\i >= 10 && \i <= 14) || \i == 17 + // error code pushed by CPU + push \i // interrupt vector + jmp .Ltrap_common +.else + push 0 // fill in error code in trap frame + push \i // interrupt vector + jmp .Ltrap_common +.endif +.endm + +.macro DEF_TABLE_ENTRY, i + .quad .Ltrap_handler_\i +.endm + +.section .text +_trap_handlers: +.set i, 0 +.rept NUM_INT 
+ DEF_HANDLER %i + .set i, i + 1 +.endr + +.Ltrap_common: + push r15 + push r14 + push r13 + push r12 + push r11 + push r10 + push r9 + push r8 + push rdi + push rsi + push rbp + push rbx + push rdx + push rcx + push rax + + mov rdi, rsp + call {0} + + pop rax + pop rcx + pop rdx + pop rbx + pop rbp + pop rsi + pop rdi + pop r8 + pop r9 + pop r10 + pop r11 + pop r12 + pop r13 + pop r14 + pop r15 + + add rsp, 16 // pop vector, error_code + iretq + +.section .rodata +.global _hyp_trap_vector +_hyp_trap_vector: +.set i, 0 +.rept NUM_INT + DEF_TABLE_ENTRY %i + .set i, i + 1 +.endr \ No newline at end of file diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 1b09da50..9a4a7e0d 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1 +1,72 @@ -pub fn install_trap_vector() {} +use alloc::vec; + +use super::{cpu::TrapFrame, lapic::vectors::APIC_TIMER_VECTOR}; +use crate::arch::{idt::IdtStruct, lapic::local_apic}; +use core::arch::global_asm; + +global_asm!( + include_str!("trap.S"), + sym arch_handle_trap +); + +const IRQ_VECTOR_START: u8 = 0x20; +const IRQ_VECTOR_END: u8 = 0xff; + +#[allow(dead_code)] +#[allow(non_snake_case)] +#[allow(non_upper_case_globals)] +pub mod ExceptionType { + pub const DIVIDE_ERROR: u8 = 0; + pub const DEBUG: u8 = 1; + pub const NMI_INTERRUPT: u8 = 2; + pub const BREAKPOINT: u8 = 3; + pub const OVERFLOW: u8 = 4; + pub const BOUND_RANGE_EXCEEDED: u8 = 5; + pub const INVALID_OPCODE: u8 = 6; + pub const DEVICE_NOT_AVAILABLE: u8 = 7; + pub const DOUBLE_FAULT: u8 = 8; + pub const COPROCESSOR_SEGMENT_OVERRUN: u8 = 9; + pub const INVALID_TSS: u8 = 10; + pub const SEGMENT_NOT_PRESENT: u8 = 11; + pub const STACK_SEGMENT_FAULT: u8 = 12; + pub const GENERAL_PROTECTION: u8 = 13; + pub const PAGE_FAULT: u8 = 14; + pub const FLOATING_POINT_ERROR: u8 = 16; + pub const ALIGNMENT_CHECK: u8 = 17; + pub const MACHINE_CHECK: u8 = 18; + pub const SIMD_FLOATING_POINT_EXCEPTION: u8 = 19; +} + +lazy_static::lazy_static! 
{ + static ref IDT: IdtStruct = IdtStruct::new(); +} + +pub fn install_trap_vector() { + IDT.load(); +} + +#[no_mangle] +pub fn arch_handle_trap(tf: &mut TrapFrame) { + // println!("trap {} @ {:#x}", tf.vector, tf.rip); + match tf.vector as u8 { + IRQ_VECTOR_START..=IRQ_VECTOR_END => handle_irq(tf.vector as u8), + _ => { + println!( + "Unhandled exception {} (error_code = {:#x}) @ {:#x}", + tf.vector, tf.error_code, tf.rip + ); + } + } +} + +fn handle_irq(vector: u8) { + match vector { + APIC_TIMER_VECTOR => { + // println!("Timer"); + unsafe { local_apic().end_of_interrupt() }; + } + _ => { + println!("Unhandled irq {}", vector); + } + } +} diff --git a/src/device/irqchip/i8259/mod.rs b/src/device/irqchip/i8259/mod.rs index 5d1ce2bf..1fc4d770 100644 --- a/src/device/irqchip/i8259/mod.rs +++ b/src/device/irqchip/i8259/mod.rs @@ -1,4 +1,13 @@ use crate::zone::Zone; +use core::arch::asm; + +pub fn enable_irq() { + unsafe { asm!("sti") }; +} + +pub fn disable_irq() { + unsafe { asm!("cli") }; +} pub fn inject_irq(_irq: usize, _is_hardware: bool) {} diff --git a/src/device/uart/uart16550.rs b/src/device/uart/uart16550.rs index 5d534e04..8352184a 100644 --- a/src/device/uart/uart16550.rs +++ b/src/device/uart/uart16550.rs @@ -1,5 +1,87 @@ -pub fn console_putchar(c: u8) {} +#![allow(dead_code)] +use spin::Mutex; +use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; + +const UART_CLOCK_FACTOR: usize = 16; +const OSC_FREQ: usize = 1_843_200; + +lazy_static::lazy_static! 
{ + static ref COM1: Mutex = { + let mut uart = Uart16550::new(0x3f8); + uart.init(115200); + Mutex::new(uart) + }; +} + +struct Uart16550 { + rhr: PortReadOnly, // receive holding + thr: PortWriteOnly, // transmit holding + ier: PortWriteOnly, // interrupt enable + fcr: PortWriteOnly, // fifo control + lcr: PortWriteOnly, // line control + mcr: PortWriteOnly, // modem control + lsr: PortReadOnly, // line status +} + +impl Uart16550 { + const fn new(base_port: u16) -> Self { + Self { + rhr: PortReadOnly::new(base_port), + thr: PortWriteOnly::new(base_port), + ier: PortWriteOnly::new(base_port + 1), + fcr: PortWriteOnly::new(base_port + 2), + lcr: PortWriteOnly::new(base_port + 3), + mcr: PortWriteOnly::new(base_port + 4), + lsr: PortReadOnly::new(base_port + 5), + } + } + + fn init(&mut self, baud_rate: usize) { + unsafe { + // disable interrupts + self.ier.write(0x00); + + // enable DLAB, set baud rate + let divisor = OSC_FREQ / (baud_rate * UART_CLOCK_FACTOR); + self.lcr.write(0x80); + self.thr.write((divisor & 0xff) as u8); + self.ier.write((divisor >> 8) as u8); + + // disable DLAB, set word length to 8 bits + self.lcr.write(0x03); + + // enable fifo, clear tx/rx queues + // set interrupt level to 14 bytes + self.fcr.write(0xC7); + + // data terminal ready, request to send + // enable option 2 output (used as interrupt line for CPU) + self.mcr.write(0x0B); + } + } + + fn putchar(&mut self, c: u8) { + unsafe { + while self.lsr.read() & (1 << 5) == 0 {} + self.thr.write(c); + } + } + + fn getchar(&mut self) -> Option { + unsafe { + if self.lsr.read() & 1 != 0 { + Some(self.rhr.read()) + } else { + None + } + } + } +} + +pub fn console_putchar(c: u8) { + COM1.lock().putchar(c); +} pub fn console_getchar() -> Option { - None + COM1.lock().getchar() } diff --git a/src/main.rs b/src/main.rs index bd6b014b..cd3cf73a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,7 +42,10 @@ mod zone; #[cfg(target_arch = "aarch64")] use crate::arch::mm::setup_parange; use 
crate::consts::MAX_CPU_NUM; -use arch::{cpu::cpu_start, entry::arch_entry}; +use arch::{ + cpu::{self, cpu_start, ArchCpu}, + entry::arch_entry, +}; use config::root_zone_config; use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use percpu::PerCpu; @@ -124,6 +127,9 @@ fn per_cpu_init(cpu: &mut PerCpu) { } fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { + #[cfg(target_arch = "x86_64")] + arch::lapic::init_primary(); + for cpu_id in 0..MAX_CPU_NUM { if cpu_id == this_id { continue; @@ -132,9 +138,40 @@ fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { } } -fn rust_main(cpuid: usize, host_dtb: usize) { +fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { + arch::trap::install_trap_vector(); // load idt + + let mut is_primary = false; + println!("Hello, HVISOR!"); + if MASTER_CPU.load(Ordering::Acquire) == -1 { + MASTER_CPU.store(cpuid as i32, Ordering::Release); + is_primary = true; + memory::heap::init(); + memory::heap::test(); + } + + let cpu = PerCpu::new(cpuid); + println!( + "Booting CPU {}: {:p} arch:{:p}, DTB: {:#x}", + cpu.id, cpu as *const _, &cpu.arch_cpu as *const _, host_dtb + ); + #[cfg(target_arch = "x86_64")] + cpu.arch_cpu.per_cpu_init(); // load gdt and tss + + if is_primary { + wakeup_secondary_cpus(cpu.id, host_dtb); + } + + // x86_64::instructions::interrupts::int3(); + println!("END OF MAIN"); + loop {} +} + +fn rust_main(cpuid: usize, host_dtb: usize) { + #[cfg(target_arch = "x86_64")] + x86_rust_main_tmp(cpuid, host_dtb); arch::trap::install_trap_vector(); From 65fb9311f7003c94194bf688e1a6994b04a0e7b6 Mon Sep 17 00:00:00 2001 From: Solicey Date: Thu, 2 Jan 2025 18:32:03 +0800 Subject: [PATCH 03/29] enable VMX, configure VMCS, test with temporary guest code --- src/arch/x86_64/cpu.rs | 204 ++++++++++++-- src/arch/x86_64/entry.rs | 10 +- src/arch/x86_64/gdt.rs | 7 +- src/arch/x86_64/mod.rs | 1 + src/arch/x86_64/vmx.rs | 486 ++++++++++++++++++++++++++++++++ src/device/irqchip/i8259/mod.rs | 4 +- src/main.rs | 28 +- 
src/memory/frame.rs | 5 +- 8 files changed, 713 insertions(+), 32 deletions(-) create mode 100644 src/arch/x86_64/vmx.rs diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 861f6e54..638bb603 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,21 +1,70 @@ use crate::arch::gdt::GdtStruct; use crate::arch::lapic::{busy_wait, local_apic}; +use crate::arch::vmx::*; use crate::consts::{core_end, PER_CPU_SIZE}; -use crate::memory::{addr::phys_to_virt, PhysAddr, PAGE_SIZE}; +use crate::error::{HvError, HvResult}; +use crate::memory::{addr::phys_to_virt, Frame, PhysAddr, PAGE_SIZE}; +use crate::percpu::this_cpu_data; use alloc::boxed::Box; -use core::arch::global_asm; +use core::arch::{asm, global_asm}; +use core::mem::size_of; use core::time::Duration; use raw_cpuid::CpuId; use x86_64::structures::tss::TaskStateSegment; const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; +const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; global_asm!( include_str!("ap_start.S"), ap_start_page_paddr = const AP_START_PAGE_PADDR, ); +macro_rules! save_regs_to_stack { + () => { + " + push r15 + push r14 + push r13 + push r12 + push r11 + push r10 + push r9 + push r8 + push rdi + push rsi + push rbp + sub rsp, 8 + push rbx + push rdx + push rcx + push rax" + }; +} + +macro_rules! 
restore_regs_from_stack { + () => { + " + pop rax + pop rcx + pop rdx + pop rbx + add rsp, 8 + pop rbp + pop rsi + pop rdi + pop r8 + pop r9 + pop r10 + pop r11 + pop r12 + pop r13 + pop r14 + pop r15" + }; +} + unsafe fn setup_ap_start_page(cpuid: usize) { extern "C" { fn ap_start16(); @@ -51,10 +100,29 @@ pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { #[repr(C)] #[derive(Debug, Default, Clone, Copy)] pub struct TrapFrame { + pub usr: [u64; 15], + + // pushed by 'trap.S' + pub vector: u64, + pub error_code: u64, + + // pushed by CPU + pub rip: u64, + pub cs: u64, + pub rflags: u64, + pub rsp: u64, + pub ss: u64, +} + +/// General-Purpose Registers for 64-bit x86 architecture. +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct GeneralRegisters { pub rax: u64, pub rcx: u64, pub rdx: u64, pub rbx: u64, + _unused_rsp: u64, pub rbp: u64, pub rsi: u64, pub rdi: u64, @@ -66,25 +134,20 @@ pub struct TrapFrame { pub r13: u64, pub r14: u64, pub r15: u64, - - // pushed by 'trap.S' - pub vector: u64, - pub error_code: u64, - - // pushed by CPU - pub rip: u64, - pub cs: u64, - pub rflags: u64, - pub rsp: u64, - pub ss: u64, } #[repr(C)] #[derive(Debug)] pub struct ArchCpu { + // guest_regs and host_stack_top should always be at the first. 
+ guest_regs: GeneralRegisters, + host_stack_top: u64, pub cpuid: usize, pub power_on: bool, pub gdt: GdtStruct, + vmcs_revision_id: u32, + vmxon_region: VmxRegion, + vmcs_region: VmxRegion, } impl ArchCpu { @@ -95,23 +158,113 @@ impl ArchCpu { cpuid, power_on: false, gdt: GdtStruct::new(tss), + vmcs_revision_id: 0, + vmxon_region: VmxRegion::uninit(), + vmcs_region: VmxRegion::uninit(), + guest_regs: GeneralRegisters::default(), + host_stack_top: 0, } } - pub fn per_cpu_init(&'static self) { - self.gdt.load(); - self.gdt.load_tss(); + pub unsafe fn init(&mut self, entry: usize, dtb: usize) { + self.activate_vmx(); + self.setup_vmcs(entry); } - pub fn reset(&mut self, entry: usize, dtb: usize) {} + unsafe fn activate_vmx(&mut self) { + assert!(check_vmx_support()); + assert!(!is_vmx_enabled()); + + // enable VMXON + enable_vmxon().unwrap(); + + // TODO: check related registers + + // get VMCS revision identifier in IA32_VMX_BASIC MSR + self.vmcs_revision_id = get_vmcs_revision_id(); + self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false).unwrap(); + + execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap(); + + info!( + "VMX enabled, region: 0x{:x}", + self.vmxon_region.start_paddr(), + ); + } + + unsafe fn setup_vmcs(&mut self, entry: usize) { + self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false).unwrap(); + + enable_vmcs(self.vmcs_region.start_paddr() as u64).unwrap(); + setup_vmcs_host(Self::vmx_exit as usize).unwrap(); + setup_vmcs_guest(entry).unwrap(); + setup_vmcs_control().unwrap(); + + info!( + "VMCS enabled, region: 0x{:x}", + self.vmcs_region.start_paddr(), + ); + } pub fn run(&mut self) -> ! { + assert!(this_cpu_id() == self.cpuid); + // TODO: this_cpu_data().cpu_on_entry + unsafe { + self.init(test_guest as usize, this_cpu_data().dtb_ipa); + set_host_rsp(&self.host_stack_top as *const _ as usize).unwrap(); + self.vmx_launch(); + } loop {} } pub fn idle(&mut self) -> ! 
{ + assert!(this_cpu_id() == self.cpuid); + unsafe { self.init(0, this_cpu_data().dtb_ipa) }; loop {} } + + #[naked] + unsafe extern "C" fn vmx_launch(&mut self) -> ! { + asm!( + "mov [rdi + {host_stack_top}], rsp", // save current RSP to host_stack_top + "mov rsp, rdi", // set RSP to guest regs area + restore_regs_from_stack!(), + "vmlaunch", + "jmp {failed}", + host_stack_top = const size_of::(), + failed = sym Self::vmx_entry_failed, + options(noreturn), + ) + } + + #[naked] + unsafe extern "C" fn vmx_exit(&mut self) -> ! { + asm!( + save_regs_to_stack!(), + "mov r15, rsp", // save temporary RSP to r15 + "mov rdi, rsp", // set the first arg to RSP + "mov rsp, [rsp + {host_stack_top}]", // set RSP to host_stack_top + "call {vmexit_handler}", // call vmexit_handler + "mov rsp, r15", // load temporary RSP from r15 + restore_regs_from_stack!(), + "vmresume", + "jmp {failed}", + host_stack_top = const size_of::(), + vmexit_handler = sym Self::vmexit_handler, + failed = sym Self::vmx_entry_failed, + options(noreturn), + ); + } + + unsafe fn vmx_entry_failed() -> ! { + panic!("VMX instruction error: {}", instruction_error()); + } + + unsafe fn vmexit_handler(&mut self) { + let exit_info = exit_info().unwrap(); + debug!("vmexit rax:{} {:#x?}", self.guest_regs.rax, exit_info); + advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL).unwrap(); + } } pub fn this_cpu_id() -> usize { @@ -120,3 +273,20 @@ pub fn this_cpu_id() -> usize { None => 0, } } + +#[naked] +unsafe extern "C" fn test_guest() -> ! 
{ + core::arch::asm!( + " + mov rax, 0 + mov rdi, 2 + mov rsi, 3 + mov rdx, 3 + mov rcx, 3 + 2: + vmcall + add rax, 1 + jmp 2b", + options(noreturn), + ); +} diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index 5e98da95..1d43a4e8 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,6 +1,7 @@ use super::cpu::this_cpu_id; use crate::arch::cpu; use crate::consts::PER_CPU_SIZE; +use crate::memory::addr::PHYS_VIRT_OFFSET; use crate::rust_main; use core::arch::global_asm; use x86::msr::IA32_EFER; @@ -9,7 +10,7 @@ use x86_64::registers::model_specific::EferFlags; const MULTIBOOT_HEADER_MAGIC: i32 = 0x1BADB002; const MULTIBOOT_HEADER_FLAGS: i32 = 0x00010002; -const PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; +const X86_PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits() | Cr0Flags::MONITOR_COPROCESSOR.bits() @@ -26,7 +27,7 @@ global_asm!( multiboot_header_flags = const MULTIBOOT_HEADER_FLAGS, rust_entry = sym rust_entry, rust_entry_secondary = sym rust_entry_secondary, - offset = const PHYS_VIRT_OFFSET, + offset = const X86_PHYS_VIRT_OFFSET, per_cpu_size = const PER_CPU_SIZE, cr0 = const CR0, cr4 = const CR4, @@ -51,11 +52,12 @@ pub unsafe extern "C" fn arch_entry() -> i32 { fn rust_entry() { crate::clear_bss(); + unsafe { PHYS_VIRT_OFFSET = X86_PHYS_VIRT_OFFSET }; println!(""); rust_main(this_cpu_id(), 0); } fn rust_entry_secondary() { - println!("CPUID: {}", this_cpu_id()); - loop {} + // println!("CPUID: {}", this_cpu_id()); + rust_main(this_cpu_id(), 0); } diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index 762c4482..d4707bdf 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -45,15 +45,10 @@ impl GdtStruct { } } - pub fn load(&'static self) { + pub fn load(&self) { unsafe { lgdt(&self.pointer()); CS::set_reg(GdtStruct::KCODE64_SELECTOR); - } - } - - pub fn load_tss(&'static self) { - unsafe { load_tss(GdtStruct::TSS_SELECTOR); } } diff 
--git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index ee5f4f31..c569b835 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -10,6 +10,7 @@ pub mod paging; pub mod s1pt; pub mod s2pt; pub mod trap; +pub mod vmx; pub mod zone; pub use s1pt::Stage1PageTable; diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs new file mode 100644 index 00000000..defb340b --- /dev/null +++ b/src/arch/x86_64/vmx.rs @@ -0,0 +1,486 @@ +#![allow(non_camel_case_types)] +#![allow(clippy::upper_case_acronyms)] + +use crate::error::{HvError, HvResult}; +use crate::memory::{Frame, PhysAddr}; +use bit_field::BitField; +use bitflags::{bitflags, Flags}; +use raw_cpuid::CpuId; +use x86::dtables::{self, DescriptorTablePointer}; +use x86::msr::{ + IA32_EFER, IA32_FEATURE_CONTROL, IA32_FS_BASE, IA32_GS_BASE, IA32_PAT, IA32_VMX_BASIC, + IA32_VMX_ENTRY_CTLS, IA32_VMX_EXIT_CTLS, IA32_VMX_PINBASED_CTLS, IA32_VMX_PROCBASED_CTLS, + IA32_VMX_PROCBASED_CTLS2, IA32_VMX_TRUE_ENTRY_CTLS, IA32_VMX_TRUE_EXIT_CTLS, + IA32_VMX_TRUE_PINBASED_CTLS, IA32_VMX_TRUE_PROCBASED_CTLS, +}; +use x86::segmentation::SegmentSelector; +use x86::vmx::vmcs::control::{ + EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls, +}; +use x86::vmx::vmcs::*; +use x86::{bits64::vmx, vmx::VmFail}; +use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}; +use x86_64::registers::model_specific::Msr; + +bitflags! { + pub struct FeatureControlFlags: u64 { + // Lock bit: when set, locks this MSR from being written. when clear, + // VMXON causes a #GP. + const LOCKED = 1 << 0; + // Enable VMX inside SMX operation. + const VMXON_ENABLED_INSIDE_SMX = 1 << 1; + // Enable VMX outside SMX operation. 
+ const VMXON_ENABLED_OUTSIDE_SMX = 1 << 2; + } +} + +pub fn vmread(field: u32) -> x86::vmx::Result { + unsafe { vmx::vmread(field as u32) } +} + +pub fn vmwrite>(field: u32, value: T) -> x86::vmx::Result<()> { + unsafe { vmx::vmwrite(field as u32, value.into()) } +} + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +/// VMX basic exit reasons. (SDM Vol. 3D, Appendix C) +pub enum VmxExitReason { + EXCEPTION_NMI = 0, + EXTERNAL_INTERRUPT = 1, + TRIPLE_FAULT = 2, + INIT = 3, + SIPI = 4, + SMI = 5, + OTHER_SMI = 6, + INTERRUPT_WINDOW = 7, + NMI_WINDOW = 8, + TASK_SWITCH = 9, + CPUID = 10, + GETSEC = 11, + HLT = 12, + INVD = 13, + INVLPG = 14, + RDPMC = 15, + RDTSC = 16, + RSM = 17, + VMCALL = 18, + VMCLEAR = 19, + VMLAUNCH = 20, + VMPTRLD = 21, + VMPTRST = 22, + VMREAD = 23, + VMRESUME = 24, + VMWRITE = 25, + VMOFF = 26, + VMON = 27, + CR_ACCESS = 28, + DR_ACCESS = 29, + IO_INSTRUCTION = 30, + MSR_READ = 31, + MSR_WRITE = 32, + INVALID_GUEST_STATE = 33, + MSR_LOAD_FAIL = 34, + MWAIT_INSTRUCTION = 36, + MONITOR_TRAP_FLAG = 37, + MONITOR_INSTRUCTION = 39, + PAUSE_INSTRUCTION = 40, + MCE_DURING_VMENTRY = 41, + TPR_BELOW_THRESHOLD = 43, + APIC_ACCESS = 44, + VIRTUALIZED_EOI = 45, + GDTR_IDTR = 46, + LDTR_TR = 47, + EPT_VIOLATION = 48, + EPT_MISCONFIG = 49, + INVEPT = 50, + RDTSCP = 51, + PREEMPTION_TIMER = 52, + INVVPID = 53, + WBINVD = 54, + XSETBV = 55, + APIC_WRITE = 56, + RDRAND = 57, + INVPCID = 58, + VMFUNC = 59, + ENCLS = 60, + RDSEED = 61, + PML_FULL = 62, + XSAVES = 63, + XRSTORS = 64, + PCONFIG = 65, + SPP_EVENT = 66, + UMWAIT = 67, + TPAUSE = 68, + LOADIWKEY = 69, +} +} + +/// VM-Exit Informations. (SDM Vol. 3C, Section 24.9.1) +#[derive(Debug)] +pub struct VmxExitInfo { + /// VM-entry failure. (0 = true VM exit; 1 = VM-entry failure) + pub entry_failure: bool, + /// Basic exit reason. 
+ pub exit_reason: VmxExitReason, + /// For VM exits resulting from instruction execution, this field receives + /// the length in bytes of the instruction whose execution led to the VM exit. + pub exit_instruction_length: u32, + /// Guest `RIP` where the VM exit occurs. + pub guest_rip: usize, +} + +#[derive(Debug)] +pub struct VmxRegion { + frame: Frame, +} + +impl VmxRegion { + pub fn uninit() -> Self { + Self { + frame: unsafe { Frame::from_paddr(0) }, + } + } + + pub fn new(revision_id: u32, shadow_indicator: bool) -> HvResult { + let frame = Frame::new_zero()?; + unsafe { + (*(frame.start_paddr() as *mut u32)) + .set_bits(0..=30, revision_id) + .set_bit(31, shadow_indicator); + } + Ok(Self { frame }) + } + + pub fn start_paddr(&self) -> PhysAddr { + self.frame.start_paddr() + } +} + +pub fn check_vmx_support() -> bool { + if let Some(feature) = CpuId::new().get_feature_info() { + feature.has_vmx() + } else { + false + } +} + +pub fn is_vmx_enabled() -> bool { + Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS) +} + +pub unsafe fn enable_vmxon() -> HvResult { + let mut ctrl_reg = Msr::new(IA32_FEATURE_CONTROL); + let ctrl_flag = FeatureControlFlags::from_bits_truncate(ctrl_reg.read()); + let locked = ctrl_flag.contains(FeatureControlFlags::LOCKED); + let vmxon_outside = ctrl_flag.contains(FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX); + if !locked { + ctrl_reg.write( + (ctrl_flag + | FeatureControlFlags::LOCKED + | FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX) + .bits(), + ) + } else if !vmxon_outside { + return Err(hv_err!(EPERM, "VMX disabled by BIOS")); + } + Ok(()) +} + +pub unsafe fn get_vmcs_revision_id() -> u32 { + let vmx_basic_reg = Msr::new(IA32_VMX_BASIC); + let vmx_basic_flag = vmx_basic_reg.read(); + vmx_basic_flag.get_bits(0..=30) as u32 +} + +pub unsafe fn execute_vmxon(start_paddr: u64) -> HvResult { + // enable VMX using the VMXE bit + Cr4::write(Cr4::read() | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS); + // execute VMXON + 
vmx::vmxon(start_paddr)?; + + Ok(()) +} + +pub unsafe fn enable_vmcs(start_paddr: u64) -> HvResult { + vmx::vmclear(start_paddr)?; + vmx::vmptrld(start_paddr)?; + + Ok(()) +} + +// natural-width +type unw = u64; + +pub unsafe fn setup_vmcs_host(vmx_exit: usize) -> HvResult { + vmwrite::(host::IA32_PAT_FULL, Msr::new(IA32_PAT).read())?; + vmwrite::(host::IA32_EFER_FULL, Msr::new(IA32_EFER).read())?; + + vmwrite::(host::CR0, Cr0::read_raw())?; + vmwrite::(host::CR3, Cr3::read_raw().0.start_address().as_u64())?; + vmwrite::(host::CR4, Cr4::read_raw())?; + + vmwrite::(host::ES_SELECTOR, x86::segmentation::es().bits())?; + vmwrite::(host::CS_SELECTOR, x86::segmentation::cs().bits())?; + vmwrite::(host::SS_SELECTOR, x86::segmentation::ss().bits())?; + vmwrite::(host::DS_SELECTOR, x86::segmentation::ds().bits())?; + vmwrite::(host::FS_SELECTOR, x86::segmentation::fs().bits())?; + vmwrite::(host::GS_SELECTOR, x86::segmentation::gs().bits())?; + + vmwrite::(host::FS_BASE, Msr::new(IA32_FS_BASE).read())?; + vmwrite::(host::GS_BASE, Msr::new(IA32_GS_BASE).read())?; + + let tr = unsafe { x86::task::tr() }; + let mut gdtp = DescriptorTablePointer::::default(); + let mut idtp = DescriptorTablePointer::::default(); + unsafe { + dtables::sgdt(&mut gdtp); + dtables::sidt(&mut idtp); + } + + vmwrite::(host::TR_SELECTOR, tr.bits())?; + vmwrite::(host::TR_BASE, get_tr_base(tr, &gdtp))?; + vmwrite::(host::GDTR_BASE, gdtp.base as unw)?; + vmwrite::(host::IDTR_BASE, idtp.base as unw)?; + vmwrite::(host::RIP, vmx_exit as unw)?; + + vmwrite::(host::IA32_SYSENTER_ESP, 0)?; + vmwrite::(host::IA32_SYSENTER_EIP, 0)?; + vmwrite::(host::IA32_SYSENTER_CS, 0)?; + + // VmcsHostNW::RSP.write(0)?; // TODO + Ok(()) +} + +pub unsafe fn setup_vmcs_guest(entry: usize) -> HvResult { + // Enable protected mode and paging. 
+ let cr0_guest = Cr0Flags::PROTECTED_MODE_ENABLE + | Cr0Flags::EXTENSION_TYPE + | Cr0Flags::NUMERIC_ERROR + | Cr0Flags::PAGING; + let cr0_host_owned = + Cr0Flags::NUMERIC_ERROR | Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE; + let cr0_read_shadow = Cr0Flags::NUMERIC_ERROR; + + vmwrite::(guest::CR0, cr0_guest.bits())?; + vmwrite::(control::CR0_GUEST_HOST_MASK, cr0_host_owned.bits())?; + vmwrite::(control::CR0_READ_SHADOW, cr0_read_shadow.bits())?; + + // Enable physical address extensions that required in IA-32e mode. + let cr4_guest = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + let cr4_read_shadow = 0; + + vmwrite::(guest::CR4, cr4_guest.bits())?; + vmwrite::(control::CR4_GUEST_HOST_MASK, cr4_host_owned.bits())?; + vmwrite::(control::CR4_READ_SHADOW, cr4_read_shadow)?; + + macro_rules! set_guest_segment { + ($seg: ident, $access_rights: expr) => {{ + use guest::*; + vmwrite::(concat_idents!($seg, _SELECTOR), 0)?; + vmwrite::(concat_idents!($seg, _BASE), 0)?; + vmwrite::(concat_idents!($seg, _LIMIT), 0xffff)?; + vmwrite::(concat_idents!($seg, _ACCESS_RIGHTS), $access_rights)?; + }}; + } + + set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed + set_guest_segment!(CS, 0x209b); // 64-bit, present, code, exec/read, accessed + set_guest_segment!(SS, 0x93); + set_guest_segment!(DS, 0x93); + set_guest_segment!(FS, 0x93); + set_guest_segment!(GS, 0x93); + set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy + set_guest_segment!(LDTR, 0x82); // present, system, LDT + + vmwrite::(guest::GDTR_BASE, 0)?; + vmwrite::(guest::GDTR_LIMIT, 0xffff)?; + vmwrite::(guest::IDTR_BASE, 0)?; + vmwrite::(guest::IDTR_LIMIT, 0xffff)?; + + vmwrite::(guest::CR3, Cr3::read_raw().0.start_address().as_u64())?; + vmwrite::(guest::DR7, 0x400)?; + vmwrite::(guest::RSP, 0)?; + vmwrite::(guest::RIP, entry as unw)?; + vmwrite::(guest::RFLAGS, 0x2)?; + 
vmwrite::(guest::PENDING_DBG_EXCEPTIONS, 0)?; + vmwrite::(guest::IA32_SYSENTER_ESP, 0)?; + vmwrite::(guest::IA32_SYSENTER_EIP, 0)?; + vmwrite::(guest::IA32_SYSENTER_CS, 0)?; + + vmwrite::(guest::INTERRUPTIBILITY_STATE, 0)?; + vmwrite::(guest::ACTIVITY_STATE, 0)?; + vmwrite::(guest::VMX_PREEMPTION_TIMER_VALUE, 0)?; + + vmwrite::(guest::LINK_PTR_FULL, u64::MAX)?; + vmwrite::(guest::IA32_DEBUGCTL_FULL, 0)?; + vmwrite::(guest::IA32_PAT_FULL, Msr::new(IA32_PAT).read())?; + vmwrite::(guest::IA32_EFER_FULL, Msr::new(IA32_EFER).read())?; + + Ok(()) +} + +pub unsafe fn setup_vmcs_control() -> HvResult { + // Intercept NMI, pass-through external interrupts. + set_control( + control::PINBASED_EXEC_CONTROLS, + Msr::new(IA32_VMX_TRUE_PINBASED_CTLS), + Msr::new(IA32_VMX_PINBASED_CTLS).read() as u32, + PinbasedControls::NMI_EXITING.bits(), + 0, + )?; + + // Activate secondary controls, disable CR3 load/store interception. + set_control( + control::PRIMARY_PROCBASED_EXEC_CONTROLS, + Msr::new(IA32_VMX_TRUE_PROCBASED_CTLS), + Msr::new(IA32_VMX_PROCBASED_CTLS).read() as u32, + PrimaryControls::SECONDARY_CONTROLS.bits(), + (PrimaryControls::CR3_LOAD_EXITING | PrimaryControls::CR3_STORE_EXITING).bits(), + )?; + + // Enable RDTSCP, INVPCID. + set_control( + control::SECONDARY_PROCBASED_EXEC_CONTROLS, + Msr::new(IA32_VMX_PROCBASED_CTLS2), + 0, + (SecondaryControls::ENABLE_RDTSCP | SecondaryControls::ENABLE_INVPCID).bits(), + 0, + )?; + + // Switch to 64-bit host, switch IA32_PAT/IA32_EFER on VM exit. + set_control( + control::VMEXIT_CONTROLS, + Msr::new(IA32_VMX_TRUE_EXIT_CTLS), + Msr::new(IA32_VMX_EXIT_CTLS).read() as u32, + (ExitControls::HOST_ADDRESS_SPACE_SIZE + | ExitControls::SAVE_IA32_PAT + | ExitControls::LOAD_IA32_PAT + | ExitControls::SAVE_IA32_EFER + | ExitControls::LOAD_IA32_EFER) + .bits(), + 0, + )?; + + // Switch to 64-bit guest, load guest IA32_PAT/IA32_EFER on VM entry. 
+ set_control( + control::VMENTRY_CONTROLS, + Msr::new(IA32_VMX_TRUE_ENTRY_CTLS), + Msr::new(IA32_VMX_ENTRY_CTLS).read() as u32, + (EntryControls::IA32E_MODE_GUEST + | EntryControls::LOAD_IA32_PAT + | EntryControls::LOAD_IA32_EFER) + .bits(), + 0, + )?; + + // No MSR switches if hypervisor doesn't use and there is only one vCPU. + vmwrite::(control::VMEXIT_MSR_STORE_COUNT, 0)?; + vmwrite::(control::VMEXIT_MSR_LOAD_COUNT, 0)?; + vmwrite::(control::VMENTRY_MSR_LOAD_COUNT, 0)?; + + // Pass-through exceptions, I/O instructions, and MSR read/write. + vmwrite::(control::EXCEPTION_BITMAP, 0)?; + vmwrite::(control::IO_BITMAP_A_ADDR_FULL, 0)?; + vmwrite::(control::IO_BITMAP_B_ADDR_FULL, 0)?; + vmwrite::(control::MSR_BITMAPS_ADDR_FULL, 0)?; + + Ok(()) +} + +fn get_tr_base(tr: SegmentSelector, gdt: &DescriptorTablePointer) -> u64 { + let index = tr.index() as usize; + let table_len = (gdt.limit as usize + 1) / core::mem::size_of::(); + let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; + let entry = table[index]; + if entry & (1 << 47) != 0 { + // present + let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; + let base_high = table[index + 1] & 0xffff_ffff; + base_low | base_high << 32 + } else { + // no present + 0 + } +} + +pub fn set_control( + control: u32, + capability_msr: Msr, + old_value: u32, + set: u32, + clear: u32, +) -> HvResult<()> { + let cap = unsafe { capability_msr.read() }; + let allowed0 = cap as u32; + let allowed1 = (cap >> 32) as u32; + assert_eq!(allowed0 & allowed1, allowed0); + debug!( + "set {:#x}: {:#x} (+{:#x}, -{:#x})", + control, old_value, set, clear + ); + if (set & clear) != 0 { + return Err(hv_err!( + EPERM, + format!("can not set and clear the same bit in {:#x}", control) + )); + } + if (allowed1 & set) != set { + // failed if set 0-bits in allowed1 + return Err(hv_err!( + EPERM, + format!("can not set bits {:#x} in {:#x}", set, control) + )); + } + if (allowed0 & clear) != 0 { + // failed if clear 
1-bits in allowed0 + return Err(hv_err!( + EPERM, + format!("can not clear bits {:#x} in {:#x}", clear, control) + )); + } + // SDM Vol. 3C, Section 31.5.1, Algorithm 3 + let flexible = !allowed0 & allowed1; // therse bits can be either 0 or 1 + let unknown = flexible & !(set | clear); // hypervisor untouched bits + let default = unknown & old_value; // these bits keep unchanged in old value + let fixed1 = allowed0; // these bits are fixed to 1 + vmwrite(control, fixed1 | default | set)?; + Ok(()) +} + +impl From for HvError { + fn from(err: VmFail) -> Self { + hv_err!(EFAULT, format!("VMX instruction failed: {:?}", err)) + } +} + +pub unsafe fn advance_guest_rip(instr_len: u8) -> HvResult { + Ok(vmwrite::( + guest::RIP, + (vmread(guest::RIP)? + instr_len as u64), + )?) +} + +pub unsafe fn instruction_error() -> u32 { + vmread(ro::VM_INSTRUCTION_ERROR).unwrap() as u32 +} + +pub unsafe fn set_host_rsp(paddr: usize) -> HvResult { + Ok(vmwrite::(host::RSP, paddr as unw)?) +} + +pub unsafe fn exit_info() -> HvResult { + let full_reason = vmread(ro::EXIT_REASON)? as u32; + Ok(VmxExitInfo { + exit_reason: full_reason + .get_bits(0..16) + .try_into() + .expect("Unknown VM-exit reason"), + entry_failure: full_reason.get_bit(31), + exit_instruction_length: vmread(ro::VMEXIT_INSTRUCTION_LEN)? as u32, + guest_rip: vmread(guest::RIP)? 
as usize, + }) +} diff --git a/src/device/irqchip/i8259/mod.rs b/src/device/irqchip/i8259/mod.rs index 1fc4d770..989d5b3f 100644 --- a/src/device/irqchip/i8259/mod.rs +++ b/src/device/irqchip/i8259/mod.rs @@ -13,7 +13,9 @@ pub fn inject_irq(_irq: usize, _is_hardware: bool) {} pub fn percpu_init() {} -pub fn primary_init_early() {} +pub fn primary_init_early() { + warn!("x86_64: irqchip: primary_init_early do nothing"); +} pub fn primary_init_late() {} diff --git a/src/main.rs b/src/main.rs index cd3cf73a..3170bd16 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,6 +14,7 @@ #![feature(asm_const)] #![feature(naked_functions)] // surpport naked function #![feature(core_panic)] +#![feature(concat_idents)] // 支持内联汇编 // #![deny(warnings, missing_docs)] // 将warnings作为error #[macro_use] @@ -105,7 +106,8 @@ fn primary_init_early() { device::irqchip::primary_init_early(); // crate::arch::mm::init_hv_page_table().unwrap(); - zone_create(root_zone_config()).unwrap(); + // TODO: + // zone_create(root_zone_config()).unwrap(); INIT_EARLY_OK.store(1, Ordering::Release); } @@ -157,15 +159,35 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { ); #[cfg(target_arch = "x86_64")] - cpu.arch_cpu.per_cpu_init(); // load gdt and tss + cpu.arch_cpu.gdt.load(); // load gdt and tss if is_primary { wakeup_secondary_cpus(cpu.id, host_dtb); } + ENTERED_CPUS.fetch_add(1, Ordering::SeqCst); + wait_for(|| PerCpu::entered_cpus() < MAX_CPU_NUM as _); + assert_eq!(PerCpu::entered_cpus(), MAX_CPU_NUM as _); + + println!( + "{} CPU {} has entered.", + if is_primary { "Primary" } else { "Secondary" }, + cpu.id + ); + + if is_primary { + primary_init_early(); // create root zone here + + // TODO: tmp + cpu.boot_cpu = true; + } else { + wait_for_counter(&INIT_EARLY_OK, 1); + } + // x86_64::instructions::interrupts::int3(); - println!("END OF MAIN"); + // info!("END OF MAIN"); + cpu.run_vm(); loop {} } diff --git a/src/memory/frame.rs b/src/memory/frame.rs index 5078d702..c56ace22 100644 --- 
a/src/memory/frame.rs +++ b/src/memory/frame.rs @@ -8,6 +8,7 @@ use spin::Mutex; use super::addr::{align_down, align_up, is_aligned, PhysAddr}; use crate::consts::PAGE_SIZE; use crate::error::HvResult; +use crate::memory::addr::virt_to_phys; // Support max 1M * 4096 = 1GB memory. type FrameAlloc = bitmap_allocator::BitAlloc1M; @@ -229,7 +230,9 @@ pub fn init() { let mem_pool_start = crate::consts::mem_pool_start(); let mem_pool_end = align_down(crate::consts::hv_end()); let mem_pool_size = mem_pool_end - mem_pool_start; - FRAME_ALLOCATOR.lock().init(mem_pool_start, mem_pool_size); + FRAME_ALLOCATOR + .lock() + .init(virt_to_phys(mem_pool_start), mem_pool_size); info!( "Frame allocator initialization finished: {:#x?}", From f64bad857007611cf872a6081cb7ab29ba1479c9 Mon Sep 17 00:00:00 2001 From: Solicey Date: Mon, 20 Jan 2025 11:00:17 +0800 Subject: [PATCH 04/29] enable EPT --- src/arch/x86_64/cpu.rs | 86 +++++---- src/arch/x86_64/entry.rs | 3 +- src/arch/x86_64/mm.rs | 26 +++ src/arch/x86_64/paging.rs | 354 ++++++++++++++++++++++++++++++++++-- src/arch/x86_64/s1pt.rs | 4 +- src/arch/x86_64/s2pt.rs | 225 +++++++++++++++++++++-- src/arch/x86_64/trap.rs | 60 +++++- src/arch/x86_64/vmx.rs | 108 ++++++++--- src/arch/x86_64/zone.rs | 31 ++++ src/main.rs | 5 +- src/platform/qemu_x86_64.rs | 58 +++++- 11 files changed, 846 insertions(+), 114 deletions(-) diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 638bb603..b94a75cc 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -4,17 +4,20 @@ use crate::arch::vmx::*; use crate::consts::{core_end, PER_CPU_SIZE}; use crate::error::{HvError, HvResult}; use crate::memory::{addr::phys_to_virt, Frame, PhysAddr, PAGE_SIZE}; +use crate::memory::{GuestPhysAddr, HostPhysAddr}; use crate::percpu::this_cpu_data; +use crate::platform::qemu_x86_64::*; use alloc::boxed::Box; use core::arch::{asm, global_asm}; +use core::fmt::{Debug, Formatter, Result}; use core::mem::size_of; +use core::panicking::panic; use 
core::time::Duration; use raw_cpuid::CpuId; use x86_64::structures::tss::TaskStateSegment; const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; -const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; global_asm!( include_str!("ap_start.S"), @@ -137,7 +140,6 @@ pub struct GeneralRegisters { } #[repr(C)] -#[derive(Debug)] pub struct ArchCpu { // guest_regs and host_stack_top should always be at the first. guest_regs: GeneralRegisters, @@ -166,54 +168,61 @@ impl ArchCpu { } } - pub unsafe fn init(&mut self, entry: usize, dtb: usize) { - self.activate_vmx(); - self.setup_vmcs(entry); + pub fn init(&mut self, entry: GuestPhysAddr, dtb: usize) -> HvResult { + self.activate_vmx()?; + self.setup_vmcs(entry)?; + Ok(()) } - unsafe fn activate_vmx(&mut self) { + fn activate_vmx(&mut self) -> HvResult { assert!(check_vmx_support()); assert!(!is_vmx_enabled()); // enable VMXON - enable_vmxon().unwrap(); + unsafe { enable_vmxon()? }; // TODO: check related registers // get VMCS revision identifier in IA32_VMX_BASIC MSR self.vmcs_revision_id = get_vmcs_revision_id(); - self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false).unwrap(); + self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false)?; - execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap(); + unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64)? }; info!( "VMX enabled, region: 0x{:x}", self.vmxon_region.start_paddr(), ); + Ok(()) } - unsafe fn setup_vmcs(&mut self, entry: usize) { - self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false).unwrap(); + fn setup_vmcs(&mut self, entry: GuestPhysAddr) -> HvResult { + self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false)?; - enable_vmcs(self.vmcs_region.start_paddr() as u64).unwrap(); - setup_vmcs_host(Self::vmx_exit as usize).unwrap(); - setup_vmcs_guest(entry).unwrap(); - setup_vmcs_control().unwrap(); + unsafe { enable_vmcs(self.vmcs_region.start_paddr() as u64)? 
}; + setup_vmcs_host(Self::vmx_exit as usize)?; + setup_vmcs_guest(entry)?; + setup_vmcs_control()?; info!( "VMCS enabled, region: 0x{:x}", self.vmcs_region.start_paddr(), ); + + Ok(()) } pub fn run(&mut self) -> ! { assert!(this_cpu_id() == self.cpuid); // TODO: this_cpu_data().cpu_on_entry - unsafe { - self.init(test_guest as usize, this_cpu_data().dtb_ipa); - set_host_rsp(&self.host_stack_top as *const _ as usize).unwrap(); - self.vmx_launch(); - } + self.init(GUEST_ENTRY, this_cpu_data().dtb_ipa).unwrap(); + + this_cpu_data().activate_gpm(); + set_host_rsp(&self.host_stack_top as *const _ as usize).unwrap(); + set_guest_page_table(GUEST_PT1).unwrap(); + set_guest_stack_pointer(GUEST_STACK_TOP).unwrap(); + + unsafe { self.vmx_launch() }; loop {} } @@ -260,10 +269,12 @@ impl ArchCpu { panic!("VMX instruction error: {}", instruction_error()); } - unsafe fn vmexit_handler(&mut self) { - let exit_info = exit_info().unwrap(); - debug!("vmexit rax:{} {:#x?}", self.guest_regs.rax, exit_info); - advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL).unwrap(); + fn vmexit_handler(&mut self) { + crate::arch::trap::handle_vmexit(self).unwrap(); + } + + pub fn regs(&self) -> &GeneralRegisters { + &self.guest_regs } } @@ -274,19 +285,16 @@ pub fn this_cpu_id() -> usize { } } -#[naked] -unsafe extern "C" fn test_guest() -> ! 
{ - core::arch::asm!( - " - mov rax, 0 - mov rdi, 2 - mov rsi, 3 - mov rdx, 3 - mov rcx, 3 - 2: - vmcall - add rax, 1 - jmp 2b", - options(noreturn), - ); +impl Debug for ArchCpu { + fn fmt(&self, f: &mut Formatter) -> Result { + (|| -> HvResult { + Ok(f.debug_struct("ArchCpu") + .field("guest_regs", &self.guest_regs) + .field("rip", &guest_rip()) + .field("rsp", &guest_rsp()) + .field("cr3", &guest_cr3()) + .finish()) + })() + .unwrap() + } } diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index 1d43a4e8..50226438 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,5 +1,4 @@ -use super::cpu::this_cpu_id; -use crate::arch::cpu; +use crate::arch::cpu::this_cpu_id; use crate::consts::PER_CPU_SIZE; use crate::memory::addr::PHYS_VIRT_OFFSET; use crate::rust_main; diff --git a/src/arch/x86_64/mm.rs b/src/arch/x86_64/mm.rs index 8e27f3eb..41aa9a68 100644 --- a/src/arch/x86_64/mm.rs +++ b/src/arch/x86_64/mm.rs @@ -7,8 +7,34 @@ use crate::{ addr::{align_down, align_up}, GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet, HV_PT, }, + platform::qemu_x86_64::*, }; pub fn new_s2_memory_set() -> MemorySet { MemorySet::new(4) } + +pub fn init_hv_page_table() -> HvResult { + use x86_64::structures::paging::{PageTable, PageTableFlags as PTF}; + let pt1 = unsafe { &mut *(gpa_as_mut_ptr(GUEST_PT1) as *mut PageTable) }; + let pt2 = unsafe { &mut *(gpa_as_mut_ptr(GUEST_PT2) as *mut PageTable) }; + // identity mapping + pt1[0].set_addr( + x86_64::PhysAddr::new(GUEST_PT2 as _), + PTF::PRESENT | PTF::WRITABLE, + ); + pt2[0].set_addr( + x86_64::PhysAddr::new(0), + PTF::PRESENT | PTF::WRITABLE | PTF::HUGE_PAGE, + ); + + unsafe { + core::ptr::copy_nonoverlapping( + crate::platform::qemu_x86_64::test_guest_2 as usize as *const u8, + gpa_as_mut_ptr(GUEST_ENTRY), + 0x1000, + ); + } + + Ok(()) +} diff --git a/src/arch/x86_64/paging.rs b/src/arch/x86_64/paging.rs index bdf89c37..ca737f14 100644 --- a/src/arch/x86_64/paging.rs +++ 
b/src/arch/x86_64/paging.rs @@ -1,7 +1,10 @@ #![allow(unused)] +use alloc::vec::Vec; + +use crate::consts::PAGE_SIZE; use crate::error::{HvError, HvResult}; -use crate::memory::{Frame, MemFlags, MemoryRegion, PhysAddr, VirtAddr}; -use core::{fmt::Debug, marker::PhantomData}; +use crate::memory::{addr::is_aligned, Frame, MemFlags, MemoryRegion, PhysAddr, VirtAddr}; +use core::{fmt::Debug, marker::PhantomData, slice}; #[derive(Debug)] pub enum PagingError { @@ -13,6 +16,15 @@ pub enum PagingError { pub type PagingResult = Result; +impl From for HvError { + fn from(err: PagingError) -> Self { + match err { + PagingError::NoMemory => hv_err!(ENOMEM), + _ => hv_err!(EFAULT, format!("{:?}", err)), + } + } +} + #[repr(usize)] #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum PageSize { @@ -21,21 +33,34 @@ pub enum PageSize { Size1G = 0x4000_0000, } +#[derive(Debug, Copy, Clone)] +pub struct Page { + vaddr: VA, + size: PageSize, +} + impl PageSize { pub const fn is_aligned(self, addr: usize) -> bool { - false + self.page_offset(addr) == 0 } pub const fn align_down(self, addr: usize) -> usize { - 0 + addr & !(self as usize - 1) } pub const fn page_offset(self, addr: usize) -> usize { - 0 + addr & (self as usize - 1) } pub const fn is_huge(self) -> bool { - false + matches!(self, Self::Size1G | Self::Size2M) + } +} + +impl + Copy> Page { + pub fn new_aligned(vaddr: VA, size: PageSize) -> Self { + debug_assert!(size.is_aligned(vaddr.into())); + Self { vaddr, size } } } @@ -93,26 +118,59 @@ pub trait GenericPageTable: GenericPageTableImmut { fn flush(&self, vaddr: Option); } -pub struct HvPageTable { +// TODO: lock +pub struct Level4PageTable { + root: Frame, + // Intermediate level table frames. 
+ intrm_tables: Vec, _phantom: PhantomData<(VA, PTE, I)>, } -impl HvPageTable +impl Level4PageTable where VA: From + Into + Copy, PTE: GenericPTE, I: PagingInstr, { #[allow(dead_code)] - pub fn dump(&self, limit: usize) {} + pub fn dump(&self, limit: usize) { + // TODO: lock + // static LOCK: Mutex<()> = Mutex::new(()); + // let _lock = LOCK.lock(); + + println!("Root: {:x?}", self.root_paddr()); + self.walk( + table_of(self.root_paddr()), + 0, + 0, + limit, + &|level: usize, idx: usize, vaddr: usize, entry: &PTE| { + for _ in 0..level * 2 { + print!(" "); + } + println!( + "[ADDR:{:#x?} level:{} - idx:{:03}], vaddr:{:08x?}: {:x?}", + entry as *const _ as VirtAddr, level, idx, vaddr, entry + ); + }, + ); + } /// Clone only the top level page table mapping from `src`. pub fn clone_from(src: &impl GenericPageTableImmut) -> Self { - Self::new() + // XXX: The clonee won't track intermediate tables, must ensure it lives shorter than the + // original page table. + let pt = Self::new(); + let dst_p4_table = + unsafe { slice::from_raw_parts_mut(pt.root_paddr() as *mut PTE, ENTRY_COUNT) }; + let src_p4_table = + unsafe { slice::from_raw_parts(src.root_paddr() as *const PTE, ENTRY_COUNT) }; + dst_p4_table.clone_from_slice(src_p4_table); + pt } } -impl GenericPageTableImmut for HvPageTable +impl GenericPageTableImmut for Level4PageTable where VA: From + Into + Copy, PTE: GenericPTE, @@ -121,45 +179,301 @@ where type VA = VA; unsafe fn from_root(root_paddr: PhysAddr) -> Self { - Self::new() + Self { + root: Frame::from_paddr(root_paddr), + intrm_tables: Vec::new(), + _phantom: PhantomData, + } } fn root_paddr(&self) -> PhysAddr { - 0 + self.root.start_paddr() } fn query(&self, vaddr: VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)> { - Ok((0, MemFlags::READ, PageSize::Size4K)) + // TODO: lock + let (entry, size) = self.get_entry_mut(vaddr)?; + if entry.is_unused() { + return Err(PagingError::NotMapped); + } + let off = size.page_offset(vaddr.into()); + Ok((entry.addr() + 
off, entry.flags(), size)) } } -impl GenericPageTable for HvPageTable +impl GenericPageTable for Level4PageTable where VA: From + Into + Copy, PTE: GenericPTE, I: PagingInstr, { fn new() -> Self { - Self::new() + let frame = Frame::new_zero().expect("failed to allocate root frame for host page table"); + Self { + root: frame, + intrm_tables: Vec::new(), + _phantom: PhantomData, + } } fn map(&mut self, region: &MemoryRegion) -> HvResult { + assert!( + is_aligned(region.start.into()), + "region.start = {:#x?}", + region.start.into() + ); + assert!(is_aligned(region.size), "region.size = {:#x?}", region.size); + trace!( + "create mapping in {}: {:#x?}", + core::any::type_name::(), + region + ); + // TODO: lock + let mut vaddr = region.start.into(); + let mut size = region.size; + while size > 0 { + let paddr = region.mapper.map_fn(vaddr); + let page_size = if PageSize::Size1G.is_aligned(vaddr) + && PageSize::Size1G.is_aligned(paddr) + && size >= PageSize::Size1G as usize + && !region.flags.contains(MemFlags::NO_HUGEPAGES) + { + PageSize::Size1G + } else if PageSize::Size2M.is_aligned(vaddr) + && PageSize::Size2M.is_aligned(paddr) + && size >= PageSize::Size2M as usize + && !region.flags.contains(MemFlags::NO_HUGEPAGES) + { + PageSize::Size2M + } else { + PageSize::Size4K + }; + let page = Page::new_aligned(vaddr.into(), page_size); + self.map_page(page, paddr, region.flags) + .map_err(|e: PagingError| { + error!( + "failed to map page: {:#x?}({:?}) -> {:#x?}, {:?}", + vaddr, page_size, paddr, e + ); + e + })?; + vaddr += page_size as usize; + size -= page_size as usize; + } Ok(()) } fn unmap(&mut self, region: &MemoryRegion) -> HvResult { + trace!( + "destroy mapping in {}: {:#x?}", + core::any::type_name::(), + region + ); + // TODO: lock + let mut vaddr = region.start.into(); + let mut size = region.size; + while size > 0 { + let (_, page_size) = self.unmap_page(vaddr.into()).map_err(|e| { + error!("failed to unmap page: {:#x?}, {:?}", vaddr, e); + e + })?; + if 
!page_size.is_aligned(vaddr) { + error!("error vaddr={:#x?}", vaddr); + loop {} + } + assert!(page_size.is_aligned(vaddr)); + assert!(page_size as usize <= size); + vaddr += page_size as usize; + size -= page_size as usize; + } Ok(()) } fn update(&mut self, vaddr: VA, paddr: PhysAddr, flags: MemFlags) -> PagingResult { - Ok(PageSize::Size4K) + let (entry, size) = self.get_entry_mut(vaddr)?; + entry.set_addr(paddr); + entry.set_flags(flags, size.is_huge()); + Ok(size) } fn clone(&self) -> Self { - Self::clone_from(self) + let mut pt = Self::clone_from(self); + // clone with lock to avoid data racing between it and its clonees. + // TODO: pt.clonee_lock = self.clonee_lock.clone(); + pt + } + + unsafe fn activate(&self) { + I::activate(self.root_paddr()) + } + + fn flush(&self, vaddr: Option) { + I::flush(vaddr.map(Into::into)) } +} + +// TODO: temp +impl Level4PageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + fn get_entry_mut(&self, vaddr: VA) -> PagingResult<(&mut PTE, PageSize)> { + let vaddr = vaddr.into(); + + let p4 = table_of_mut::(self.root_paddr()); + let p4e = &mut p4[p4_index(vaddr)]; + + let p3 = next_table_mut(p4e)?; + let p3e = &mut p3[p3_index(vaddr)]; + if p3e.is_huge() { + return Ok((p3e, PageSize::Size1G)); + } + + let p2 = next_table_mut(p3e)?; + let p2e = &mut p2[p2_index(vaddr)]; + if p2e.is_huge() { + return Ok((p2e, PageSize::Size2M)); + } + + let p1 = next_table_mut(p2e)?; + let p1e = &mut p1[p1_index(vaddr)]; + Ok((p1e, PageSize::Size4K)) + } + + fn alloc_intrm_table(&mut self) -> HvResult { + let frame = Frame::new_zero()?; + let paddr = frame.start_paddr(); + self.intrm_tables.push(frame); + Ok(paddr) + } + + fn get_entry_mut_or_create(&mut self, page: Page) -> PagingResult<&mut PTE> { + let vaddr: usize = page.vaddr.into(); + + let p4 = table_of_mut::(self.root_paddr()); + let p4e = &mut p4[p4_index(vaddr)]; - unsafe fn activate(&self) {} + let p3 = next_table_mut_or_create(p4e, || 
self.alloc_intrm_table())?; + let p3e = &mut p3[p3_index(vaddr)]; + if page.size == PageSize::Size1G { + return Ok(p3e); + } - fn flush(&self, vaddr: Option) {} + let p2 = next_table_mut_or_create(p3e, || self.alloc_intrm_table())?; + let p2e = &mut p2[p2_index(vaddr)]; + if page.size == PageSize::Size2M { + return Ok(p2e); + } + + let p1 = next_table_mut_or_create(p2e, || self.alloc_intrm_table())?; + let p1e = &mut p1[p1_index(vaddr)]; + Ok(p1e) + } + + fn map_page( + &mut self, + page: Page, + paddr: PhysAddr, + flags: MemFlags, + ) -> PagingResult<&mut PTE> { + let entry = self.get_entry_mut_or_create(page)?; + if !entry.is_unused() { + return Err(PagingError::AlreadyMapped); + } + entry.set_addr(page.size.align_down(paddr)); + entry.set_flags(flags, page.size.is_huge()); + Ok(entry) + } + + fn unmap_page(&mut self, vaddr: VA) -> PagingResult<(PhysAddr, PageSize)> { + let (entry, size) = self.get_entry_mut(vaddr)?; + if entry.is_unused() { + return Err(PagingError::NotMapped); + } + let paddr = entry.addr(); + entry.clear(); + Ok((paddr, size)) + } + + fn walk( + &self, + table: &[PTE], + level: usize, + start_vaddr: usize, + limit: usize, + func: &impl Fn(usize, usize, usize, &PTE), + ) { + let mut n = 0; + for (i, entry) in table.iter().enumerate() { + let vaddr = start_vaddr + (i << (12 + (3 - level) * 9)); + if entry.is_present() { + func(level, i, vaddr, entry); + if level < 3 { + match next_table_mut(entry) { + Ok(entry) => self.walk(entry, level + 1, vaddr, limit, func), + Err(PagingError::MappedToHugePage) => {} + _ => unreachable!(), + } + } + n += 1; + if n >= limit { + break; + } + } + } + } +} + +const LEVELS: usize = 4; +const ENTRY_COUNT: usize = 512; + +const fn p4_index(vaddr: usize) -> usize { + (vaddr >> (12 + 27)) & (ENTRY_COUNT - 1) +} + +const fn p3_index(vaddr: usize) -> usize { + (vaddr >> (12 + 18)) & (ENTRY_COUNT - 1) +} + +const fn p2_index(vaddr: usize) -> usize { + (vaddr >> (12 + 9)) & (ENTRY_COUNT - 1) +} + +const fn 
p1_index(vaddr: usize) -> usize { + (vaddr >> 12) & (ENTRY_COUNT - 1) +} + +fn table_of<'a, E>(paddr: PhysAddr) -> &'a [E] { + let ptr = paddr as *const E; + unsafe { slice::from_raw_parts(ptr, ENTRY_COUNT) } +} + +fn table_of_mut<'a, E>(paddr: PhysAddr) -> &'a mut [E] { + let ptr = paddr as *mut E; + unsafe { slice::from_raw_parts_mut(ptr, ENTRY_COUNT) } +} + +fn next_table_mut<'a, E: GenericPTE>(entry: &E) -> PagingResult<&'a mut [E]> { + if !entry.is_present() { + Err(PagingError::NotMapped) + } else if entry.is_huge() { + Err(PagingError::MappedToHugePage) + } else { + Ok(table_of_mut(entry.addr())) + } +} + +fn next_table_mut_or_create<'a, E: GenericPTE>( + entry: &mut E, + mut allocator: impl FnMut() -> HvResult, +) -> PagingResult<&'a mut [E]> { + if entry.is_unused() { + let paddr = allocator().map_err(|_| PagingError::NoMemory)?; + entry.set_table(paddr); + Ok(table_of_mut(paddr)) + } else { + next_table_mut(entry) + } } diff --git a/src/arch/x86_64/s1pt.rs b/src/arch/x86_64/s1pt.rs index 48216489..5498ae33 100644 --- a/src/arch/x86_64/s1pt.rs +++ b/src/arch/x86_64/s1pt.rs @@ -1,4 +1,4 @@ -use super::paging::{GenericPTE, HvPageTable, PagingInstr}; +use super::paging::{GenericPTE, Level4PageTable, PagingInstr}; use crate::{ consts::PAGE_SIZE, memory::{ @@ -57,4 +57,4 @@ impl PagingInstr for S1PTInstr { fn flush(_vaddr: Option) {} } -pub type Stage1PageTable = HvPageTable; +pub type Stage1PageTable = Level4PageTable; diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index f4e677ab..a18f85ab 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -1,53 +1,252 @@ -use super::paging::{GenericPTE, HvPageTable, PagingInstr}; +use super::paging::{GenericPTE, Level4PageTable, PagingInstr}; +use crate::consts::PAGE_SIZE; use crate::memory::addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}; use crate::memory::MemFlags; +use bit_field::BitField; +use bitflags::bitflags; +use core::arch::asm; use core::fmt; +bitflags! { + /// EPT entry flags. 
(SDM Vol. 3C, Section 28.3.2) + struct DescriptorAttr: u64 { + /// Read access. + const READ = 1 << 0; + /// Write access. + const WRITE = 1 << 1; + /// Execute access. + const EXECUTE = 1 << 2; + /// EPT memory type. Only for terminate pages. + const MEM_TYPE_MASK = 0b111 << 3; + /// Ignore PAT memory type. Only for terminate pages. + const IGNORE_PAT = 1 << 6; + /// Specifies that the entry maps a huge frame instead of a page table. + /// Only allowed in P2 or P3 tables. + const HUGE_PAGE = 1 << 7; + /// If bit 6 of EPTP is 1, accessed flag for EPT. + const ACCESSED = 1 << 8; + /// If bit 6 of EPTP is 1, dirty flag for EPT. + const DIRTY = 1 << 9; + /// Execute access for user-mode linear addresses. + const EXECUTE_FOR_USER = 1 << 10; + } +} + +/// INVEPT type. (SDM Vol. 3C, Section 30.3) +#[repr(u64)] +#[derive(Debug)] +#[allow(dead_code)] +pub enum InvS2PTType { + /// The logical processor invalidates all mappings associated with bits + /// 51:12 of the EPT pointer (EPTP) specified in the INVEPT descriptor. + /// It may invalidate other mappings as well. + SingleContext = 1, + /// The logical processor invalidates mappings associated with all EPTPs. + Global = 2, +} + +bitflags! { + /// Extended-Page-Table Pointer. (SDM Vol. 3C, Section 24.6.11) + pub struct S2PTPointer: u64 { + /// EPT paging-structure memory type: Uncacheable (UC). + #[allow(clippy::identity_op)] + const MEM_TYPE_UC = 0 << 0; + /// EPT paging-structure memory type: Write-back (WB). + #[allow(clippy::identity_op)] + const MEM_TYPE_WB = 6 << 0; + /// EPT page-walk length 1. + const WALK_LENGTH_1 = 0 << 3; + /// EPT page-walk length 2. + const WALK_LENGTH_2 = 1 << 3; + /// EPT page-walk length 3. + const WALK_LENGTH_3 = 2 << 3; + /// EPT page-walk length 4. + const WALK_LENGTH_4 = 3 << 3; + /// Setting this control to 1 enables accessed and dirty flags for EPT. 
+ const ENABLE_ACCESSED_DIRTY = 1 << 6; + } +} + +impl S2PTPointer { + pub fn from_table_phys(root_paddr: HostPhysAddr) -> Self { + let aligned_addr = root_paddr & !(PAGE_SIZE - 1); + let flags = unsafe { Self::from_bits_retain(aligned_addr as u64) }; + flags | Self::MEM_TYPE_WB | Self::WALK_LENGTH_4 | Self::ENABLE_ACCESSED_DIRTY + } +} + +numeric_enum_macro::numeric_enum! { + #[repr(u8)] + #[derive(Debug, PartialEq, Clone, Copy)] + /// EPT memory typing. (SDM Vol. 3C, Section 28.3.7) + enum MemType { + Uncacheable = 0, + WriteCombining = 1, + WriteThrough = 4, + WriteProtected = 5, + WriteBack = 6, + } +} + +impl DescriptorAttr { + fn set_mem_type(&mut self, mem_type: MemType) { + let mut bits = self.bits(); + bits.set_bits(3..6, mem_type as u64); + *self = Self::from_bits_truncate(bits) + } + + fn mem_type(&self) -> Result { + MemType::try_from(self.bits().get_bits(3..6) as u8) + } +} + +impl From for MemFlags { + fn from(attr: DescriptorAttr) -> Self { + let mut flags = Self::empty(); + if attr.contains(DescriptorAttr::READ) { + flags |= Self::READ; + } + if attr.contains(DescriptorAttr::WRITE) { + flags |= Self::WRITE; + } + if attr.contains(DescriptorAttr::EXECUTE) { + flags |= Self::EXECUTE; + } + if let Ok(MemType::Uncacheable) = attr.mem_type() { + flags |= Self::IO; + } + flags + } +} + +impl From for DescriptorAttr { + fn from(flags: MemFlags) -> Self { + if flags.is_empty() { + return Self::empty(); + } + let mut attr = Self::empty(); + if flags.contains(MemFlags::READ) { + attr |= Self::READ; + } + if flags.contains(MemFlags::WRITE) { + attr |= Self::WRITE; + } + if flags.contains(MemFlags::EXECUTE) { + attr |= Self::EXECUTE; + } + if !flags.contains(MemFlags::IO) { + attr.set_mem_type(MemType::WriteBack); + } + attr + } +} + #[derive(Clone, Copy)] #[repr(transparent)] pub struct PageTableEntry(u64); +impl PageTableEntry { + const PHYS_ADDR_MASK: usize = 0x000f_ffff_ffff_f000; // 12..52 + + fn memory_type(&self) -> MemType { + 
DescriptorAttr::from_bits_truncate(self.0) + .mem_type() + .unwrap() + } +} + impl GenericPTE for PageTableEntry { fn addr(&self) -> HostPhysAddr { - 0 + self.0 as usize & Self::PHYS_ADDR_MASK } fn flags(&self) -> MemFlags { - MemFlags::READ + DescriptorAttr::from_bits_truncate(self.0).into() } fn is_unused(&self) -> bool { - false + self.0 == 0 } fn is_present(&self) -> bool { - false + self.0 & 0x7 != 0 // RWX != 0 } fn is_huge(&self) -> bool { - false + DescriptorAttr::from_bits_truncate(self.0).contains(DescriptorAttr::HUGE_PAGE) } - fn set_addr(&mut self, paddr: HostPhysAddr) {} + fn set_addr(&mut self, paddr: HostPhysAddr) { + self.0 = + (self.0 & !Self::PHYS_ADDR_MASK as u64) | (paddr as u64 & Self::PHYS_ADDR_MASK as u64); + } - fn set_flags(&mut self, flags: MemFlags, is_huge: bool) {} + fn set_flags(&mut self, flags: MemFlags, is_huge: bool) { + let mut attr = DescriptorAttr::from(flags); + if is_huge { + attr |= DescriptorAttr::HUGE_PAGE; + } + self.0 = (attr.bits() & !Self::PHYS_ADDR_MASK as u64) + | (self.0 as u64 & Self::PHYS_ADDR_MASK as u64); + } - fn set_table(&mut self, paddr: HostPhysAddr) {} + fn set_table(&mut self, paddr: HostPhysAddr) { + let attr = DescriptorAttr::READ | DescriptorAttr::WRITE | DescriptorAttr::EXECUTE; + self.0 = (attr.bits() & !Self::PHYS_ADDR_MASK as u64) + | (paddr as u64 & Self::PHYS_ADDR_MASK as u64); + } - fn clear(&mut self) {} + fn clear(&mut self) { + self.0 = 0 + } } impl fmt::Debug for PageTableEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Ok(()) + f.debug_struct("Stage2PageTableEntry") + .field("raw", &self.0) + .field("paddr", &self.addr()) + .field("flags", &self.flags()) + .field("memory_type", &self.memory_type()) + .finish() } } +/// Invalidate Translations Derived from EPT. (SDM Vol. 3C, Section 30.3) +/// +/// Invalidates mappings in the translation lookaside buffers (TLBs) and +/// paging-structure caches that were derived from extended page tables (EPT). 
+/// (See Chapter 28, “VMX Support for Address Translation”.) Invalidation is +/// based on the INVEPT type specified in the register operand and the INVEPT +/// descriptor specified in the memory operand. +unsafe fn invs2pt(inv_type: InvS2PTType, s2ptp: u64) { + let invs2pt_desc = [s2ptp, 0]; + asm!("invept {0}, [{1}]", in(reg) inv_type as u64, in(reg) &invs2pt_desc); +} + pub struct S2PTInstr; impl PagingInstr for S2PTInstr { - unsafe fn activate(root_paddr: HostPhysAddr) {} + unsafe fn activate(root_paddr: HostPhysAddr) { + let s2ptp = S2PTPointer::from_table_phys(root_paddr).bits(); + info!( + "s2pt activate, root paddr: 0x{:x}, s2ptp: 0x{:x}", + root_paddr, s2ptp + ); + crate::arch::vmx::set_s2ptp(s2ptp).unwrap(); + unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) }; + } + fn flush(_vaddr: Option) {} } -pub type Stage2PageTable = HvPageTable; +/// Information about nested page faults. +#[derive(Debug)] +pub struct Stage2PageFaultInfo { + /// Access type that caused the nested page fault. + pub access_flags: MemFlags, + /// Guest physical address that caused the nested page fault. 
+ pub fault_guest_paddr: GuestPhysAddr, +} + +pub type Stage2PageTable = Level4PageTable; diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 9a4a7e0d..cac4175d 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,8 +1,11 @@ -use alloc::vec; - -use super::{cpu::TrapFrame, lapic::vectors::APIC_TIMER_VECTOR}; +use super::{ + cpu::{ArchCpu, TrapFrame}, + lapic::vectors::APIC_TIMER_VECTOR, +}; use crate::arch::{idt::IdtStruct, lapic::local_apic}; -use core::arch::global_asm; +use crate::{arch::vmx::*, error::HvResult}; +use alloc::vec; +use core::arch::{self, global_asm}; global_asm!( include_str!("trap.S"), @@ -11,6 +14,7 @@ global_asm!( const IRQ_VECTOR_START: u8 = 0x20; const IRQ_VECTOR_END: u8 = 0xff; +const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; #[allow(dead_code)] #[allow(non_snake_case)] @@ -70,3 +74,51 @@ fn handle_irq(vector: u8) { } } } + +fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult { + let regs = arch_cpu.regs(); + debug!( + "VM exit: VMCALL({:#x}): {:?}", + regs.rax, + [regs.rdi, regs.rsi, regs.rdx, regs.rcx] + ); + advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL)?; + Ok(()) +} + +fn handle_ept_violation(guest_rip: usize, arch_cpu: &mut ArchCpu) -> HvResult { + let fault_info = ept_violation_info()?; + panic!( + "VM exit: EPT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?}), arch_cpu: {:#x?}", + guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags, arch_cpu + ); +} + +pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { + let exit_info = exit_info()?; + debug!("VM exit: {:#x?}", exit_info); + + if exit_info.entry_failure { + panic!("VM entry failed: {:#x?}", exit_info); + } + + let res = match exit_info.exit_reason { + VmxExitReason::VMCALL => handle_hypercall(arch_cpu), + VmxExitReason::EPT_VIOLATION => handle_ept_violation(exit_info.guest_rip, arch_cpu), + _ => panic!( + "Unhandled VM-Exit reason {:?}:\n{:#x?}", + exit_info.exit_reason, + arch_cpu.regs() + ), + }; + + if res.is_err() { + 
panic!( + "Failed to handle VM-exit {:?}:\n{:#x?}", + exit_info.exit_reason, + arch_cpu.regs() + ); + } + + Ok(()) +} diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs index defb340b..ab3e35c6 100644 --- a/src/arch/x86_64/vmx.rs +++ b/src/arch/x86_64/vmx.rs @@ -1,8 +1,9 @@ #![allow(non_camel_case_types)] #![allow(clippy::upper_case_acronyms)] - +use crate::arch::s2pt::Stage2PageFaultInfo; +use crate::consts::PAGE_SIZE; use crate::error::{HvError, HvResult}; -use crate::memory::{Frame, PhysAddr}; +use crate::memory::{Frame, GuestPhysAddr, HostPhysAddr, MemFlags, PhysAddr}; use bit_field::BitField; use bitflags::{bitflags, Flags}; use raw_cpuid::CpuId; @@ -189,9 +190,9 @@ pub unsafe fn enable_vmxon() -> HvResult { Ok(()) } -pub unsafe fn get_vmcs_revision_id() -> u32 { +pub fn get_vmcs_revision_id() -> u32 { let vmx_basic_reg = Msr::new(IA32_VMX_BASIC); - let vmx_basic_flag = vmx_basic_reg.read(); + let vmx_basic_flag = unsafe { vmx_basic_reg.read() }; vmx_basic_flag.get_bits(0..=30) as u32 } @@ -214,9 +215,9 @@ pub unsafe fn enable_vmcs(start_paddr: u64) -> HvResult { // natural-width type unw = u64; -pub unsafe fn setup_vmcs_host(vmx_exit: usize) -> HvResult { - vmwrite::(host::IA32_PAT_FULL, Msr::new(IA32_PAT).read())?; - vmwrite::(host::IA32_EFER_FULL, Msr::new(IA32_EFER).read())?; +pub fn setup_vmcs_host(vmx_exit: usize) -> HvResult { + vmwrite::(host::IA32_PAT_FULL, unsafe { Msr::new(IA32_PAT).read() })?; + vmwrite::(host::IA32_EFER_FULL, unsafe { Msr::new(IA32_EFER).read() })?; vmwrite::(host::CR0, Cr0::read_raw())?; vmwrite::(host::CR3, Cr3::read_raw().0.start_address().as_u64())?; @@ -229,8 +230,8 @@ pub unsafe fn setup_vmcs_host(vmx_exit: usize) -> HvResult { vmwrite::(host::FS_SELECTOR, x86::segmentation::fs().bits())?; vmwrite::(host::GS_SELECTOR, x86::segmentation::gs().bits())?; - vmwrite::(host::FS_BASE, Msr::new(IA32_FS_BASE).read())?; - vmwrite::(host::GS_BASE, Msr::new(IA32_GS_BASE).read())?; + vmwrite::(host::FS_BASE, unsafe { 
Msr::new(IA32_FS_BASE).read() })?; + vmwrite::(host::GS_BASE, unsafe { Msr::new(IA32_GS_BASE).read() })?; let tr = unsafe { x86::task::tr() }; let mut gdtp = DescriptorTablePointer::::default(); @@ -254,7 +255,7 @@ pub unsafe fn setup_vmcs_host(vmx_exit: usize) -> HvResult { Ok(()) } -pub unsafe fn setup_vmcs_guest(entry: usize) -> HvResult { +pub fn setup_vmcs_guest(entry: usize) -> HvResult { // Enable protected mode and paging. let cr0_guest = Cr0Flags::PROTECTED_MODE_ENABLE | Cr0Flags::EXTENSION_TYPE @@ -301,7 +302,7 @@ pub unsafe fn setup_vmcs_guest(entry: usize) -> HvResult { vmwrite::(guest::IDTR_BASE, 0)?; vmwrite::(guest::IDTR_LIMIT, 0xffff)?; - vmwrite::(guest::CR3, Cr3::read_raw().0.start_address().as_u64())?; + vmwrite::(guest::CR3, 0)?; vmwrite::(guest::DR7, 0x400)?; vmwrite::(guest::RSP, 0)?; vmwrite::(guest::RIP, entry as unw)?; @@ -317,18 +318,18 @@ pub unsafe fn setup_vmcs_guest(entry: usize) -> HvResult { vmwrite::(guest::LINK_PTR_FULL, u64::MAX)?; vmwrite::(guest::IA32_DEBUGCTL_FULL, 0)?; - vmwrite::(guest::IA32_PAT_FULL, Msr::new(IA32_PAT).read())?; - vmwrite::(guest::IA32_EFER_FULL, Msr::new(IA32_EFER).read())?; + vmwrite::(guest::IA32_PAT_FULL, unsafe { Msr::new(IA32_PAT).read() })?; + vmwrite::(guest::IA32_EFER_FULL, unsafe { Msr::new(IA32_EFER).read() })?; Ok(()) } -pub unsafe fn setup_vmcs_control() -> HvResult { +pub fn setup_vmcs_control() -> HvResult { // Intercept NMI, pass-through external interrupts. 
set_control( control::PINBASED_EXEC_CONTROLS, Msr::new(IA32_VMX_TRUE_PINBASED_CTLS), - Msr::new(IA32_VMX_PINBASED_CTLS).read() as u32, + unsafe { Msr::new(IA32_VMX_PINBASED_CTLS).read() } as u32, PinbasedControls::NMI_EXITING.bits(), 0, )?; @@ -337,17 +338,20 @@ pub unsafe fn setup_vmcs_control() -> HvResult { set_control( control::PRIMARY_PROCBASED_EXEC_CONTROLS, Msr::new(IA32_VMX_TRUE_PROCBASED_CTLS), - Msr::new(IA32_VMX_PROCBASED_CTLS).read() as u32, + unsafe { Msr::new(IA32_VMX_PROCBASED_CTLS).read() } as u32, PrimaryControls::SECONDARY_CONTROLS.bits(), (PrimaryControls::CR3_LOAD_EXITING | PrimaryControls::CR3_STORE_EXITING).bits(), )?; - // Enable RDTSCP, INVPCID. + // Enable EPT, RDTSCP, INVPCID. set_control( control::SECONDARY_PROCBASED_EXEC_CONTROLS, Msr::new(IA32_VMX_PROCBASED_CTLS2), 0, - (SecondaryControls::ENABLE_RDTSCP | SecondaryControls::ENABLE_INVPCID).bits(), + (SecondaryControls::ENABLE_EPT + | SecondaryControls::ENABLE_RDTSCP + | SecondaryControls::ENABLE_INVPCID) + .bits(), 0, )?; @@ -355,7 +359,7 @@ pub unsafe fn setup_vmcs_control() -> HvResult { set_control( control::VMEXIT_CONTROLS, Msr::new(IA32_VMX_TRUE_EXIT_CTLS), - Msr::new(IA32_VMX_EXIT_CTLS).read() as u32, + unsafe { Msr::new(IA32_VMX_EXIT_CTLS).read() } as u32, (ExitControls::HOST_ADDRESS_SPACE_SIZE | ExitControls::SAVE_IA32_PAT | ExitControls::LOAD_IA32_PAT @@ -369,7 +373,7 @@ pub unsafe fn setup_vmcs_control() -> HvResult { set_control( control::VMENTRY_CONTROLS, Msr::new(IA32_VMX_TRUE_ENTRY_CTLS), - Msr::new(IA32_VMX_ENTRY_CTLS).read() as u32, + unsafe { Msr::new(IA32_VMX_ENTRY_CTLS).read() } as u32, (EntryControls::IA32E_MODE_GUEST | EntryControls::LOAD_IA32_PAT | EntryControls::LOAD_IA32_EFER) @@ -457,22 +461,36 @@ impl From for HvError { } } -pub unsafe fn advance_guest_rip(instr_len: u8) -> HvResult { - Ok(vmwrite::( - guest::RIP, - (vmread(guest::RIP)? + instr_len as u64), - )?) 
+pub fn advance_guest_rip(instr_len: u8) -> HvResult { + unsafe { + Ok(vmwrite::( + guest::RIP, + (vmread(guest::RIP)? + instr_len as u64), + )?) + } } -pub unsafe fn instruction_error() -> u32 { +pub fn instruction_error() -> u32 { vmread(ro::VM_INSTRUCTION_ERROR).unwrap() as u32 } -pub unsafe fn set_host_rsp(paddr: usize) -> HvResult { - Ok(vmwrite::(host::RSP, paddr as unw)?) +pub fn set_host_rsp(rsp: HostPhysAddr) -> HvResult { + Ok(vmwrite::(host::RSP, rsp as unw)?) +} + +pub fn set_guest_page_table(cr3: GuestPhysAddr) -> HvResult { + Ok(vmwrite::(guest::CR3, cr3 as unw)?) } -pub unsafe fn exit_info() -> HvResult { +pub fn set_guest_stack_pointer(rsp: GuestPhysAddr) -> HvResult { + Ok(vmwrite::(guest::RSP, rsp as unw)?) +} + +pub fn set_s2ptp(s2ptp: u64) -> HvResult { + Ok(vmwrite::(control::EPTP_FULL, s2ptp as u64)?) +} + +pub fn exit_info() -> HvResult { let full_reason = vmread(ro::EXIT_REASON)? as u32; Ok(VmxExitInfo { exit_reason: full_reason @@ -484,3 +502,35 @@ pub unsafe fn exit_info() -> HvResult { guest_rip: vmread(guest::RIP)? as usize, }) } + +pub fn ept_violation_info() -> HvResult { + // SDM Vol. 3C, Section 27.2.1, Table 27-7 + let qualification = vmread(ro::EXIT_QUALIFICATION)? as u64; + let fault_guest_paddr = vmread(ro::GUEST_PHYSICAL_ADDR_FULL)? 
as usize; + let mut access_flags = MemFlags::empty(); + if qualification.get_bit(0) { + access_flags |= MemFlags::READ; + } + if qualification.get_bit(1) { + access_flags |= MemFlags::WRITE; + } + if qualification.get_bit(2) { + access_flags |= MemFlags::EXECUTE; + } + Ok(Stage2PageFaultInfo { + access_flags, + fault_guest_paddr, + }) +} + +pub fn guest_rip() -> unw { + vmread(guest::RIP).unwrap() as unw +} + +pub fn guest_rsp() -> unw { + vmread(guest::RSP).unwrap() as unw +} + +pub fn guest_cr3() -> unw { + vmread(guest::CR3).unwrap() as unw +} diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index d828da9e..077bdf2f 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -14,6 +14,37 @@ pub struct HvArchZoneConfig {} impl Zone { pub fn pt_init(&mut self, mem_regions: &[HvConfigMemoryRegion]) -> HvResult { + // The first memory region is used to map the guest physical memory. + + for mem_region in mem_regions.iter() { + let mut flags = MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE; + if mem_region.mem_type == MEM_TYPE_IO { + flags |= MemFlags::IO; + } + match mem_region.mem_type { + MEM_TYPE_RAM | MEM_TYPE_IO => { + self.gpm.insert(MemoryRegion::new_with_offset_mapper( + mem_region.virtual_start as GuestPhysAddr, + mem_region.physical_start as HostPhysAddr, + mem_region.size as _, + flags, + ))? 
+ } + /*TODO: MEM_TYPE_VIRTIO => { + self.mmio_region_register( + mem_region.physical_start as _, + mem_region.size as _, + mmio_virtio_handler, + mem_region.physical_start as _, + ); + }*/ + _ => { + panic!("Unsupported memory type: {}", mem_region.mem_type) + } + } + } + + info!("VM stage 2 memory set: {:#x?}", self.gpm); Ok(()) } diff --git a/src/main.rs b/src/main.rs index 3170bd16..3f02f619 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,10 +104,11 @@ fn primary_init_early() { event::init(MAX_CPU_NUM); device::irqchip::primary_init_early(); - // crate::arch::mm::init_hv_page_table().unwrap(); + // TODO: tmp + crate::arch::mm::init_hv_page_table().unwrap(); // TODO: - // zone_create(root_zone_config()).unwrap(); + zone_create(root_zone_config()).unwrap(); INIT_EARLY_OK.store(1, Ordering::Release); } diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index 52f3d284..c35484e6 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -1,13 +1,65 @@ -use crate::{arch::zone::HvArchZoneConfig, config::*}; +use crate::{ + arch::zone::HvArchZoneConfig, + config::*, + memory::{GuestPhysAddr, HostPhysAddr}, +}; pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x00000000; pub const ROOT_ZONE_ENTRY: u64 = 0x00000000; -pub const ROOT_ZONE_CPUS: u64 = 0; +pub const ROOT_ZONE_CPUS: u64 = (1 << 0); pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 0] = []; +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 1] = [HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x100_0000, + virtual_start: 0x0, + size: 0x100_0000, +}]; pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig {}; + +// TODO: temp +pub const GUEST_PT1: GuestPhysAddr = 0x1000; +pub const GUEST_PT2: GuestPhysAddr = 0x2000; +pub const GUEST_ENTRY: GuestPhysAddr = 0x8000; +pub const 
GUEST_STACK_TOP: GuestPhysAddr = 0x7000; +pub const GUEST_PHYS_MEMORY_START: HostPhysAddr = 0x100_0000; + +pub fn gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { + let offset = GUEST_PHYS_MEMORY_START as usize; + let host_vaddr = guest_paddr + offset; + host_vaddr as *mut u8 +} + +#[naked] +pub unsafe extern "C" fn test_guest() -> ! { + core::arch::asm!( + " + mov rax, 0 + mov rdi, 2 + mov rsi, 3 + mov rdx, 3 + mov rcx, 3 + 2: + vmcall + add rax, 1 + jmp 2b", + options(noreturn), + ); +} + +pub unsafe extern "C" fn test_guest_2() -> ! { + core::arch::asm!( + "vmcall", + inout("rax") 0 => _, + in("rdi") 2, + in("rsi") 3, + in("rdx") 3, + in("rcx") 3, + ); + core::arch::asm!("mov qword ptr [$0xffff233], $2333"); // panic + loop {} +} From 743bfed1621672280c5d29d6c9d6c87c8462271e Mon Sep 17 00:00:00 2001 From: Solicey Date: Fri, 24 Jan 2025 23:36:46 +0800 Subject: [PATCH 05/29] enable UART and APIC timer virtualization, test with temporary guest OS --- .gitignore | 1 + scripts/qemu-x86_64.mk | 6 + src/arch/x86_64/cpu.rs | 175 ++++++--- src/arch/x86_64/device.rs | 37 ++ src/arch/x86_64/entry.rs | 13 +- src/arch/x86_64/gdt.rs | 19 +- src/arch/x86_64/lapic.rs | 10 +- src/arch/x86_64/mm.rs | 9 +- src/arch/x86_64/mod.rs | 2 + src/arch/x86_64/msr.rs | 169 ++++++++ src/arch/x86_64/paging.rs | 393 +++++++++++-------- src/arch/x86_64/s2pt.rs | 4 - src/arch/x86_64/trap.rs | 241 ++++++++++-- src/arch/x86_64/vmx.rs | 480 ++++++++++++++++------- src/arch/x86_64/zone.rs | 5 +- src/device/irqchip/mod.rs | 4 +- src/device/irqchip/pic/i8259.rs | 25 ++ src/device/irqchip/pic/lapic.rs | 193 +++++++++ src/device/irqchip/{i8259 => pic}/mod.rs | 3 + src/device/uart/mod.rs | 2 +- src/device/uart/uart16550.rs | 158 +++++++- src/main.rs | 2 +- src/platform/qemu_x86_64.rs | 36 +- 23 files changed, 1520 insertions(+), 467 deletions(-) create mode 100644 src/arch/x86_64/device.rs create mode 100644 src/arch/x86_64/msr.rs create mode 100644 src/device/irqchip/pic/i8259.rs create mode 
100644 src/device/irqchip/pic/lapic.rs rename src/device/irqchip/{i8259 => pic}/mod.rs (92%) diff --git a/.gitignore b/.gitignore index ed333980..64a4e2a3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ /images/riscv64/virtdisk/* /images/riscv64/kernel/* /images/riscv64/devicetree/*.dtb +/images/x86_64/* /tools/hvisor /tmp *.mod.[co] diff --git a/scripts/qemu-x86_64.mk b/scripts/qemu-x86_64.mk index dae152be..0916d46d 100644 --- a/scripts/qemu-x86_64.mk +++ b/scripts/qemu-x86_64.mk @@ -1,12 +1,18 @@ QEMU := qemu-system-x86_64 +zone0_bios := $(image_dir)/rvm-bios.bin +zone0_kernel := $(image_dir)/nimbos.bin + QEMU_ARGS := -machine q35 QEMU_ARGS += -cpu host -accel kvm QEMU_ARGS += -smp 4 QEMU_ARGS += -serial mon:stdio QEMU_ARGS += -m 2G QEMU_ARGS += -nographic + QEMU_ARGS += -kernel $(hvisor_elf) +QEMU_ARGS += -device loader,file="$(zone0_bios)",addr=0x1008000,force-raw=on +QEMU_ARGS += -device loader,file="$(zone0_kernel)",addr=0x1200000,force-raw=on $(hvisor_bin): elf $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ \ No newline at end of file diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index b94a75cc..54b26210 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,21 +1,30 @@ -use crate::arch::gdt::GdtStruct; -use crate::arch::lapic::{busy_wait, local_apic}; -use crate::arch::vmx::*; -use crate::consts::{core_end, PER_CPU_SIZE}; -use crate::error::{HvError, HvResult}; -use crate::memory::{addr::phys_to_virt, Frame, PhysAddr, PAGE_SIZE}; -use crate::memory::{GuestPhysAddr, HostPhysAddr}; -use crate::percpu::this_cpu_data; -use crate::platform::qemu_x86_64::*; -use alloc::boxed::Box; -use core::arch::{asm, global_asm}; -use core::fmt::{Debug, Formatter, Result}; -use core::mem::size_of; -use core::panicking::panic; -use core::time::Duration; +use crate::{ + arch::{ + gdt::GdtStruct, + lapic::{busy_wait, local_apic}, + msr::Msr::*, + msr::MsrBitmap, + vmx::*, + }, + consts::{core_end, PER_CPU_SIZE}, + 
device::irqchip::pic::lapic::VirtApicTimer, + error::{HvError, HvResult}, + memory::{addr::phys_to_virt, GuestPhysAddr, PhysAddr, PAGE_SIZE}, + percpu::this_cpu_data, + platform::qemu_x86_64::*, +}; +use alloc::{boxed::Box, collections::vec_deque::VecDeque}; +use core::{ + arch::{asm, global_asm}, + fmt::{Debug, Formatter, Result}, + mem::size_of, + time::Duration, +}; use raw_cpuid::CpuId; use x86_64::structures::tss::TaskStateSegment; +use super::msr::Msr; + const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; @@ -100,23 +109,6 @@ pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; } -#[repr(C)] -#[derive(Debug, Default, Clone, Copy)] -pub struct TrapFrame { - pub usr: [u64; 15], - - // pushed by 'trap.S' - pub vector: u64, - pub error_code: u64, - - // pushed by CPU - pub rip: u64, - pub cs: u64, - pub rflags: u64, - pub rsp: u64, - pub ss: u64, -} - /// General-Purpose Registers for 64-bit x86 architecture. #[repr(C)] #[derive(Debug, Default, Clone)] @@ -141,7 +133,7 @@ pub struct GeneralRegisters { #[repr(C)] pub struct ArchCpu { - // guest_regs and host_stack_top should always be at the first. + // guest_regs and host_stack_top should always be at first. guest_regs: GeneralRegisters, host_stack_top: u64, pub cpuid: usize, @@ -150,6 +142,9 @@ pub struct ArchCpu { vmcs_revision_id: u32, vmxon_region: VmxRegion, vmcs_region: VmxRegion, + msr_bitmap: MsrBitmap, + apic_timer: VirtApicTimer, + pending_events: VecDeque<(u8, Option)>, } impl ArchCpu { @@ -165,6 +160,9 @@ impl ArchCpu { vmcs_region: VmxRegion::uninit(), guest_regs: GeneralRegisters::default(), host_stack_top: 0, + msr_bitmap: MsrBitmap::uninit(), + apic_timer: VirtApicTimer::new(), + pending_events: VecDeque::with_capacity(8), } } @@ -174,6 +172,47 @@ impl ArchCpu { Ok(()) } + pub fn run(&mut self) -> ! 
{ + assert!(this_cpu_id() == self.cpuid); + // TODO: this_cpu_data().cpu_on_entry + self.init(GUEST_ENTRY, this_cpu_data().dtb_ipa).unwrap(); + + this_cpu_data().activate_gpm(); + set_host_rsp(&self.host_stack_top as *const _ as usize).unwrap(); + set_guest_page_table(GUEST_PT1).unwrap(); + set_guest_stack_pointer(GUEST_STACK_TOP).unwrap(); + + unsafe { self.vmx_launch() }; + loop {} + } + + pub fn idle(&mut self) -> ! { + assert!(this_cpu_id() == self.cpuid); + unsafe { self.init(0, this_cpu_data().dtb_ipa) }; + loop {} + } + + /// Guest general-purpose registers. + pub fn regs(&self) -> &GeneralRegisters { + &self.guest_regs + } + + /// Mutable reference of guest general-purpose registers. + pub fn regs_mut(&mut self) -> &mut GeneralRegisters { + &mut self.guest_regs + } + + /// Returns the mutable reference of [`VirtApicTimer`]. + pub fn apic_timer_mut(&mut self) -> &mut VirtApicTimer { + &mut self.apic_timer + } + + /// Add a virtual interrupt or exception to the pending events list, + /// and try to inject it before later VM entries. + pub fn inject_event(&mut self, vector: u8, err_code: Option) { + self.pending_events.push_back((vector, err_code)); + } + fn activate_vmx(&mut self) -> HvResult { assert!(check_vmx_support()); assert!(!is_vmx_enabled()); @@ -189,49 +228,23 @@ impl ArchCpu { unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64)? }; - info!( - "VMX enabled, region: 0x{:x}", - self.vmxon_region.start_paddr(), - ); Ok(()) } fn setup_vmcs(&mut self, entry: GuestPhysAddr) -> HvResult { self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false)?; + self.msr_bitmap = MsrBitmap::init()?; + self.setup_msr_bitmap()?; + unsafe { enable_vmcs(self.vmcs_region.start_paddr() as u64)? 
}; setup_vmcs_host(Self::vmx_exit as usize)?; setup_vmcs_guest(entry)?; - setup_vmcs_control()?; - - info!( - "VMCS enabled, region: 0x{:x}", - self.vmcs_region.start_paddr(), - ); + setup_vmcs_control(self.msr_bitmap.phys_addr())?; Ok(()) } - pub fn run(&mut self) -> ! { - assert!(this_cpu_id() == self.cpuid); - // TODO: this_cpu_data().cpu_on_entry - self.init(GUEST_ENTRY, this_cpu_data().dtb_ipa).unwrap(); - - this_cpu_data().activate_gpm(); - set_host_rsp(&self.host_stack_top as *const _ as usize).unwrap(); - set_guest_page_table(GUEST_PT1).unwrap(); - set_guest_stack_pointer(GUEST_STACK_TOP).unwrap(); - - unsafe { self.vmx_launch() }; - loop {} - } - - pub fn idle(&mut self) -> ! { - assert!(this_cpu_id() == self.cpuid); - unsafe { self.init(0, this_cpu_data().dtb_ipa) }; - loop {} - } - #[naked] unsafe extern "C" fn vmx_launch(&mut self) -> ! { asm!( @@ -271,10 +284,42 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); + // Check if there is an APIC timer interrupt + if self.apic_timer.check_interrupt() { + self.inject_event(self.apic_timer.vector(), None); + } + self.check_pending_events().unwrap(); } - pub fn regs(&self) -> &GeneralRegisters { - &self.guest_regs + /// Try to inject a pending event before next VM entry. + fn check_pending_events(&mut self) -> HvResult { + if let Some(event) = self.pending_events.front() { + let allow_interrupt = allow_interrupt()?; + if event.0 < 32 || allow_interrupt { + // if it's an exception, or an interrupt that is not blocked, inject it directly. + inject_event(event.0, event.1)?; + self.pending_events.pop_front(); + } else { + // interrupts are blocked, enable interrupt-window exiting. 
+ set_interrupt_window(true)?; + } + } + Ok(()) + } + + fn setup_msr_bitmap(&mut self) -> HvResult { + // Intercept IA32_APIC_BASE MSR accesses + let msr = IA32_APIC_BASE; + self.msr_bitmap.set_read_intercept(msr, true); + self.msr_bitmap.set_write_intercept(msr, true); + // Intercept all x2APIC MSR accesses + for addr in 0x800_u32..=0x83f_u32 { + if let Ok(msr) = Msr::try_from(addr) { + self.msr_bitmap.set_read_intercept(msr, true); + self.msr_bitmap.set_write_intercept(msr, true); + } + } + Ok(()) } } diff --git a/src/arch/x86_64/device.rs b/src/arch/x86_64/device.rs new file mode 100644 index 00000000..dd3d1973 --- /dev/null +++ b/src/arch/x86_64/device.rs @@ -0,0 +1,37 @@ +use crate::{ + device::{irqchip::pic::i8259::VirtI8259Pic, uart::VirtUart16550}, + error::HvResult, +}; +use alloc::{sync::Arc, vec, vec::Vec}; + +pub trait PortIoDevice: Send + Sync { + fn port_range(&self) -> core::ops::Range; + fn read(&self, port: u16, access_size: u8) -> HvResult; + fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult; +} + +pub struct VirtDeviceList { + port_io_devices: Vec>, +} + +impl VirtDeviceList { + pub fn find_port_io_device(&self, port: u16) -> Option<&Arc> { + self.port_io_devices + .iter() + .find(|dev| dev.port_range().contains(&port)) + } +} + +lazy_static::lazy_static! 
{ + static ref VIRT_DEVICES : VirtDeviceList = VirtDeviceList { + port_io_devices: vec![ + Arc::new(VirtUart16550::new(0x3f8)), // COM1 + Arc::new(VirtI8259Pic::new(0x20)), // PIC1 + Arc::new(VirtI8259Pic::new(0xA0)), // PIC2 + ], + }; +} + +pub fn all_virt_devices() -> &'static VirtDeviceList { + &VIRT_DEVICES +} diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index 50226438..c77afcc6 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,11 +1,12 @@ -use crate::arch::cpu::this_cpu_id; -use crate::consts::PER_CPU_SIZE; -use crate::memory::addr::PHYS_VIRT_OFFSET; -use crate::rust_main; +use crate::{ + arch::cpu::this_cpu_id, consts::PER_CPU_SIZE, memory::addr::PHYS_VIRT_OFFSET, rust_main, +}; use core::arch::global_asm; use x86::msr::IA32_EFER; -use x86_64::registers::control::{Cr0Flags, Cr4Flags}; -use x86_64::registers::model_specific::EferFlags; +use x86_64::registers::{ + control::{Cr0Flags, Cr4Flags}, + model_specific::EferFlags, +}; const MULTIBOOT_HEADER_MAGIC: i32 = 0x1BADB002; const MULTIBOOT_HEADER_FLAGS: i32 = 0x00010002; diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index d4707bdf..113b920b 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -1,12 +1,17 @@ use crate::arch::cpu::{self, this_cpu_id}; -use alloc::boxed::Box; -use alloc::collections::btree_map::BTreeMap; +use alloc::{boxed::Box, collections::btree_map::BTreeMap}; use spin::Mutex; -use x86_64::instructions::tables::{lgdt, load_tss}; -use x86_64::registers::segmentation::{Segment, SegmentSelector, CS}; -use x86_64::structures::gdt::{Descriptor, DescriptorFlags}; -use x86_64::structures::{tss::TaskStateSegment, DescriptorTablePointer}; -use x86_64::{addr::VirtAddr, PrivilegeLevel}; +use x86_64::{ + addr::VirtAddr, + instructions::tables::{lgdt, load_tss}, + registers::segmentation::{Segment, SegmentSelector, CS}, + structures::{ + gdt::{Descriptor, DescriptorFlags}, + tss::TaskStateSegment, + DescriptorTablePointer, + }, + 
PrivilegeLevel, +}; #[repr(align(16))] #[derive(Debug)] diff --git a/src/arch/x86_64/lapic.rs b/src/arch/x86_64/lapic.rs index 32e58a6f..27c0ae97 100644 --- a/src/arch/x86_64/lapic.rs +++ b/src/arch/x86_64/lapic.rs @@ -1,5 +1,5 @@ use self::vectors::*; -use crate::device::irqchip::i8259::enable_irq; +use crate::device::irqchip::pic::enable_irq; use core::time::Duration; use raw_cpuid::CpuId; use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}; @@ -16,7 +16,7 @@ pub mod vectors { static mut LOCAL_APIC: Option = None; static mut CPU_FREQ_MHZ: u64 = 4_000; const LAPIC_TICKS_PER_SEC: u64 = 1_000_000_000; // TODO: need to calibrate -const TICKS_PER_SEC: u64 = 1; +const TICKS_PER_SEC: u64 = 100; pub fn local_apic<'a>() -> &'a mut LocalApic { // It's safe as LAPIC is per-cpu. @@ -31,8 +31,12 @@ pub fn ticks_to_nanos(ticks: u64) -> u64 { ticks * 1_000 / unsafe { CPU_FREQ_MHZ } } +pub fn current_time_nanos() -> u64 { + ticks_to_nanos(current_ticks()) +} + pub fn current_time() -> TimeValue { - TimeValue::from_nanos(ticks_to_nanos(current_ticks())) + TimeValue::from_nanos(current_time_nanos()) } pub fn busy_wait(duration: Duration) { diff --git a/src/arch/x86_64/mm.rs b/src/arch/x86_64/mm.rs index 41aa9a68..a76ae01b 100644 --- a/src/arch/x86_64/mm.rs +++ b/src/arch/x86_64/mm.rs @@ -1,12 +1,5 @@ use crate::{ - arch::s1pt::Stage1PageTable, - arch::s2pt::Stage2PageTable, - consts::PAGE_SIZE, - error::HvResult, - memory::{ - addr::{align_down, align_up}, - GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet, HV_PT, - }, + arch::s1pt::Stage1PageTable, arch::s2pt::Stage2PageTable, error::HvResult, memory::MemorySet, platform::qemu_x86_64::*, }; diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index c569b835..cb10ec27 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,11 +1,13 @@ #![allow(unused)] pub mod cpu; +pub mod device; pub mod entry; pub mod gdt; pub mod idt; pub mod ipi; pub mod lapic; pub mod mm; +pub mod 
msr; pub mod paging; pub mod s1pt; pub mod s2pt; diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs new file mode 100644 index 00000000..966e4856 --- /dev/null +++ b/src/arch/x86_64/msr.rs @@ -0,0 +1,169 @@ +use x86::msr::{rdmsr, wrmsr}; + +use crate::{ + error::HvResult, + memory::{Frame, HostPhysAddr}, +}; + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +/// X86 model-specific registers. (SDM Vol. 4) +pub enum Msr { + /// APIC Location and Status (R/W) See Table 35-2. See Section 10.4.4, Local APIC Status and Location. + IA32_APIC_BASE = 0x1b, + IA32_FEATURE_CONTROL = 0x3a, + IA32_PAT = 0x277, + + IA32_VMX_BASIC = 0x480, + IA32_VMX_PINBASED_CTLS = 0x481, + IA32_VMX_PROCBASED_CTLS = 0x482, + IA32_VMX_EXIT_CTLS = 0x483, + IA32_VMX_ENTRY_CTLS = 0x484, + IA32_VMX_MISC = 0x485, + IA32_VMX_CR0_FIXED0 = 0x486, + IA32_VMX_CR0_FIXED1 = 0x487, + IA32_VMX_CR4_FIXED0 = 0x488, + IA32_VMX_CR4_FIXED1 = 0x489, + IA32_VMX_PROCBASED_CTLS2 = 0x48b, + IA32_VMX_EPT_VPID_CAP = 0x48c, + IA32_VMX_TRUE_PINBASED_CTLS = 0x48d, + IA32_VMX_TRUE_PROCBASED_CTLS = 0x48e, + IA32_VMX_TRUE_EXIT_CTLS = 0x48f, + IA32_VMX_TRUE_ENTRY_CTLS = 0x490, + + /// x2APIC Msr + + /// ID register. + APICID = 0x802, + /// Version register. + VERSION = 0x803, + /// End-Of-Interrupt register. + EOI = 0x80B, + /// Logical Destination Register. + LDR = 0x80D, + /// Spurious Interrupt Vector register. + SIVR = 0x80F, + /// Interrupt Command register. + ICR = 0x830, + /// LVT Timer Interrupt register. + LVT_TIMER = 0x832, + /// LVT Thermal Sensor Interrupt register. + LVT_THERMAL = 0x833, + /// LVT Performance Monitor register. + LVT_PMI = 0x834, + /// LVT LINT0 register. + LVT_LINT0 = 0x835, + /// LVT LINT1 register. + LVT_LINT1 = 0x836, + /// LVT Error register. + LVT_ERR = 0x837, + /// Initial Count register. + INIT_COUNT = 0x838, + /// Current Count register. + CUR_COUNT = 0x839, + /// Divide Configuration register. 
+ DIV_CONF = 0x83E, + + IA32_EFER = 0xc000_0080, + IA32_STAR = 0xc000_0081, + IA32_LSTAR = 0xc000_0082, + IA32_CSTAR = 0xc000_0083, + IA32_FMASK = 0xc000_0084, + + IA32_FS_BASE = 0xc000_0100, + IA32_GS_BASE = 0xc000_0101, + IA32_KERNEL_GSBASE = 0xc000_0102, +} +} + +impl Msr { + /// Read 64 bits msr register. + #[inline(always)] + pub fn read(self) -> u64 { + unsafe { rdmsr(self as _) } + } + + /// Write 64 bits to msr register. + /// + /// # Safety + /// + /// The caller must ensure that this write operation has no unsafe side + /// effects. + #[inline(always)] + pub unsafe fn write(self, value: u64) { + wrmsr(self as _, value) + } +} + +#[derive(Debug)] +pub struct MsrBitmap { + frame: Frame, +} + +impl MsrBitmap { + pub fn uninit() -> Self { + Self { + frame: unsafe { Frame::from_paddr(0) }, + } + } + + pub fn init() -> HvResult { + Ok(Self { + frame: Frame::new_zero()?, + }) + } + + pub fn passthrough_all() -> HvResult { + Ok(Self { + frame: Frame::new_zero()?, + }) + } + + pub fn intercept_all() -> HvResult { + let mut frame = Frame::new()?; + frame.fill(u8::MAX); + Ok(Self { frame }) + } + + pub fn phys_addr(&self) -> HostPhysAddr { + self.frame.start_paddr() + } + + pub fn set_read_intercept(&mut self, msr: Msr, intercept: bool) { + self.set_intercept(msr as u32, false, intercept); + } + + pub fn set_write_intercept(&mut self, msr: Msr, intercept: bool) { + self.set_intercept(msr as u32, true, intercept); + } + + fn set_intercept(&mut self, msr: u32, is_write: bool, intercept: bool) { + let offset = if msr <= 0x1fff { + if !is_write { + 0 // Read bitmap for low MSRs (0x0000_0000..0x0000_1FFF) + } else { + 2 // Write bitmap for low MSRs (0x0000_0000..0x0000_1FFF) + } + } else if (0xc000_0000..=0xc000_1fff).contains(&msr) { + if !is_write { + 1 // Read bitmap for high MSRs (0xC000_0000..0xC000_1FFF) + } else { + 3 // Write bitmap for high MSRs (0xC000_0000..0xC000_1FFF) + } + } else { + unreachable!() + } * 1024; + let bitmap = + unsafe { 
core::slice::from_raw_parts_mut(self.frame.as_mut_ptr().add(offset), 1024) }; + let msr = msr & 0x1fff; + let byte = (msr / 8) as usize; + let bits = msr % 8; + if intercept { + bitmap[byte] |= 1 << bits; + } else { + bitmap[byte] &= !(1 << bits); + } + } +} diff --git a/src/arch/x86_64/paging.rs b/src/arch/x86_64/paging.rs index ca737f14..d8b84a74 100644 --- a/src/arch/x86_64/paging.rs +++ b/src/arch/x86_64/paging.rs @@ -1,10 +1,13 @@ -#![allow(unused)] -use alloc::vec::Vec; - -use crate::consts::PAGE_SIZE; -use crate::error::{HvError, HvResult}; -use crate::memory::{addr::is_aligned, Frame, MemFlags, MemoryRegion, PhysAddr, VirtAddr}; +use crate::{ + error::{HvError, HvResult}, + memory::{addr::is_aligned, Frame, MemFlags, MemoryRegion, PhysAddr, VirtAddr}, +}; +use alloc::{sync::Arc, vec::Vec}; use core::{fmt::Debug, marker::PhantomData, slice}; +use spin::Mutex; + +const LEVELS: usize = 4; +const ENTRY_COUNT: usize = 512; #[derive(Debug)] pub enum PagingError { @@ -118,25 +121,81 @@ pub trait GenericPageTable: GenericPageTableImmut { fn flush(&self, vaddr: Option); } -// TODO: lock -pub struct Level4PageTable { +/// A immutable level-4 page table implements `GenericPageTableImmut`. +pub struct Level4PageTableImmut { + // Root table frame. root: Frame, - // Intermediate level table frames. - intrm_tables: Vec, - _phantom: PhantomData<(VA, PTE, I)>, + // Phantom data. 
+ _phantom: PhantomData<(VA, PTE)>, } -impl Level4PageTable +impl Level4PageTableImmut where VA: From + Into + Copy, PTE: GenericPTE, - I: PagingInstr, { - #[allow(dead_code)] + fn new() -> Self { + let frame = Frame::new_zero().expect("failed to allocate root frame for host page table"); + Self { + root: frame, + _phantom: PhantomData, + } + } + + fn get_entry_mut(&self, vaddr: VA) -> PagingResult<(&mut PTE, PageSize)> { + let vaddr = vaddr.into(); + + let p4 = table_of_mut::(self.root_paddr()); + let p4e = &mut p4[p4_index(vaddr)]; + + let p3 = next_table_mut(p4e)?; + let p3e = &mut p3[p3_index(vaddr)]; + if p3e.is_huge() { + return Ok((p3e, PageSize::Size1G)); + } + + let p2 = next_table_mut(p3e)?; + let p2e = &mut p2[p2_index(vaddr)]; + if p2e.is_huge() { + return Ok((p2e, PageSize::Size2M)); + } + + let p1 = next_table_mut(p2e)?; + let p1e = &mut p1[p1_index(vaddr)]; + Ok((p1e, PageSize::Size4K)) + } + + fn walk( + &self, + table: &[PTE], + level: usize, + start_vaddr: usize, + limit: usize, + func: &impl Fn(usize, usize, usize, &PTE), + ) { + let mut n = 0; + for (i, entry) in table.iter().enumerate() { + let vaddr = start_vaddr + (i << (12 + (3 - level) * 9)); + if entry.is_present() { + func(level, i, vaddr, entry); + if level < 3 { + match next_table_mut(entry) { + Ok(entry) => self.walk(entry, level + 1, vaddr, limit, func), + Err(PagingError::MappedToHugePage) => {} + _ => unreachable!(), + } + } + n += 1; + if n >= limit { + break; + } + } + } + } + pub fn dump(&self, limit: usize) { - // TODO: lock - // static LOCK: Mutex<()> = Mutex::new(()); - // let _lock = LOCK.lock(); + static LOCK: Mutex<()> = Mutex::new(()); + let _lock = LOCK.lock(); println!("Root: {:x?}", self.root_paddr()); self.walk( @@ -155,6 +214,150 @@ where }, ); } +} + +impl GenericPageTableImmut for Level4PageTableImmut +where + VA: From + Into + Copy, + PTE: GenericPTE, +{ + type VA = VA; + + unsafe fn from_root(root_paddr: PhysAddr) -> Self { + Self { + root: 
Frame::from_paddr(root_paddr), + _phantom: PhantomData, + } + } + + fn root_paddr(&self) -> PhysAddr { + self.root.start_paddr() + } + + fn query(&self, vaddr: Self::VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)> { + let (entry, size) = self.get_entry_mut(vaddr)?; + if entry.is_unused() { + return Err(PagingError::NotMapped); + } + let off = size.page_offset(vaddr.into()); + Ok((entry.addr() + off, entry.flags(), size)) + } +} + +/// A extended level-3/4 page table that can change its mapping. It also tracks all intermediate +/// level tables. Locks need to be used if change the same page table concurrently. +struct Level4PageTableUnlocked { + inner: Level4PageTableImmut, + /// Intermediate level table frames. + intrm_tables: Vec, + /// Phantom data. + _phantom: PhantomData<(VA, PTE, I)>, +} + +impl Level4PageTableUnlocked +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + fn new() -> Self { + Self { + inner: Level4PageTableImmut::new(), + intrm_tables: Vec::new(), + _phantom: PhantomData, + } + } + + unsafe fn from_root(root_paddr: PhysAddr) -> Self { + Self { + inner: Level4PageTableImmut::from_root(root_paddr), + intrm_tables: Vec::new(), + _phantom: PhantomData, + } + } + + fn alloc_intrm_table(&mut self) -> HvResult { + let frame = Frame::new_zero()?; + let paddr = frame.start_paddr(); + self.intrm_tables.push(frame); + Ok(paddr) + } + + fn _dealloc_intrm_table(&mut self, _paddr: PhysAddr) {} + + fn get_entry_mut_or_create(&mut self, page: Page) -> PagingResult<&mut PTE> { + let vaddr: usize = page.vaddr.into(); + + let p4 = table_of_mut::(self.inner.root_paddr()); + let p4e = &mut p4[p4_index(vaddr)]; + + let p3 = next_table_mut_or_create(p4e, || self.alloc_intrm_table())?; + let p3e = &mut p3[p3_index(vaddr)]; + if page.size == PageSize::Size1G { + return Ok(p3e); + } + + let p2 = next_table_mut_or_create(p3e, || self.alloc_intrm_table())?; + let p2e = &mut p2[p2_index(vaddr)]; + if page.size == PageSize::Size2M { + return 
Ok(p2e); + } + + let p1 = next_table_mut_or_create(p2e, || self.alloc_intrm_table())?; + let p1e = &mut p1[p1_index(vaddr)]; + Ok(p1e) + } + + fn map_page( + &mut self, + page: Page, + paddr: PhysAddr, + flags: MemFlags, + ) -> PagingResult<&mut PTE> { + let entry = self.get_entry_mut_or_create(page)?; + if !entry.is_unused() { + return Err(PagingError::AlreadyMapped); + } + entry.set_addr(page.size.align_down(paddr)); + entry.set_flags(flags, page.size.is_huge()); + Ok(entry) + } + + fn unmap_page(&mut self, vaddr: VA) -> PagingResult<(PhysAddr, PageSize)> { + let (entry, size) = self.inner.get_entry_mut(vaddr)?; + if entry.is_unused() { + return Err(PagingError::NotMapped); + } + let paddr = entry.addr(); + entry.clear(); + Ok((paddr, size)) + } + + fn update(&mut self, vaddr: VA, paddr: PhysAddr, flags: MemFlags) -> PagingResult { + let (entry, size) = self.inner.get_entry_mut(vaddr)?; + entry.set_addr(paddr); + entry.set_flags(flags, size.is_huge()); + Ok(size) + } +} + +/// A extended level-4 page table implements `GenericPageTable`. It use locks to avoid data +/// racing between it and its clonees. +pub struct Level4PageTable { + inner: Level4PageTableUnlocked, + /// Make sure all accesses to the page table and its clonees is exclusive. + clonee_lock: Arc>, +} + +impl Level4PageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + pub fn dump(&self, limit: usize) { + self.inner.inner.dump(limit) + } /// Clone only the top level page table mapping from `src`. 
pub fn clone_from(src: &impl GenericPageTableImmut) -> Self { @@ -180,24 +383,18 @@ where unsafe fn from_root(root_paddr: PhysAddr) -> Self { Self { - root: Frame::from_paddr(root_paddr), - intrm_tables: Vec::new(), - _phantom: PhantomData, + inner: Level4PageTableUnlocked::from_root(root_paddr), + clonee_lock: Arc::new(Mutex::new(())), } } fn root_paddr(&self) -> PhysAddr { - self.root.start_paddr() + self.inner.inner.root_paddr() } fn query(&self, vaddr: VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)> { - // TODO: lock - let (entry, size) = self.get_entry_mut(vaddr)?; - if entry.is_unused() { - return Err(PagingError::NotMapped); - } - let off = size.page_offset(vaddr.into()); - Ok((entry.addr() + off, entry.flags(), size)) + let _lock = self.clonee_lock.lock(); + self.inner.inner.query(vaddr) } } @@ -208,11 +405,9 @@ where I: PagingInstr, { fn new() -> Self { - let frame = Frame::new_zero().expect("failed to allocate root frame for host page table"); Self { - root: frame, - intrm_tables: Vec::new(), - _phantom: PhantomData, + inner: Level4PageTableUnlocked::new(), + clonee_lock: Arc::new(Mutex::new(())), } } @@ -228,7 +423,7 @@ where core::any::type_name::(), region ); - // TODO: lock + let _lock = self.clonee_lock.lock(); let mut vaddr = region.start.into(); let mut size = region.size; while size > 0 { @@ -249,7 +444,8 @@ where PageSize::Size4K }; let page = Page::new_aligned(vaddr.into(), page_size); - self.map_page(page, paddr, region.flags) + self.inner + .map_page(page, paddr, region.flags) .map_err(|e: PagingError| { error!( "failed to map page: {:#x?}({:?}) -> {:#x?}, {:?}", @@ -269,11 +465,11 @@ where core::any::type_name::(), region ); - // TODO: lock + let _lock = self.clonee_lock.lock(); let mut vaddr = region.start.into(); let mut size = region.size; while size > 0 { - let (_, page_size) = self.unmap_page(vaddr.into()).map_err(|e| { + let (_, page_size) = self.inner.unmap_page(vaddr.into()).map_err(|e| { error!("failed to unmap page: {:#x?}, 
{:?}", vaddr, e); e })?; @@ -290,16 +486,14 @@ where } fn update(&mut self, vaddr: VA, paddr: PhysAddr, flags: MemFlags) -> PagingResult { - let (entry, size) = self.get_entry_mut(vaddr)?; - entry.set_addr(paddr); - entry.set_flags(flags, size.is_huge()); - Ok(size) + let _lock = self.clonee_lock.lock(); + self.inner.update(vaddr, paddr, flags) } fn clone(&self) -> Self { let mut pt = Self::clone_from(self); // clone with lock to avoid data racing between it and its clonees. - // TODO: pt.clonee_lock = self.clonee_lock.clone(); + pt.clonee_lock = self.clonee_lock.clone(); pt } @@ -312,123 +506,6 @@ where } } -// TODO: temp -impl Level4PageTable -where - VA: From + Into + Copy, - PTE: GenericPTE, - I: PagingInstr, -{ - fn get_entry_mut(&self, vaddr: VA) -> PagingResult<(&mut PTE, PageSize)> { - let vaddr = vaddr.into(); - - let p4 = table_of_mut::(self.root_paddr()); - let p4e = &mut p4[p4_index(vaddr)]; - - let p3 = next_table_mut(p4e)?; - let p3e = &mut p3[p3_index(vaddr)]; - if p3e.is_huge() { - return Ok((p3e, PageSize::Size1G)); - } - - let p2 = next_table_mut(p3e)?; - let p2e = &mut p2[p2_index(vaddr)]; - if p2e.is_huge() { - return Ok((p2e, PageSize::Size2M)); - } - - let p1 = next_table_mut(p2e)?; - let p1e = &mut p1[p1_index(vaddr)]; - Ok((p1e, PageSize::Size4K)) - } - - fn alloc_intrm_table(&mut self) -> HvResult { - let frame = Frame::new_zero()?; - let paddr = frame.start_paddr(); - self.intrm_tables.push(frame); - Ok(paddr) - } - - fn get_entry_mut_or_create(&mut self, page: Page) -> PagingResult<&mut PTE> { - let vaddr: usize = page.vaddr.into(); - - let p4 = table_of_mut::(self.root_paddr()); - let p4e = &mut p4[p4_index(vaddr)]; - - let p3 = next_table_mut_or_create(p4e, || self.alloc_intrm_table())?; - let p3e = &mut p3[p3_index(vaddr)]; - if page.size == PageSize::Size1G { - return Ok(p3e); - } - - let p2 = next_table_mut_or_create(p3e, || self.alloc_intrm_table())?; - let p2e = &mut p2[p2_index(vaddr)]; - if page.size == PageSize::Size2M { - 
return Ok(p2e); - } - - let p1 = next_table_mut_or_create(p2e, || self.alloc_intrm_table())?; - let p1e = &mut p1[p1_index(vaddr)]; - Ok(p1e) - } - - fn map_page( - &mut self, - page: Page, - paddr: PhysAddr, - flags: MemFlags, - ) -> PagingResult<&mut PTE> { - let entry = self.get_entry_mut_or_create(page)?; - if !entry.is_unused() { - return Err(PagingError::AlreadyMapped); - } - entry.set_addr(page.size.align_down(paddr)); - entry.set_flags(flags, page.size.is_huge()); - Ok(entry) - } - - fn unmap_page(&mut self, vaddr: VA) -> PagingResult<(PhysAddr, PageSize)> { - let (entry, size) = self.get_entry_mut(vaddr)?; - if entry.is_unused() { - return Err(PagingError::NotMapped); - } - let paddr = entry.addr(); - entry.clear(); - Ok((paddr, size)) - } - - fn walk( - &self, - table: &[PTE], - level: usize, - start_vaddr: usize, - limit: usize, - func: &impl Fn(usize, usize, usize, &PTE), - ) { - let mut n = 0; - for (i, entry) in table.iter().enumerate() { - let vaddr = start_vaddr + (i << (12 + (3 - level) * 9)); - if entry.is_present() { - func(level, i, vaddr, entry); - if level < 3 { - match next_table_mut(entry) { - Ok(entry) => self.walk(entry, level + 1, vaddr, limit, func), - Err(PagingError::MappedToHugePage) => {} - _ => unreachable!(), - } - } - n += 1; - if n >= limit { - break; - } - } - } - } -} - -const LEVELS: usize = 4; -const ENTRY_COUNT: usize = 512; - const fn p4_index(vaddr: usize) -> usize { (vaddr >> (12 + 27)) & (ENTRY_COUNT - 1) } diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index a18f85ab..cc25deb5 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -229,10 +229,6 @@ pub struct S2PTInstr; impl PagingInstr for S2PTInstr { unsafe fn activate(root_paddr: HostPhysAddr) { let s2ptp = S2PTPointer::from_table_phys(root_paddr).bits(); - info!( - "s2pt activate, root paddr: 0x{:x}, s2ptp: 0x{:x}", - root_paddr, s2ptp - ); crate::arch::vmx::set_s2ptp(s2ptp).unwrap(); unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) 
}; } diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index cac4175d..e93a2129 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,44 +1,44 @@ -use super::{ - cpu::{ArchCpu, TrapFrame}, - lapic::vectors::APIC_TIMER_VECTOR, +use crate::{ + arch::{ + cpu::ArchCpu, + device::all_virt_devices, + idt::IdtStruct, + lapic::{local_apic, vectors::*}, + msr::Msr::{self, *}, + vmx::*, + }, + device::irqchip::pic::lapic::VirtLocalApic, + error::{HvError, HvResult}, }; -use crate::arch::{idt::IdtStruct, lapic::local_apic}; -use crate::{arch::vmx::*, error::HvResult}; -use alloc::vec; -use core::arch::{self, global_asm}; -global_asm!( +core::arch::global_asm!( include_str!("trap.S"), sym arch_handle_trap ); const IRQ_VECTOR_START: u8 = 0x20; const IRQ_VECTOR_END: u8 = 0xff; + +const VM_EXIT_INSTR_LEN_CPUID: u8 = 2; +const VM_EXIT_INSTR_LEN_RDMSR: u8 = 2; +const VM_EXIT_INSTR_LEN_WRMSR: u8 = 2; const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; -#[allow(dead_code)] -#[allow(non_snake_case)] -#[allow(non_upper_case_globals)] -pub mod ExceptionType { - pub const DIVIDE_ERROR: u8 = 0; - pub const DEBUG: u8 = 1; - pub const NMI_INTERRUPT: u8 = 2; - pub const BREAKPOINT: u8 = 3; - pub const OVERFLOW: u8 = 4; - pub const BOUND_RANGE_EXCEEDED: u8 = 5; - pub const INVALID_OPCODE: u8 = 6; - pub const DEVICE_NOT_AVAILABLE: u8 = 7; - pub const DOUBLE_FAULT: u8 = 8; - pub const COPROCESSOR_SEGMENT_OVERRUN: u8 = 9; - pub const INVALID_TSS: u8 = 10; - pub const SEGMENT_NOT_PRESENT: u8 = 11; - pub const STACK_SEGMENT_FAULT: u8 = 12; - pub const GENERAL_PROTECTION: u8 = 13; - pub const PAGE_FAULT: u8 = 14; - pub const FLOATING_POINT_ERROR: u8 = 16; - pub const ALIGNMENT_CHECK: u8 = 17; - pub const MACHINE_CHECK: u8 = 18; - pub const SIMD_FLOATING_POINT_EXCEPTION: u8 = 19; +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct TrapFrame { + pub usr: [u64; 15], + + // pushed by 'trap.S' + pub vector: u64, + pub error_code: u64, + + // pushed by CPU + pub rip: u64, 
+ pub cs: u64, + pub rflags: u64, + pub rsp: u64, + pub ss: u64, } lazy_static::lazy_static! { @@ -75,7 +75,63 @@ fn handle_irq(vector: u8) { } } -fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult { +fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { + use raw_cpuid::{cpuid, CpuIdResult}; + + const LEAF_FEATURE_INFO: u32 = 0x1; + const LEAF_HYPERVISOR_INFO: u32 = 0x4000_0000; + const LEAF_HYPERVISOR_FEATURE: u32 = 0x4000_0001; + const VENDOR_STR: &[u8; 12] = b"HVISORHVISOR"; + let vendor_regs = unsafe { &*(VENDOR_STR.as_ptr() as *const [u32; 3]) }; + + let regs = arch_cpu.regs_mut(); + let function = regs.rax as u32; + let res = match function { + LEAF_FEATURE_INFO => { + const FEATURE_VMX: u32 = 1 << 5; + const FEATURE_HYPERVISOR: u32 = 1 << 31; + let mut res = cpuid!(regs.rax, regs.rcx); + res.ecx &= !FEATURE_VMX; + res.ecx |= FEATURE_HYPERVISOR; + res + } + LEAF_HYPERVISOR_INFO => CpuIdResult { + eax: LEAF_HYPERVISOR_FEATURE, + ebx: vendor_regs[0], + ecx: vendor_regs[1], + edx: vendor_regs[2], + }, + LEAF_HYPERVISOR_FEATURE => CpuIdResult { + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + }, + _ => cpuid!(regs.rax, regs.rcx), + }; + + debug!( + "VM exit: CPUID({:#x}, {:#x}): {:?}", + regs.rax, regs.rcx, res + ); + regs.rax = res.eax as _; + regs.rbx = res.ebx as _; + regs.rcx = res.ecx as _; + regs.rdx = res.edx as _; + + advance_guest_rip(VM_EXIT_INSTR_LEN_CPUID)?; + Ok(()) +} + +fn handle_external_interrupt() -> HvResult { + let int_info = interrupt_exit_info()?; + trace!("VM-exit: external interrupt: {:#x?}", int_info); + assert!(int_info.valid); + handle_irq(int_info.vector); + Ok(()) +} + +fn handle_hypercall(arch_cpu: &ArchCpu) -> HvResult { let regs = arch_cpu.regs(); debug!( "VM exit: VMCALL({:#x}): {:?}", @@ -86,10 +142,111 @@ fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult { Ok(()) } -fn handle_ept_violation(guest_rip: usize, arch_cpu: &mut ArchCpu) -> HvResult { - let fault_info = ept_violation_info()?; +fn handle_io_instruction(arch_cpu: 
&mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { + let io_info = io_exit_info()?; + trace!( + "VM exit: I/O instruction @ {:#x}: {:#x?}", + exit_info.guest_rip, + io_info, + ); + if io_info.is_string { + error!("INS/OUTS instructions are not supported!"); + return hv_result_err!(ENOSYS); + } + if io_info.is_repeat { + error!("REP prefixed I/O instructions are not supported!"); + return hv_result_err!(ENOSYS); + } + + if let Some(dev) = all_virt_devices().find_port_io_device(io_info.port) { + if io_info.is_in { + let value = dev.read(io_info.port, io_info.access_size)?; + let rax = &mut arch_cpu.regs_mut().rax; + // SDM Vol. 1, Section 3.4.1.1: + // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the + // destination general-purpose register. + // * 8-bit and 16-bit operands generate an 8-bit or 16-bit result. The upper 56 bits or + // 48 bits (respectively) of the destination general-purpose register are not modified + // by the operation. + match io_info.access_size { + 1 => *rax = (*rax & !0xff) | (value & 0xff) as u64, + 2 => *rax = (*rax & !0xffff) | (value & 0xffff) as u64, + 4 => *rax = value as u64, + _ => unreachable!(), + } + } else { + let rax = arch_cpu.regs().rax; + let value = match io_info.access_size { + 1 => rax & 0xff, + 2 => rax & 0xffff, + 4 => rax, + _ => unreachable!(), + } as u32; + dev.write(io_info.port, io_info.access_size, value)?; + } + } else { + panic!( + "Unsupported I/O port {:#x} access: {:#x?}", + io_info.port, io_info + ) + } + advance_guest_rip(exit_info.exit_instruction_length as _)?; + Ok(()) +} + +fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult { + let rcx = arch_cpu.regs().rcx as u32; + let msr = Msr::try_from(rcx).unwrap(); + + let res = if msr == IA32_APIC_BASE { + let mut apic_base = unsafe { IA32_APIC_BASE.read() }; + apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC + Ok(apic_base) + } else if VirtLocalApic::msr_range().contains(&rcx) { + VirtLocalApic::rdmsr(arch_cpu, 
msr) + } else { + hv_result_err!(ENOSYS) + }; + + if let Ok(value) = res { + debug!("VM exit: RDMSR({:#x}) -> {:#x}", rcx, value); + arch_cpu.regs_mut().rax = value & 0xffff_ffff; + arch_cpu.regs_mut().rdx = value >> 32; + } else { + panic!("Failed to handle RDMSR({:#x}): {:?}", rcx, res); + } + advance_guest_rip(VM_EXIT_INSTR_LEN_RDMSR)?; + Ok(()) +} + +fn handle_msr_write(arch_cpu: &mut ArchCpu) -> HvResult { + let rcx = arch_cpu.regs().rcx as u32; + let msr = Msr::try_from(rcx).unwrap(); + let value = (arch_cpu.regs().rax & 0xffff_ffff) | (arch_cpu.regs().rdx << 32); + debug!("VM exit: WRMSR({:#x}) <- {:#x}", rcx, value); + + let res = if msr == IA32_APIC_BASE { + Ok(()) // ignore + } else if VirtLocalApic::msr_range().contains(&rcx) { + VirtLocalApic::wrmsr(arch_cpu, msr, value) + } else { + hv_result_err!(ENOSYS) + }; + + if res.is_err() { + panic!( + "Failed to handle WRMSR({:#x}) <- {:#x}: {:?}", + rcx, value, res + ); + } + advance_guest_rip(VM_EXIT_INSTR_LEN_WRMSR)?; + Ok(()) +} + +fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, guest_rip: usize) -> HvResult { + let fault_info = s2pt_violation_info()?; panic!( - "VM exit: EPT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?}), arch_cpu: {:#x?}", + "VM exit: S2PT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?}), {:#x?}", guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags, arch_cpu ); } @@ -103,20 +260,24 @@ pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { } let res = match exit_info.exit_reason { + VmxExitReason::CPUID => handle_cpuid(arch_cpu), + VmxExitReason::EPT_VIOLATION => handle_s2pt_violation(arch_cpu, exit_info.guest_rip), + VmxExitReason::EXTERNAL_INTERRUPT => handle_external_interrupt(), + VmxExitReason::INTERRUPT_WINDOW => set_interrupt_window(false), + VmxExitReason::IO_INSTRUCTION => handle_io_instruction(arch_cpu, &exit_info), + VmxExitReason::MSR_READ => handle_msr_read(arch_cpu), + VmxExitReason::MSR_WRITE => handle_msr_write(arch_cpu), 
VmxExitReason::VMCALL => handle_hypercall(arch_cpu), - VmxExitReason::EPT_VIOLATION => handle_ept_violation(exit_info.guest_rip, arch_cpu), _ => panic!( "Unhandled VM-Exit reason {:?}:\n{:#x?}", - exit_info.exit_reason, - arch_cpu.regs() + exit_info.exit_reason, arch_cpu ), }; if res.is_err() { panic!( "Failed to handle VM-exit {:?}:\n{:#x?}", - exit_info.exit_reason, - arch_cpu.regs() + exit_info.exit_reason, arch_cpu ); } diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs index ab3e35c6..73410ec3 100644 --- a/src/arch/x86_64/vmx.rs +++ b/src/arch/x86_64/vmx.rs @@ -1,27 +1,20 @@ -#![allow(non_camel_case_types)] -#![allow(clippy::upper_case_acronyms)] -use crate::arch::s2pt::Stage2PageFaultInfo; -use crate::consts::PAGE_SIZE; -use crate::error::{HvError, HvResult}; -use crate::memory::{Frame, GuestPhysAddr, HostPhysAddr, MemFlags, PhysAddr}; +use crate::{ + arch::{msr::Msr, s2pt::Stage2PageFaultInfo}, + consts::PAGE_SIZE, + error::{HvError, HvResult}, + memory::{Frame, GuestPhysAddr, HostPhysAddr, HostVirtAddr, MemFlags, PhysAddr}, +}; use bit_field::BitField; use bitflags::{bitflags, Flags}; use raw_cpuid::CpuId; -use x86::dtables::{self, DescriptorTablePointer}; -use x86::msr::{ - IA32_EFER, IA32_FEATURE_CONTROL, IA32_FS_BASE, IA32_GS_BASE, IA32_PAT, IA32_VMX_BASIC, - IA32_VMX_ENTRY_CTLS, IA32_VMX_EXIT_CTLS, IA32_VMX_PINBASED_CTLS, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_PROCBASED_CTLS2, IA32_VMX_TRUE_ENTRY_CTLS, IA32_VMX_TRUE_EXIT_CTLS, - IA32_VMX_TRUE_PINBASED_CTLS, IA32_VMX_TRUE_PROCBASED_CTLS, -}; -use x86::segmentation::SegmentSelector; -use x86::vmx::vmcs::control::{ - EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls, +use x86::{ + bits64::vmx, + dtables, + dtables::DescriptorTablePointer, + segmentation::SegmentSelector, + vmx::{vmcs::control::*, vmcs::*, VmFail}, }; -use x86::vmx::vmcs::*; -use x86::{bits64::vmx, vmx::VmFail}; use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}; -use 
x86_64::registers::model_specific::Msr; bitflags! { pub struct FeatureControlFlags: u64 { @@ -35,14 +28,16 @@ bitflags! { } } -pub fn vmread(field: u32) -> x86::vmx::Result { +fn vmread(field: u32) -> x86::vmx::Result { unsafe { vmx::vmread(field as u32) } } -pub fn vmwrite>(field: u32, value: T) -> x86::vmx::Result<()> { +fn vmwrite>(field: u32, value: T) -> x86::vmx::Result<()> { unsafe { vmx::vmwrite(field as u32, value.into()) } } +const ZERO: u64 = 0; + numeric_enum_macro::numeric_enum! { #[repr(u32)] #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -133,6 +128,121 @@ pub struct VmxExitInfo { pub guest_rip: usize, } +/// Exit Qualification for I/O Instructions. (SDM Vol. 3C, Section 27.2.1, Table 27-5) +#[derive(Debug)] +pub struct VmxIoExitInfo { + /// Size of access. + pub access_size: u8, + /// Direction of the attempted access (0 = OUT, 1 = IN). + pub is_in: bool, + /// String instruction (0 = not string; 1 = string). + pub is_string: bool, + /// REP prefixed (0 = not REP; 1 = REP). + pub is_repeat: bool, + /// Port number. (as specified in DX or in an immediate operand) + pub port: u16, +} + +numeric_enum_macro::numeric_enum! { +#[repr(u8)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +/// The interruption type (bits 10:8) in VM-Entry Interruption-Information Field +/// and VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) +pub enum VmxInterruptionType { + /// External interrupt + External = 0, + /// Reserved + Reserved = 1, + /// Non-maskable interrupt (NMI) + NMI = 2, + /// Hardware exception (e.g,. #PF) + HardException = 3, + /// Software interrupt (INT n) + SoftIntr = 4, + /// Privileged software exception (INT1) + PrivSoftException = 5, + /// Software exception (INT3 or INTO) + SoftException = 6, + /// Other event + Other = 7, +} +} + +impl VmxInterruptionType { + /// Whether the exception/interrupt with `vector` has an error code. 
+ pub const fn vector_has_error_code(vector: u8) -> bool { + use x86::irq::*; + matches!( + vector, + DOUBLE_FAULT_VECTOR + | INVALID_TSS_VECTOR + | SEGMENT_NOT_PRESENT_VECTOR + | STACK_SEGEMENT_FAULT_VECTOR + | GENERAL_PROTECTION_FAULT_VECTOR + | PAGE_FAULT_VECTOR + | ALIGNMENT_CHECK_VECTOR + ) + } + + /// Determine interruption type by the interrupt vector. + pub const fn from_vector(vector: u8) -> Self { + // SDM Vol. 3C, Section 24.8.3 + use x86::irq::*; + match vector { + DEBUG_VECTOR => Self::PrivSoftException, + NONMASKABLE_INTERRUPT_VECTOR => Self::NMI, + BREAKPOINT_VECTOR | OVERFLOW_VECTOR => Self::SoftException, + // SDM Vol. 3A, Section 6.15: All other vectors from 0 to 21 are exceptions. + 0..=VIRTUALIZATION_VECTOR => Self::HardException, + 32..=255 => Self::External, + _ => Self::Other, + } + } + + /// For software interrupt, software exception, or privileged software + /// exception, we need to set VM-Entry Instruction Length Field. + pub const fn is_soft(&self) -> bool { + matches!( + *self, + Self::SoftIntr | Self::SoftException | Self::PrivSoftException + ) + } +} + +/// VM-Entry / VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) +#[derive(Debug)] +pub struct VmxInterruptInfo { + /// Vector of interrupt or exception. + pub vector: u8, + /// Determines details of how the injection is performed. + pub int_type: VmxInterruptionType, + /// For hardware exceptions that would have delivered an error code on the stack. + pub err_code: Option, + /// Whether the field is valid. + pub valid: bool, +} + +impl VmxInterruptInfo { + /// Convert from the interrupt vector and the error code. + pub fn from(vector: u8, err_code: Option) -> Self { + Self { + vector, + int_type: VmxInterruptionType::from_vector(vector), + err_code, + valid: true, + } + } + + /// Raw bits for writing to VMCS. 
+ pub fn bits(&self) -> u32 { + let mut bits = self.vector as u32; + bits |= (self.int_type as u32) << 8; + bits.set_bit(11, self.err_code.is_some()); + bits.set_bit(31, self.valid); + bits + } +} + #[derive(Debug)] pub struct VmxRegion { frame: Frame, @@ -172,27 +282,28 @@ pub fn is_vmx_enabled() -> bool { Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS) } -pub unsafe fn enable_vmxon() -> HvResult { - let mut ctrl_reg = Msr::new(IA32_FEATURE_CONTROL); +pub fn enable_vmxon() -> HvResult { + let mut ctrl_reg = Msr::IA32_FEATURE_CONTROL; let ctrl_flag = FeatureControlFlags::from_bits_truncate(ctrl_reg.read()); let locked = ctrl_flag.contains(FeatureControlFlags::LOCKED); let vmxon_outside = ctrl_flag.contains(FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX); if !locked { - ctrl_reg.write( - (ctrl_flag - | FeatureControlFlags::LOCKED - | FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX) - .bits(), - ) + unsafe { + ctrl_reg.write( + (ctrl_flag + | FeatureControlFlags::LOCKED + | FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX) + .bits(), + ) + } } else if !vmxon_outside { - return Err(hv_err!(EPERM, "VMX disabled by BIOS")); + return hv_result_err!(EPERM, "VMX disabled by BIOS"); } Ok(()) } pub fn get_vmcs_revision_id() -> u32 { - let vmx_basic_reg = Msr::new(IA32_VMX_BASIC); - let vmx_basic_flag = unsafe { vmx_basic_reg.read() }; + let vmx_basic_flag = Msr::IA32_VMX_BASIC.read(); vmx_basic_flag.get_bits(0..=30) as u32 } @@ -212,26 +323,23 @@ pub unsafe fn enable_vmcs(start_paddr: u64) -> HvResult { Ok(()) } -// natural-width -type unw = u64; - -pub fn setup_vmcs_host(vmx_exit: usize) -> HvResult { - vmwrite::(host::IA32_PAT_FULL, unsafe { Msr::new(IA32_PAT).read() })?; - vmwrite::(host::IA32_EFER_FULL, unsafe { Msr::new(IA32_EFER).read() })?; +pub fn setup_vmcs_host(vmx_exit: HostVirtAddr) -> HvResult { + vmwrite(host::IA32_PAT_FULL, Msr::IA32_PAT.read())?; + vmwrite(host::IA32_EFER_FULL, Msr::IA32_EFER.read())?; - vmwrite::(host::CR0, Cr0::read_raw())?; - 
vmwrite::(host::CR3, Cr3::read_raw().0.start_address().as_u64())?; - vmwrite::(host::CR4, Cr4::read_raw())?; + vmwrite(host::CR0, Cr0::read_raw())?; + vmwrite(host::CR3, Cr3::read_raw().0.start_address().as_u64())?; + vmwrite(host::CR4, Cr4::read_raw())?; - vmwrite::(host::ES_SELECTOR, x86::segmentation::es().bits())?; - vmwrite::(host::CS_SELECTOR, x86::segmentation::cs().bits())?; - vmwrite::(host::SS_SELECTOR, x86::segmentation::ss().bits())?; - vmwrite::(host::DS_SELECTOR, x86::segmentation::ds().bits())?; - vmwrite::(host::FS_SELECTOR, x86::segmentation::fs().bits())?; - vmwrite::(host::GS_SELECTOR, x86::segmentation::gs().bits())?; + vmwrite(host::ES_SELECTOR, x86::segmentation::es().bits())?; + vmwrite(host::CS_SELECTOR, x86::segmentation::cs().bits())?; + vmwrite(host::SS_SELECTOR, x86::segmentation::ss().bits())?; + vmwrite(host::DS_SELECTOR, x86::segmentation::ds().bits())?; + vmwrite(host::FS_SELECTOR, x86::segmentation::fs().bits())?; + vmwrite(host::GS_SELECTOR, x86::segmentation::gs().bits())?; - vmwrite::(host::FS_BASE, unsafe { Msr::new(IA32_FS_BASE).read() })?; - vmwrite::(host::GS_BASE, unsafe { Msr::new(IA32_GS_BASE).read() })?; + vmwrite(host::FS_BASE, Msr::IA32_FS_BASE.read())?; + vmwrite(host::GS_BASE, Msr::IA32_GS_BASE.read())?; let tr = unsafe { x86::task::tr() }; let mut gdtp = DescriptorTablePointer::::default(); @@ -241,55 +349,52 @@ pub fn setup_vmcs_host(vmx_exit: usize) -> HvResult { dtables::sidt(&mut idtp); } - vmwrite::(host::TR_SELECTOR, tr.bits())?; - vmwrite::(host::TR_BASE, get_tr_base(tr, &gdtp))?; - vmwrite::(host::GDTR_BASE, gdtp.base as unw)?; - vmwrite::(host::IDTR_BASE, idtp.base as unw)?; - vmwrite::(host::RIP, vmx_exit as unw)?; + vmwrite(host::TR_SELECTOR, tr.bits())?; + vmwrite(host::TR_BASE, get_tr_base(tr, &gdtp))?; + vmwrite(host::GDTR_BASE, gdtp.base as u64)?; + vmwrite(host::IDTR_BASE, idtp.base as u64)?; + vmwrite(host::RIP, vmx_exit as u64)?; - vmwrite::(host::IA32_SYSENTER_ESP, 0)?; - 
vmwrite::(host::IA32_SYSENTER_EIP, 0)?; - vmwrite::(host::IA32_SYSENTER_CS, 0)?; + vmwrite(host::IA32_SYSENTER_ESP, ZERO)?; + vmwrite(host::IA32_SYSENTER_EIP, ZERO)?; + vmwrite(host::IA32_SYSENTER_CS, ZERO)?; - // VmcsHostNW::RSP.write(0)?; // TODO + // VmcsHostNW::RSP.write(ZERO)?; // TODO Ok(()) } -pub fn setup_vmcs_guest(entry: usize) -> HvResult { +pub fn setup_vmcs_guest(entry: GuestPhysAddr) -> HvResult { // Enable protected mode and paging. - let cr0_guest = Cr0Flags::PROTECTED_MODE_ENABLE - | Cr0Flags::EXTENSION_TYPE - | Cr0Flags::NUMERIC_ERROR - | Cr0Flags::PAGING; + let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; let cr0_host_owned = Cr0Flags::NUMERIC_ERROR | Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE; let cr0_read_shadow = Cr0Flags::NUMERIC_ERROR; - vmwrite::(guest::CR0, cr0_guest.bits())?; - vmwrite::(control::CR0_GUEST_HOST_MASK, cr0_host_owned.bits())?; - vmwrite::(control::CR0_READ_SHADOW, cr0_read_shadow.bits())?; + vmwrite(guest::CR0, cr0_guest.bits())?; + vmwrite(control::CR0_GUEST_HOST_MASK, cr0_host_owned.bits())?; + vmwrite(control::CR0_READ_SHADOW, cr0_read_shadow.bits())?; // Enable physical address extensions that required in IA-32e mode. - let cr4_guest = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; - let cr4_read_shadow = 0; + let cr4_read_shadow = ZERO; - vmwrite::(guest::CR4, cr4_guest.bits())?; - vmwrite::(control::CR4_GUEST_HOST_MASK, cr4_host_owned.bits())?; - vmwrite::(control::CR4_READ_SHADOW, cr4_read_shadow)?; + vmwrite(guest::CR4, cr4_guest.bits())?; + vmwrite(control::CR4_GUEST_HOST_MASK, cr4_host_owned.bits())?; + vmwrite(control::CR4_READ_SHADOW, cr4_read_shadow)?; macro_rules! 
set_guest_segment { ($seg: ident, $access_rights: expr) => {{ use guest::*; - vmwrite::(concat_idents!($seg, _SELECTOR), 0)?; - vmwrite::(concat_idents!($seg, _BASE), 0)?; - vmwrite::(concat_idents!($seg, _LIMIT), 0xffff)?; - vmwrite::(concat_idents!($seg, _ACCESS_RIGHTS), $access_rights)?; + vmwrite(concat_idents!($seg, _SELECTOR), ZERO)?; + vmwrite(concat_idents!($seg, _BASE), ZERO)?; + vmwrite(concat_idents!($seg, _LIMIT), 0xffff_u64)?; + vmwrite(concat_idents!($seg, _ACCESS_RIGHTS), $access_rights as u64)?; }}; } set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed - set_guest_segment!(CS, 0x209b); // 64-bit, present, code, exec/read, accessed + set_guest_segment!(CS, 0x9b); // 16-bit, present, code, exec/read, accessed set_guest_segment!(SS, 0x93); set_guest_segment!(DS, 0x93); set_guest_segment!(FS, 0x93); @@ -297,70 +402,76 @@ pub fn setup_vmcs_guest(entry: usize) -> HvResult { set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy set_guest_segment!(LDTR, 0x82); // present, system, LDT - vmwrite::(guest::GDTR_BASE, 0)?; - vmwrite::(guest::GDTR_LIMIT, 0xffff)?; - vmwrite::(guest::IDTR_BASE, 0)?; - vmwrite::(guest::IDTR_LIMIT, 0xffff)?; - - vmwrite::(guest::CR3, 0)?; - vmwrite::(guest::DR7, 0x400)?; - vmwrite::(guest::RSP, 0)?; - vmwrite::(guest::RIP, entry as unw)?; - vmwrite::(guest::RFLAGS, 0x2)?; - vmwrite::(guest::PENDING_DBG_EXCEPTIONS, 0)?; - vmwrite::(guest::IA32_SYSENTER_ESP, 0)?; - vmwrite::(guest::IA32_SYSENTER_EIP, 0)?; - vmwrite::(guest::IA32_SYSENTER_CS, 0)?; - - vmwrite::(guest::INTERRUPTIBILITY_STATE, 0)?; - vmwrite::(guest::ACTIVITY_STATE, 0)?; - vmwrite::(guest::VMX_PREEMPTION_TIMER_VALUE, 0)?; - - vmwrite::(guest::LINK_PTR_FULL, u64::MAX)?; - vmwrite::(guest::IA32_DEBUGCTL_FULL, 0)?; - vmwrite::(guest::IA32_PAT_FULL, unsafe { Msr::new(IA32_PAT).read() })?; - vmwrite::(guest::IA32_EFER_FULL, unsafe { Msr::new(IA32_EFER).read() })?; + vmwrite(guest::GDTR_BASE, ZERO)?; + vmwrite(guest::GDTR_LIMIT, 
0xffff_u64)?; + vmwrite(guest::IDTR_BASE, ZERO)?; + vmwrite(guest::IDTR_LIMIT, 0xffff_u64)?; + + vmwrite(guest::CR3, ZERO)?; + vmwrite(guest::DR7, 0x400_u64)?; + vmwrite(guest::RSP, ZERO)?; + vmwrite(guest::RIP, entry as u64)?; + vmwrite(guest::RFLAGS, 0x2_u64)?; + vmwrite(guest::PENDING_DBG_EXCEPTIONS, ZERO)?; + vmwrite(guest::IA32_SYSENTER_ESP, ZERO)?; + vmwrite(guest::IA32_SYSENTER_EIP, ZERO)?; + vmwrite(guest::IA32_SYSENTER_CS, ZERO)?; + + vmwrite(guest::INTERRUPTIBILITY_STATE, ZERO)?; + vmwrite(guest::ACTIVITY_STATE, ZERO)?; + vmwrite(guest::VMX_PREEMPTION_TIMER_VALUE, ZERO)?; + + vmwrite(guest::LINK_PTR_FULL, u64::MAX)?; + vmwrite(guest::IA32_DEBUGCTL_FULL, ZERO)?; + vmwrite(guest::IA32_PAT_FULL, Msr::IA32_PAT.read())?; + vmwrite(guest::IA32_EFER_FULL, ZERO)?; Ok(()) } -pub fn setup_vmcs_control() -> HvResult { - // Intercept NMI, pass-through external interrupts. +pub fn setup_vmcs_control(msr_bitmap: HostPhysAddr) -> HvResult { + // Intercept NMI and external interrupts. set_control( control::PINBASED_EXEC_CONTROLS, - Msr::new(IA32_VMX_TRUE_PINBASED_CTLS), - unsafe { Msr::new(IA32_VMX_PINBASED_CTLS).read() } as u32, - PinbasedControls::NMI_EXITING.bits(), + Msr::IA32_VMX_TRUE_PINBASED_CTLS, + Msr::IA32_VMX_PINBASED_CTLS.read() as u32, + (PinbasedControls::NMI_EXITING | PinbasedControls::EXTERNAL_INTERRUPT_EXITING).bits(), 0, )?; - // Activate secondary controls, disable CR3 load/store interception. + // Intercept all I/O instructions, use MSR bitmaps, activate secondary controls, + // disable CR3 load/store interception. 
set_control( control::PRIMARY_PROCBASED_EXEC_CONTROLS, - Msr::new(IA32_VMX_TRUE_PROCBASED_CTLS), - unsafe { Msr::new(IA32_VMX_PROCBASED_CTLS).read() } as u32, - PrimaryControls::SECONDARY_CONTROLS.bits(), + Msr::IA32_VMX_TRUE_PROCBASED_CTLS, + Msr::IA32_VMX_PROCBASED_CTLS.read() as u32, + (PrimaryControls::UNCOND_IO_EXITING + | PrimaryControls::USE_MSR_BITMAPS + | PrimaryControls::SECONDARY_CONTROLS) + .bits(), (PrimaryControls::CR3_LOAD_EXITING | PrimaryControls::CR3_STORE_EXITING).bits(), )?; - // Enable EPT, RDTSCP, INVPCID. + // Enable EPT, RDTSCP, INVPCID, and unrestricted guest. set_control( control::SECONDARY_PROCBASED_EXEC_CONTROLS, - Msr::new(IA32_VMX_PROCBASED_CTLS2), + Msr::IA32_VMX_PROCBASED_CTLS2, 0, (SecondaryControls::ENABLE_EPT | SecondaryControls::ENABLE_RDTSCP - | SecondaryControls::ENABLE_INVPCID) + | SecondaryControls::ENABLE_INVPCID + | SecondaryControls::UNRESTRICTED_GUEST) .bits(), 0, )?; - // Switch to 64-bit host, switch IA32_PAT/IA32_EFER on VM exit. + // Switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit. set_control( control::VMEXIT_CONTROLS, - Msr::new(IA32_VMX_TRUE_EXIT_CTLS), - unsafe { Msr::new(IA32_VMX_EXIT_CTLS).read() } as u32, + Msr::IA32_VMX_TRUE_EXIT_CTLS, + Msr::IA32_VMX_EXIT_CTLS.read() as u32, (ExitControls::HOST_ADDRESS_SPACE_SIZE + | ExitControls::ACK_INTERRUPT_ON_EXIT | ExitControls::SAVE_IA32_PAT | ExitControls::LOAD_IA32_PAT | ExitControls::SAVE_IA32_EFER @@ -369,28 +480,25 @@ pub fn setup_vmcs_control() -> HvResult { 0, )?; - // Switch to 64-bit guest, load guest IA32_PAT/IA32_EFER on VM entry. + // Load guest IA32_PAT/IA32_EFER on VM entry. 
set_control( control::VMENTRY_CONTROLS, - Msr::new(IA32_VMX_TRUE_ENTRY_CTLS), - unsafe { Msr::new(IA32_VMX_ENTRY_CTLS).read() } as u32, - (EntryControls::IA32E_MODE_GUEST - | EntryControls::LOAD_IA32_PAT - | EntryControls::LOAD_IA32_EFER) - .bits(), + Msr::IA32_VMX_TRUE_ENTRY_CTLS, + Msr::IA32_VMX_ENTRY_CTLS.read() as u32, + (EntryControls::LOAD_IA32_PAT | EntryControls::LOAD_IA32_EFER).bits(), 0, )?; // No MSR switches if hypervisor doesn't use and there is only one vCPU. - vmwrite::(control::VMEXIT_MSR_STORE_COUNT, 0)?; - vmwrite::(control::VMEXIT_MSR_LOAD_COUNT, 0)?; - vmwrite::(control::VMENTRY_MSR_LOAD_COUNT, 0)?; + vmwrite(control::VMEXIT_MSR_STORE_COUNT, ZERO)?; + vmwrite(control::VMEXIT_MSR_LOAD_COUNT, ZERO)?; + vmwrite(control::VMENTRY_MSR_LOAD_COUNT, ZERO)?; - // Pass-through exceptions, I/O instructions, and MSR read/write. - vmwrite::(control::EXCEPTION_BITMAP, 0)?; - vmwrite::(control::IO_BITMAP_A_ADDR_FULL, 0)?; - vmwrite::(control::IO_BITMAP_B_ADDR_FULL, 0)?; - vmwrite::(control::MSR_BITMAPS_ADDR_FULL, 0)?; + // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps. + vmwrite(control::EXCEPTION_BITMAP, ZERO)?; + vmwrite(control::IO_BITMAP_A_ADDR_FULL, ZERO)?; + vmwrite(control::IO_BITMAP_B_ADDR_FULL, ZERO)?; + vmwrite(control::MSR_BITMAPS_ADDR_FULL, msr_bitmap as u64)?; Ok(()) } @@ -427,24 +535,24 @@ pub fn set_control( control, old_value, set, clear ); if (set & clear) != 0 { - return Err(hv_err!( - EPERM, + return hv_result_err!( + EINVAL, format!("can not set and clear the same bit in {:#x}", control) - )); + ); } if (allowed1 & set) != set { // failed if set 0-bits in allowed1 - return Err(hv_err!( - EPERM, + return hv_result_err!( + EINVAL, format!("can not set bits {:#x} in {:#x}", set, control) - )); + ); } if (allowed0 & clear) != 0 { // failed if clear 1-bits in allowed0 - return Err(hv_err!( - EPERM, + return hv_result_err!( + EINVAL, format!("can not clear bits {:#x} in {:#x}", clear, control) - )); + ); } // SDM Vol. 
3C, Section 31.5.1, Algorithm 3 let flexible = !allowed0 & allowed1; // therse bits can be either 0 or 1 @@ -463,7 +571,7 @@ impl From for HvError { pub fn advance_guest_rip(instr_len: u8) -> HvResult { unsafe { - Ok(vmwrite::( + Ok(vmwrite( guest::RIP, (vmread(guest::RIP)? + instr_len as u64), )?) @@ -475,19 +583,31 @@ pub fn instruction_error() -> u32 { } pub fn set_host_rsp(rsp: HostPhysAddr) -> HvResult { - Ok(vmwrite::(host::RSP, rsp as unw)?) + Ok(vmwrite(host::RSP, rsp as u64)?) } pub fn set_guest_page_table(cr3: GuestPhysAddr) -> HvResult { - Ok(vmwrite::(guest::CR3, cr3 as unw)?) + Ok(vmwrite(guest::CR3, cr3 as u64)?) } pub fn set_guest_stack_pointer(rsp: GuestPhysAddr) -> HvResult { - Ok(vmwrite::(guest::RSP, rsp as unw)?) + Ok(vmwrite(guest::RSP, rsp as u64)?) } pub fn set_s2ptp(s2ptp: u64) -> HvResult { - Ok(vmwrite::(control::EPTP_FULL, s2ptp as u64)?) + Ok(vmwrite(control::EPTP_FULL, s2ptp as u64)?) +} + +pub fn guest_rip() -> u64 { + vmread(guest::RIP).unwrap() as u64 +} + +pub fn guest_rsp() -> u64 { + vmread(guest::RSP).unwrap() as u64 +} + +pub fn guest_cr3() -> u64 { + vmread(guest::CR3).unwrap() as u64 } pub fn exit_info() -> HvResult { @@ -503,7 +623,7 @@ pub fn exit_info() -> HvResult { }) } -pub fn ept_violation_info() -> HvResult { +pub fn s2pt_violation_info() -> HvResult { // SDM Vol. 3C, Section 27.2.1, Table 27-7 let qualification = vmread(ro::EXIT_QUALIFICATION)? as u64; let fault_guest_paddr = vmread(ro::GUEST_PHYSICAL_ADDR_FULL)? as usize; @@ -523,14 +643,74 @@ pub fn ept_violation_info() -> HvResult { }) } -pub fn guest_rip() -> unw { - vmread(guest::RIP).unwrap() as unw +pub fn io_exit_info() -> HvResult { + // SDM Vol. 
3C, Section 27.2.1, Table 27-5 + let qualification = vmread(ro::EXIT_QUALIFICATION)?; + Ok(VmxIoExitInfo { + access_size: qualification.get_bits(0..3) as u8 + 1, + is_in: qualification.get_bit(3), + is_string: qualification.get_bit(4), + is_repeat: qualification.get_bit(5), + port: qualification.get_bits(16..32) as u16, + }) +} + +pub fn allow_interrupt() -> HvResult { + let rflags = vmread(guest::RFLAGS)?; + let block_state = vmread(guest::INTERRUPTIBILITY_STATE)?; + Ok( + rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0 + && block_state == 0, + ) +} + +pub fn inject_event(vector: u8, err_code: Option) -> HvResult { + // SDM Vol. 3C, Section 24.8.3 + let err_code = if VmxInterruptionType::vector_has_error_code(vector) { + err_code.or_else(|| Some(vmread(ro::VMEXIT_INTERRUPTION_ERR_CODE).unwrap() as u32)) + } else { + None + }; + let int_info = VmxInterruptInfo::from(vector, err_code); + if let Some(err_code) = int_info.err_code { + vmwrite(control::VMENTRY_EXCEPTION_ERR_CODE, err_code)?; + } + if int_info.int_type.is_soft() { + vmwrite( + control::VMENTRY_INSTRUCTION_LEN, + vmread(ro::VMEXIT_INSTRUCTION_LEN)?, + )?; + } + vmwrite(control::VMENTRY_INTERRUPTION_INFO_FIELD, int_info.bits())?; + Ok(()) } -pub fn guest_rsp() -> unw { - vmread(guest::RSP).unwrap() as unw +/// If enable, a VM exit occurs at the beginning of any instruction if +/// `RFLAGS.IF` = 1 and there are no other blocking of interrupts. +/// (see SDM, Vol. 3C, Section 24.4.2) +pub fn set_interrupt_window(enable: bool) -> HvResult { + let mut ctrl = vmread(control::PRIMARY_PROCBASED_EXEC_CONTROLS)? as u32; + let bits = PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); + if enable { + ctrl |= bits + } else { + ctrl &= !bits + } + vmwrite(control::PRIMARY_PROCBASED_EXEC_CONTROLS, ctrl)?; + Ok(()) } -pub fn guest_cr3() -> unw { - vmread(guest::CR3).unwrap() as unw +pub fn interrupt_exit_info() -> HvResult { + // SDM Vol. 
3C, Section 24.9.2 + let info = vmread(ro::VMEXIT_INTERRUPTION_INFO)?; + Ok(VmxInterruptInfo { + vector: info.get_bits(0..8) as u8, + int_type: VmxInterruptionType::try_from(info.get_bits(8..11) as u8).unwrap(), + err_code: if info.get_bit(11) { + Some(vmread(ro::VMEXIT_INTERRUPTION_ERR_CODE)? as u32) + } else { + None + }, + valid: info.get_bit(31), + }) } diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index 077bdf2f..5f509204 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -1,10 +1,7 @@ use crate::{ config::*, error::HvResult, - memory::{ - addr::align_down, addr::align_up, mmio_generic_handler, GuestPhysAddr, HostPhysAddr, - MemFlags, MemoryRegion, - }, + memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, zone::Zone, }; diff --git a/src/device/irqchip/mod.rs b/src/device/irqchip/mod.rs index 9e6d1d2f..e79376bb 100644 --- a/src/device/irqchip/mod.rs +++ b/src/device/irqchip/mod.rs @@ -8,7 +8,7 @@ pub mod plic; pub mod ls7a2000; #[cfg(target_arch = "x86_64")] -pub mod i8259; +pub mod pic; #[cfg(target_arch = "aarch64")] pub use gicv3::{inject_irq, percpu_init, primary_init_early, primary_init_late}; @@ -20,4 +20,4 @@ pub use plic::{inject_irq, percpu_init, primary_init_early, primary_init_late}; pub use ls7a2000::{inject_irq, percpu_init, primary_init_early, primary_init_late}; #[cfg(target_arch = "x86_64")] -pub use i8259::{inject_irq, percpu_init, primary_init_early, primary_init_late}; +pub use pic::{inject_irq, percpu_init, primary_init_early, primary_init_late}; diff --git a/src/device/irqchip/pic/i8259.rs b/src/device/irqchip/pic/i8259.rs new file mode 100644 index 00000000..9dd10a0a --- /dev/null +++ b/src/device/irqchip/pic/i8259.rs @@ -0,0 +1,25 @@ +use crate::{arch::device::PortIoDevice, error::HvResult}; + +pub struct VirtI8259Pic { + port_base: u16, +} + +impl PortIoDevice for VirtI8259Pic { + fn port_range(&self) -> core::ops::Range { + self.port_base..self.port_base + 2 + } + + fn read(&self, port: 
u16, access_size: u8) -> HvResult { + hv_result_err!(EIO) // report error for read + } + + fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult { + Ok(()) // ignore write + } +} + +impl VirtI8259Pic { + pub const fn new(port_base: u16) -> Self { + Self { port_base } + } +} diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs new file mode 100644 index 00000000..52a44f34 --- /dev/null +++ b/src/device/irqchip/pic/lapic.rs @@ -0,0 +1,193 @@ +use crate::{ + arch::{ + cpu::ArchCpu, + lapic::current_time_nanos, + msr::Msr::{self, *}, + }, + error::HvResult, +}; +use bit_field::BitField; + +const APIC_FREQ_MHZ: u64 = 1000; // 1000 MHz +const APIC_CYCLE_NANOS: u64 = 1000 / APIC_FREQ_MHZ; + +/// Local APIC timer modes. +#[derive(Debug, Copy, Clone)] +#[repr(u8)] +pub enum TimerMode { + /// Timer only fires once. + OneShot = 0b00, + /// Timer fires periodically. + Periodic = 0b01, + /// Timer fires at an absolute time. + TscDeadline = 0b10, +} + +pub struct VirtLocalApic; + +/// A virtual local APIC timer. (SDM Vol. 3C, Section 10.5.4) +pub struct VirtApicTimer { + lvt_timer_bits: u32, + divide_shift: u8, + initial_count: u32, + last_start_ns: u64, + deadline_ns: u64, +} + +impl VirtLocalApic { + pub const fn msr_range() -> core::ops::Range { + 0x800..0x840 + } + + pub fn rdmsr(arch_cpu: &mut ArchCpu, msr: Msr) -> HvResult { + let apic_timer = arch_cpu.apic_timer_mut(); + match msr { + SIVR => Ok(0x1ff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit) + LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => { + Ok(0x1_0000) // SDM Vol. 
3A, Section 10.5.1, Figure 10-8 (with Mask bit) + } + LVT_TIMER => Ok(apic_timer.lvt_timer() as u64), + INIT_COUNT => Ok(apic_timer.initial_count() as u64), + DIV_CONF => Ok(apic_timer.divide() as u64), + CUR_COUNT => Ok(apic_timer.current_counter() as u64), + _ => hv_result_err!(ENOSYS), + } + } + + pub fn wrmsr(arch_cpu: &mut ArchCpu, msr: Msr, value: u64) -> HvResult { + if msr != ICR && (value >> 32) != 0 { + return hv_result_err!(EINVAL); // all registers except ICR are 32-bits + } + let apic_timer = arch_cpu.apic_timer_mut(); + match msr { + EOI => { + if value != 0 { + hv_result_err!(EINVAL) // write a non-zero value causes #GP + } else { + Ok(()) + } + } + SIVR | LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => { + Ok(()) // ignore these register writes + } + LVT_TIMER => apic_timer.set_lvt_timer(value as u32), + INIT_COUNT => apic_timer.set_initial_count(value as u32), + DIV_CONF => apic_timer.set_divide(value as u32), + _ => hv_result_err!(ENOSYS), + } + } +} + +impl VirtApicTimer { + pub const fn new() -> Self { + Self { + lvt_timer_bits: 0x1_0000, // masked + divide_shift: 0, + initial_count: 0, + last_start_ns: 0, + deadline_ns: 0, + } + } + + /// Check if an interrupt generated. if yes, update it's states. + pub fn check_interrupt(&mut self) -> bool { + if self.deadline_ns == 0 { + false + } else if current_time_nanos() >= self.deadline_ns { + if self.is_periodic() { + self.deadline_ns += self.interval_ns(); + } else { + self.deadline_ns = 0; + } + !self.is_masked() + } else { + false + } + } + + /// Whether the timer interrupt is masked. + pub const fn is_masked(&self) -> bool { + self.lvt_timer_bits & (1 << 16) != 0 + } + + /// Whether the timer mode is periodic. + pub const fn is_periodic(&self) -> bool { + let timer_mode = (self.lvt_timer_bits >> 17) & 0b11; + timer_mode == TimerMode::Periodic as _ + } + + /// The timer interrupt vector number. 
+ pub const fn vector(&self) -> u8 { + (self.lvt_timer_bits & 0xff) as u8 + } + + /// LVT Timer Register. (SDM Vol. 3A, Section 10.5.1, Figure 10-8) + pub const fn lvt_timer(&self) -> u32 { + self.lvt_timer_bits + } + + /// Divide Configuration Register. (SDM Vol. 3A, Section 10.5.4, Figure 10-10) + pub const fn divide(&self) -> u32 { + let dcr = self.divide_shift.wrapping_sub(1) as u32 & 0b111; + (dcr & 0b11) | ((dcr & 0b100) << 1) + } + + /// Initial Count Register. + pub const fn initial_count(&self) -> u32 { + self.initial_count + } + + /// Current Count Register. + pub fn current_counter(&self) -> u32 { + let elapsed_ns = current_time_nanos() - self.last_start_ns; + let elapsed_cycles = (elapsed_ns / APIC_CYCLE_NANOS) >> self.divide_shift; + if self.is_periodic() { + self.initial_count - (elapsed_cycles % self.initial_count as u64) as u32 + } else if elapsed_cycles < self.initial_count as u64 { + self.initial_count - elapsed_cycles as u32 + } else { + 0 + } + } + + /// Set LVT Timer Register. + pub fn set_lvt_timer(&mut self, bits: u32) -> HvResult { + let timer_mode = bits.get_bits(17..19); + if timer_mode == TimerMode::TscDeadline as _ { + return hv_result_err!(EINVAL); // TSC deadline mode was not supported + } else if timer_mode == 0b11 { + return hv_result_err!(EINVAL); // reserved + } + self.lvt_timer_bits = bits; + self.start_timer(); + Ok(()) + } + + /// Set Initial Count Register. + pub fn set_initial_count(&mut self, initial: u32) -> HvResult { + self.initial_count = initial; + self.start_timer(); + Ok(()) + } + + /// Set Divide Configuration Register. 
+ pub fn set_divide(&mut self, dcr: u32) -> HvResult { + let shift = (dcr & 0b11) | ((dcr & 0b1000) >> 1); + self.divide_shift = (shift + 1) as u8 & 0b111; + self.start_timer(); + Ok(()) + } + + const fn interval_ns(&self) -> u64 { + (self.initial_count as u64 * APIC_CYCLE_NANOS) << self.divide_shift + } + + fn start_timer(&mut self) { + if self.initial_count != 0 { + self.last_start_ns = current_time_nanos(); + self.deadline_ns = self.last_start_ns + self.interval_ns(); + } else { + self.deadline_ns = 0; + } + } +} diff --git a/src/device/irqchip/i8259/mod.rs b/src/device/irqchip/pic/mod.rs similarity index 92% rename from src/device/irqchip/i8259/mod.rs rename to src/device/irqchip/pic/mod.rs index 989d5b3f..8a9bc620 100644 --- a/src/device/irqchip/i8259/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,3 +1,6 @@ +pub mod i8259; +pub mod lapic; + use crate::zone::Zone; use core::arch::asm; diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index 778ee124..fdefb85e 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -23,4 +23,4 @@ pub use ns16440a::{console_getchar, console_putchar}; mod uart16550; #[cfg(target_arch = "x86_64")] -pub use uart16550::{console_getchar, console_putchar}; +pub use uart16550::{console_getchar, console_putchar, VirtUart16550}; diff --git a/src/device/uart/uart16550.rs b/src/device/uart/uart16550.rs index 8352184a..4c954499 100644 --- a/src/device/uart/uart16550.rs +++ b/src/device/uart/uart16550.rs @@ -1,8 +1,21 @@ -#![allow(dead_code)] +use crate::{ + arch::device::PortIoDevice, + error::{HvError, HvResult}, +}; use spin::Mutex; use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; +const DATA_REG: u16 = 0; +const INT_EN_REG: u16 = 1; +const FIFO_CTRL_REG: u16 = 2; +const LINE_CTRL_REG: u16 = 3; +const MODEM_CTRL_REG: u16 = 4; +const LINE_STATUS_REG: u16 = 5; +const MODEM_STATUS_REG: u16 = 6; +const SCRATCH_REG: u16 = 7; + const UART_CLOCK_FACTOR: usize = 16; +const UART_FIFO_CAPACITY: usize = 16; const 
OSC_FREQ: usize = 1_843_200; lazy_static::lazy_static! { @@ -13,6 +26,56 @@ lazy_static::lazy_static! { }; } +bitflags::bitflags! { + /// Line status flags + struct LineStatusFlags: u8 { + const INPUT_FULL = 1; + // 1 to 4 unknown + const OUTPUT_EMPTY = 1 << 5; + // 6 and 7 unknown + } +} + +/// FIFO queue for caching bytes read. +struct Fifo { + buf: [u8; CAP], + head: usize, + num: usize, +} + +impl Fifo { + const fn new() -> Self { + Self { + buf: [0; CAP], + head: 0, + num: 0, + } + } + + fn is_empty(&self) -> bool { + self.num == 0 + } + + fn is_full(&self) -> bool { + self.num == CAP + } + + fn push(&mut self, value: u8) { + assert!(self.num < CAP); + self.buf[(self.head + self.num) % CAP] = value; + self.num += 1; + } + + fn pop(&mut self) -> u8 { + assert!(self.num > 0); + let ret = self.buf[self.head]; + self.head += 1; + self.head %= CAP; + self.num -= 1; + ret + } +} + struct Uart16550 { rhr: PortReadOnly, // receive holding thr: PortWriteOnly, // transmit holding @@ -23,16 +86,21 @@ struct Uart16550 { lsr: PortReadOnly, // line status } +pub struct VirtUart16550 { + base_port: u16, + fifo: Mutex>, +} + impl Uart16550 { const fn new(base_port: u16) -> Self { Self { - rhr: PortReadOnly::new(base_port), - thr: PortWriteOnly::new(base_port), - ier: PortWriteOnly::new(base_port + 1), - fcr: PortWriteOnly::new(base_port + 2), - lcr: PortWriteOnly::new(base_port + 3), - mcr: PortWriteOnly::new(base_port + 4), - lsr: PortReadOnly::new(base_port + 5), + rhr: PortReadOnly::new(base_port + DATA_REG), + thr: PortWriteOnly::new(base_port + DATA_REG), + ier: PortWriteOnly::new(base_port + INT_EN_REG), + fcr: PortWriteOnly::new(base_port + FIFO_CTRL_REG), + lcr: PortWriteOnly::new(base_port + LINE_CTRL_REG), + mcr: PortWriteOnly::new(base_port + MODEM_CTRL_REG), + lsr: PortReadOnly::new(base_port + LINE_STATUS_REG), } } @@ -62,14 +130,14 @@ impl Uart16550 { fn putchar(&mut self, c: u8) { unsafe { - while self.lsr.read() & (1 << 5) == 0 {} + while self.lsr.read() & 
LineStatusFlags::OUTPUT_EMPTY.bits() == 0 {} self.thr.write(c); } } fn getchar(&mut self) -> Option { unsafe { - if self.lsr.read() & 1 != 0 { + if self.lsr.read() & LineStatusFlags::INPUT_FULL.bits() != 0 { Some(self.rhr.read()) } else { None @@ -78,6 +146,76 @@ impl Uart16550 { } } +impl VirtUart16550 { + pub fn new(base_port: u16) -> Self { + Self { + base_port, + fifo: Mutex::new(Fifo::new()), + } + } +} + +impl PortIoDevice for VirtUart16550 { + fn port_range(&self) -> core::ops::Range { + self.base_port..self.base_port + 8 + } + + fn read(&self, port: u16, access_size: u8) -> HvResult { + if access_size != 1 { + error!("Invalid serial port I/O read size: {} != 1", access_size); + return hv_result_err!(EIO); + } + let ret = match port - self.base_port { + DATA_REG => { + // read a byte from FIFO + let mut fifo = self.fifo.lock(); + if fifo.is_empty() { + 0 + } else { + fifo.pop() + } + } + LINE_STATUS_REG => { + // check if the physical serial port has an available byte, and push it to FIFO. 
+ let mut fifo = self.fifo.lock(); + if !fifo.is_full() { + if let Some(c) = console_getchar() { + fifo.push(c); + } + } + let mut lsr = LineStatusFlags::OUTPUT_EMPTY; + if !fifo.is_empty() { + lsr |= LineStatusFlags::INPUT_FULL; + } + lsr.bits() + } + INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | MODEM_STATUS_REG + | SCRATCH_REG => { + info!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented + 0 + } + _ => unreachable!(), + }; + Ok(ret as u32) + } + + fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult { + if access_size != 1 { + error!("Invalid serial port I/O write size: {} != 1", access_size); + return hv_result_err!(EIO); + } + match port - self.base_port { + DATA_REG => console_putchar(value as u8), + INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | SCRATCH_REG => { + info!("Unimplemented serial port I/O write: {:#x}", port); // unimplemented + } + LINE_STATUS_REG => {} // ignore + _ => unreachable!(), + } + Ok(()) + } +} + pub fn console_putchar(c: u8) { COM1.lock().putchar(c); } diff --git a/src/main.rs b/src/main.rs index 3f02f619..ca579bc6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -105,7 +105,7 @@ fn primary_init_early() { device::irqchip::primary_init_early(); // TODO: tmp - crate::arch::mm::init_hv_page_table().unwrap(); + // crate::arch::mm::init_hv_page_table().unwrap(); // TODO: zone_create(root_zone_config()).unwrap(); diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index c35484e6..87d4207e 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -5,18 +5,38 @@ use crate::{ }; pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; -pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x00000000; -pub const ROOT_ZONE_ENTRY: u64 = 0x00000000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x120_0000; +pub const ROOT_ZONE_ENTRY: u64 = 0x100_8000; pub const ROOT_ZONE_CPUS: u64 = (1 << 0); pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_MEMORY_REGIONS: 
[HvConfigMemoryRegion; 1] = [HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM, - physical_start: 0x100_0000, - virtual_start: 0x0, - size: 0x100_0000, -}]; +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 4] = [ + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x100_0000, + virtual_start: 0x0, + size: 0x100_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfec0_0000, + virtual_start: 0xfec0_0000, + size: 0x1000, + }, // io apic + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfed0_0000, + virtual_start: 0xfed0_0000, + size: 0x1000, + }, // hpet + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfee0_0000, + virtual_start: 0xfee0_0000, + size: 0x1000, + }, // local apic +]; pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig {}; From 92d6c636b0ea6b2842d3510aa9ee3120772f1056 Mon Sep 17 00:00:00 2001 From: Solicey Date: Thu, 27 Feb 2025 14:47:47 +0800 Subject: [PATCH 06/29] hacking to the linux shell --- scripts/qemu-x86_64.mk | 42 ++- scripts/x86_64/acpi/hpet.asl | 36 ++ scripts/x86_64/acpi/madt.asl | 54 +++ scripts/x86_64/acpi/rsdp.asl | 12 + scripts/x86_64/acpi/rsdt.asl | 18 + scripts/x86_64/acpi/xsdt.asl | 18 + src/arch/x86_64/acpi.rs | 0 src/arch/x86_64/boot.rs | 197 ++++++++++ src/arch/x86_64/cpu.rs | 449 +++++++++++++++++----- src/arch/x86_64/cpuid.rs | 190 ++++++++++ src/arch/x86_64/entry.rs | 4 +- src/arch/x86_64/gdt.rs | 20 + src/arch/x86_64/mod.rs | 4 + src/arch/x86_64/msr.rs | 64 ++-- src/arch/x86_64/s2pt.rs | 47 ++- src/arch/x86_64/trap.rs | 205 ++++++---- src/arch/x86_64/vmcs.rs | 569 ++++++++++++++++++++++++++++ src/arch/x86_64/vmx.rs | 647 +++++++++----------------------- src/device/irqchip/pic/i8259.rs | 3 +- src/device/irqchip/pic/lapic.rs | 129 ++++--- src/device/uart/uart16550.rs | 41 +- src/main.rs | 8 +- src/platform/qemu_x86_64.rs | 36 +- 23 files changed, 2043 insertions(+), 
750 deletions(-) create mode 100644 scripts/x86_64/acpi/hpet.asl create mode 100644 scripts/x86_64/acpi/madt.asl create mode 100644 scripts/x86_64/acpi/rsdp.asl create mode 100644 scripts/x86_64/acpi/rsdt.asl create mode 100644 scripts/x86_64/acpi/xsdt.asl create mode 100644 src/arch/x86_64/acpi.rs create mode 100644 src/arch/x86_64/boot.rs create mode 100644 src/arch/x86_64/cpuid.rs create mode 100644 src/arch/x86_64/vmcs.rs diff --git a/scripts/qemu-x86_64.mk b/scripts/qemu-x86_64.mk index 0916d46d..8abf8ebb 100644 --- a/scripts/qemu-x86_64.mk +++ b/scripts/qemu-x86_64.mk @@ -1,18 +1,50 @@ QEMU := qemu-system-x86_64 +acpi_asl_dir := scripts/x86_64/acpi +acpi_aml_dir := $(image_dir)/acpi + zone0_bios := $(image_dir)/rvm-bios.bin zone0_kernel := $(image_dir)/nimbos.bin +zone0_image := $(image_dir)/bzImage +zone0_setup := $(image_dir)/setup.bin +zone0_vmlinux := $(image_dir)/vmlinux.bin +zone0_initrd := $(image_dir)/initramfs.cpio.gz +zone0_boot16 := $(image_dir)/boot16.bin + +aml_hpet := $(acpi_aml_dir)/hpet.aml +aml_madt := $(acpi_aml_dir)/madt.aml +aml_rsdp := $(acpi_aml_dir)/rsdp.aml +aml_rsdt := $(acpi_aml_dir)/rsdt.aml +aml_xsdt := $(acpi_aml_dir)/xsdt.aml + QEMU_ARGS := -machine q35 -QEMU_ARGS += -cpu host -accel kvm +QEMU_ARGS += -cpu host,+x2apic -accel kvm QEMU_ARGS += -smp 4 QEMU_ARGS += -serial mon:stdio QEMU_ARGS += -m 2G QEMU_ARGS += -nographic QEMU_ARGS += -kernel $(hvisor_elf) -QEMU_ARGS += -device loader,file="$(zone0_bios)",addr=0x1008000,force-raw=on -QEMU_ARGS += -device loader,file="$(zone0_kernel)",addr=0x1200000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_bios)",addr=0x5008000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_kernel)",addr=0x5200000,force-raw=on + +QEMU_ARGS += -device loader,file="$(zone0_boot16)",addr=0x5008000,force-raw=on +QEMU_ARGS += -device loader,file="$(zone0_setup)",addr=0x500d000,force-raw=on +QEMU_ARGS += -device loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on +QEMU_ARGS += -device 
loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on +QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" + +QEMU_ARGS += -device loader,file="$(aml_rsdp)",addr=0x50f2400,force-raw=on +QEMU_ARGS += -device loader,file="$(aml_rsdt)",addr=0x50f2440,force-raw=on +QEMU_ARGS += -device loader,file="$(aml_xsdt)",addr=0x50f2480,force-raw=on +QEMU_ARGS += -device loader,file="$(aml_madt)",addr=0x50f2500,force-raw=on +QEMU_ARGS += -device loader,file="$(aml_hpet)",addr=0x50f2740,force-raw=on + +$(hvisor_bin): elf aml + $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ + +aml: $(aml_hpet) $(aml_madt) $(aml_rsdp) $(aml_rsdt) $(aml_xsdt) -$(hvisor_bin): elf - $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ \ No newline at end of file +$(acpi_aml_dir)/%.aml: $(acpi_asl_dir)/%.asl + iasl -p $@ $< \ No newline at end of file diff --git a/scripts/x86_64/acpi/hpet.asl b/scripts/x86_64/acpi/hpet.asl new file mode 100644 index 00000000..ae7416bc --- /dev/null +++ b/scripts/x86_64/acpi/hpet.asl @@ -0,0 +1,36 @@ +/* +* HPET template +*/ +[0004] Signature : "HPET" +[0004] Table Length : 00000000 +[0001] Revision : 01 +[0001] Checksum : 00 +[0006] Oem ID : "DM " +[0008] Oem Table ID : "DMHPET " +[0004] Oem Revision : 00000001 +/* iasl will fill in the compiler ID/revision fields */ +[0004] Asl Compiler ID : "xxxx" +[0004] Asl Compiler Revision : 00000000 + +/* +[31:16] = PCI Vendor ID of 1st Timer Block (0x8086) +[15] = LegacyReplacement IRQ Routing Capable (0) +[14] = Reserved (0) +[13] = COUNT_SIZE_CAP counter size (32-bit=0) +[12:8] = Number of Comparators in 1st Timer Block (3-1=2) +[7:0] = Hardware Rev ID (1) +*/ +[0004] Hardware Block ID : 80860201 + +[0012] Timer Block Register : [Generic Address Structure] + [0001] Space ID : 00 [SystemMemory] + [0001] Bit Width : 00 + [0001] Bit Offset : 00 + [0001] Encoded Access Width : 00 [Undefined/Legacy] + [0008] Address : 00000000fed00000 + +[0001] Sequence Number : 00 +[0002] Minimum Clock Ticks : 0000 +[0004] 
Flags (decoded below) : 00000001 + 4K Page Protect : 1 + 64K Page Protect : 0 \ No newline at end of file diff --git a/scripts/x86_64/acpi/madt.asl b/scripts/x86_64/acpi/madt.asl new file mode 100644 index 00000000..ce48d07f --- /dev/null +++ b/scripts/x86_64/acpi/madt.asl @@ -0,0 +1,54 @@ +/* +* MADT template +*/ +[0004] Signature : "APIC" +[0004] Table Length : 00000000 +[0001] Revision : 01 +[0001] Checksum : 00 +[0006] Oem ID : "DM " +[0008] Oem Table ID : "DMMADT " +[0004] Oem Revision : 00000001 +/* iasl will fill in the compiler ID/revision fields */ +[0004] Asl Compiler ID : "xxxx" +[0004] Asl Compiler Revision : 00000000 +[0004] Local Apic Address : fee00000 +[0004] Flags (decoded below) : 00000001 + PC-AT Compatibility : 1 + +/* Processor Local APIC */ +[0001] Subtable Type : 00 +[0001] Length : 08 +[0001] Processor ID : 00 +[0001] Local Apic ID : 00 +[0004] Flags (decoded below) : 00000001 + Processor Enabled : 1 + Runtime Online Capable : 0 + +/* IO APIC */ +[0001] Subtable Type : 01 +[0001] Length : 0C +[0001] I/O Apic ID : 00 +[0001] Reserved : 00 +[0004] Address : fec00000 +[0004] Interrupt : 00000000 + +/* Interrupt Source Override */ +/* Legacy IRQ0 is connected to pin 2 of the IOAPIC +[0001] Subtable Type : 02 +[0001] Length : 0A +[0001] Bus : 00 +[0001] Source : 00 +[0004] Interrupt : 00000002 +[0002] Flags (decoded below) : 0000 + Polarity : 0 + Trigger Mode : 0 */ + +/* Local APIC NMI Structure */ +/* Connected to LINT1 on all CPUs */ +[0001] Subtable Type : 04 +[0001] Length : 06 +[0001] Processor ID : ff +[0002] Flags (decoded below) : 0000 + Polarity : 0 + Trigger Mode : 0 +[0001] Interrupt Input LINT : 01 \ No newline at end of file diff --git a/scripts/x86_64/acpi/rsdp.asl b/scripts/x86_64/acpi/rsdp.asl new file mode 100644 index 00000000..cbf1120f --- /dev/null +++ b/scripts/x86_64/acpi/rsdp.asl @@ -0,0 +1,12 @@ +/* +* RSDP template +*/ +[0008] Signature : "RSD PTR " +[0001] Checksum : 00 +[0006] Oem ID : "DM " +[0001] Revision : 02 
+[0004] RSDT Address : 000f2440 +[0004] Length : 00000024 +[0008] XSDT Address : 00000000000f2480 +[0001] Extended Checksum : 00 +[0003] Reserved : 000000 \ No newline at end of file diff --git a/scripts/x86_64/acpi/rsdt.asl b/scripts/x86_64/acpi/rsdt.asl new file mode 100644 index 00000000..4d778ae7 --- /dev/null +++ b/scripts/x86_64/acpi/rsdt.asl @@ -0,0 +1,18 @@ +/* +* RSDT template +*/ +[0004] Signature : "RSDT" +[0004] Table Length : 00000000 +[0001] Revision : 01 +[0001] Checksum : 00 +[0006] Oem ID : "DM " +[0008] Oem Table ID : "DMRSDT " +[0004] Oem Revision : 00000001 +/* iasl will fill in the compiler ID/revision fields */ +[0004] Asl Compiler ID : "xxxx" +[0004] Asl Compiler Revision : 00000000 + +/* MADT */ +[0004] ACPI Table Address : 000f2500 +/* HPET */ +[0004] ACPI Table Address : 000f2740 \ No newline at end of file diff --git a/scripts/x86_64/acpi/xsdt.asl b/scripts/x86_64/acpi/xsdt.asl new file mode 100644 index 00000000..f00ddf50 --- /dev/null +++ b/scripts/x86_64/acpi/xsdt.asl @@ -0,0 +1,18 @@ +/* +* XSDT template +*/ +[0004] Signature : "XSDT" +[0004] Table Length : 00000000 +[0001] Revision : 01 +[0001] Checksum : 00 +[0006] Oem ID : "DM " +[0008] Oem Table ID : "DMXSDT " +[0004] Oem Revision : 00000001 +/* iasl will fill in the compiler ID/revision fields */ +[0004] Asl Compiler ID : "xxxx" +[0004] Asl Compiler Revision : 00000000 + +/* MADT */ +[0004] ACPI Table Address : 000f2500 +/* HPET */ +[0004] ACPI Table Address : 000f2740 \ No newline at end of file diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs new file mode 100644 index 00000000..07961167 --- /dev/null +++ b/src/arch/x86_64/boot.rs @@ -0,0 +1,197 @@ +use crate::{ + config::{root_zone_config, HvZoneConfig, MEM_TYPE_RAM}, + error::HvResult, + memory::{GuestPhysAddr, HostPhysAddr}, + platform::qemu_x86_64::gpa_as_mut_ptr, +}; +use alloc::string::{String, 
ToString}; +use core::{ + ffi::{c_char, CStr}, + ptr::copy_nonoverlapping, +}; +use spin::Mutex; + +const E820_MAX_ENTRIES_ZEROPAGE: usize = 128; + +lazy_static::lazy_static! { + static ref CMDLINE: Mutex = Mutex::new(String::new()); +} + +pub fn cmdline() -> &'static Mutex { + &CMDLINE +} + +bitflags::bitflags! { + #[derive(Clone, Copy, Debug)] + /// https://www.kernel.org/doc/html/latest/arch/x86/boot.html + pub struct BootLoadFlags: u8 { + const LOADED_HIGH = 1; + const KASLR_FLAG = 1 << 1; + const QUIET_FLAG = 1 << 5; + const CAN_USE_HEAP = 1 << 7; + } +} + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +pub enum E820Type { + E820_RAM = 1, + E820_RESERVED = 2, + E820_ACPI = 3, + E820_NVS = 4, + E820_UNUSABLE = 5, + E820_PMEM = 7, +} +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +/// The so-called "zeropage" +pub struct BootParams { + pad0: [u8; 0x1e8], + e820_entries: u8, + pad1: [u8; 0x8], + setup_sects: u8, + pad2: [u8; 0x1b], + kernel_version: u16, + type_of_loader: u8, + loadflags: BootLoadFlags, + setup_move_size: u16, + code32_start: u32, + ramdisk_image: u32, + ramdisk_size: u32, + bootsect_kludge: u32, + heap_end_ptr: u16, + pad3: [u8; 2], + cmd_line_ptr: u32, + pad4: [u8; 12], + cmdline_size: u32, + pad5: [u8; 12], + payload_offset: u32, + payload_length: u32, + pad6: [u8; 128], + e820_table: [BootE820Entry; E820_MAX_ENTRIES_ZEROPAGE], + pad7: [u8; 0x330], +} + +impl BootParams { + pub fn fill( + setup_addr: GuestPhysAddr, + initrd_addr: GuestPhysAddr, + root_cmdline_addr: GuestPhysAddr, + root_cmdline: &str, + ) -> HvResult { + let boot_params_hpa = gpa_as_mut_ptr(setup_addr) as HostPhysAddr; + let boot_params = unsafe { &mut *(boot_params_hpa as *mut BootParams) }; + + // TODO: get kernel version + + // set bootloader type as undefined + boot_params.type_of_loader = 0xff; + let mut loadflags = boot_params.loadflags; + // print early messages + loadflags &= 
!BootLoadFlags::QUIET_FLAG; + // disable heap_ptr + loadflags &= !BootLoadFlags::CAN_USE_HEAP; + boot_params.loadflags = loadflags; + + // TODO: tmp command + unsafe { + copy_nonoverlapping( + root_cmdline.as_ptr(), + gpa_as_mut_ptr(root_cmdline_addr), + root_cmdline.len(), + ) + }; + boot_params.cmd_line_ptr = root_cmdline_addr as _; + + // set e820 + // TODO: zone config + boot_params.set_e820_entries(&root_zone_config()); + + // parse cmdline + let hv_cmdline = CMDLINE.lock().clone(); + for param in hv_cmdline.split_whitespace() { + let mut parts = param.splitn(2, '='); + let key = parts.next().unwrap().to_string(); + let value = parts.next().map(|s| s.to_string()); + match key.as_str() { + "initrd_size" => { + boot_params.set_initrd(initrd_addr as _, value.unwrap().parse::().unwrap()) + } + _ => {} + } + } + Ok(()) + } + + fn set_e820_entries(&mut self, config: &HvZoneConfig) { + let mut index = 0; + for mem_region in config.memory_regions().iter() { + match mem_region.mem_type { + MEM_TYPE_RAM => { + self.e820_table[index] = BootE820Entry { + addr: mem_region.virtual_start, + size: mem_region.size, + _type: E820Type::E820_RAM, + }; + index += 1; + } + _ => {} + } + // FIXME: very dirty! 
+ if index == 3 { + break; + } + } + self.e820_entries = index as _; + } + + fn set_initrd(&mut self, ramdisk_image: u32, ramdisk_size: u32) { + self.ramdisk_image = ramdisk_image; + self.ramdisk_size = ramdisk_size; + info!("initrd size: {}", self.ramdisk_size); + } +} + +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +/// The E820 memory region entry of the boot protocol ABI: +pub struct BootE820Entry { + addr: u64, + size: u64, + _type: E820Type, +} + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct MultibootInfo { + flags: u32, + mem_lower: u32, + mem_upper: u32, + boot_device: u32, + cmdline: u32, + pub mods_count: u32, + pub mods_addr: u32, +} + +impl MultibootInfo { + fn new(addr: usize) -> Self { + let multiboot_info = unsafe { &*(addr as *const MultibootInfo) }; + multiboot_info.clone() + } + + pub fn init(info_addr: usize) { + let boot_info = MultibootInfo::new(info_addr); + println!("{:#x?}", boot_info); + + let cmd_ptr = boot_info.cmdline as *const c_char; + let cmd_cstr = unsafe { CStr::from_ptr(cmd_ptr) }; + let cmd_str = cmd_cstr.to_str().unwrap(); + CMDLINE.lock().push_str(cmd_str); + + println!("cmdline: {}", CMDLINE.lock().as_str()); + } +} diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 54b26210..a058ec3f 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,15 +1,16 @@ use crate::{ arch::{ - gdt::GdtStruct, + boot::BootParams, + gdt::{get_tr_base, GdtStruct}, lapic::{busy_wait, local_apic}, - msr::Msr::*, - msr::MsrBitmap, + msr::{Msr, Msr::*, MsrBitmap}, + vmcs::*, vmx::*, }, consts::{core_end, PER_CPU_SIZE}, device::irqchip::pic::lapic::VirtApicTimer, error::{HvError, HvResult}, - memory::{addr::phys_to_virt, GuestPhysAddr, PhysAddr, PAGE_SIZE}, + memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, percpu::this_cpu_data, platform::qemu_x86_64::*, }; @@ -18,12 +19,20 @@ use core::{ arch::{asm, global_asm}, fmt::{Debug, Formatter, Result}, mem::size_of, + 
ptr::copy_nonoverlapping, time::Duration, }; use raw_cpuid::CpuId; -use x86_64::structures::tss::TaskStateSegment; - -use super::msr::Msr; +use x86::{ + dtables::{self, DescriptorTablePointer}, + vmx::vmcs::control::{ + EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls, + }, +}; +use x86_64::{ + registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}, + structures::tss::TaskStateSegment, +}; const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; @@ -166,32 +175,40 @@ impl ArchCpu { } } - pub fn init(&mut self, entry: GuestPhysAddr, dtb: usize) -> HvResult { - self.activate_vmx()?; - self.setup_vmcs(entry)?; - Ok(()) + /// Advance guest `RIP` by `instr_len` bytes. + pub fn advance_guest_rip(&mut self, instr_len: u8) -> HvResult { + Ok(VmcsGuestNW::RIP.write(VmcsGuestNW::RIP.read()? + instr_len as usize)?) } - pub fn run(&mut self) -> ! { - assert!(this_cpu_id() == self.cpuid); - // TODO: this_cpu_data().cpu_on_entry - self.init(GUEST_ENTRY, this_cpu_data().dtb_ipa).unwrap(); - - this_cpu_data().activate_gpm(); - set_host_rsp(&self.host_stack_top as *const _ as usize).unwrap(); - set_guest_page_table(GUEST_PT1).unwrap(); - set_guest_stack_pointer(GUEST_STACK_TOP).unwrap(); + /// Returns the mutable reference of [`VirtApicTimer`]. + pub fn apic_timer_mut(&mut self) -> &mut VirtApicTimer { + &mut self.apic_timer + } - unsafe { self.vmx_launch() }; - loop {} + pub fn cr(&self, cr_idx: usize) -> usize { + (|| -> HvResult { + Ok(match cr_idx { + 4 => { + let host_mask = VmcsControlNW::CR4_GUEST_HOST_MASK.read()?; + (VmcsControlNW::CR4_READ_SHADOW.read()? & host_mask) + | (VmcsGuestNW::CR4.read()? & !host_mask) + } + _ => unreachable!(), + }) + })() + .expect("Failed to read guest control register") } pub fn idle(&mut self) -> ! 
{ assert!(this_cpu_id() == self.cpuid); - unsafe { self.init(0, this_cpu_data().dtb_ipa) }; + // unsafe { self.reset(0, this_cpu_data().dtb_ipa) }; loop {} } + pub fn inject_fault(&mut self) -> HvResult { + Ok(()) + } + /// Guest general-purpose registers. pub fn regs(&self) -> &GeneralRegisters { &self.guest_regs @@ -202,15 +219,38 @@ impl ArchCpu { &mut self.guest_regs } - /// Returns the mutable reference of [`VirtApicTimer`]. - pub fn apic_timer_mut(&mut self) -> &mut VirtApicTimer { - &mut self.apic_timer + pub fn reset(&mut self, entry: GuestPhysAddr) -> HvResult { + self.activate_vmx()?; + self.setup_vmcs(entry)?; + Ok(()) } - /// Add a virtual interrupt or exception to the pending events list, - /// and try to inject it before later VM entries. - pub fn inject_event(&mut self, vector: u8, err_code: Option) { - self.pending_events.push_back((vector, err_code)); + pub fn run(&mut self) -> ! { + assert!(this_cpu_id() == self.cpuid); + + // this_cpu_data().cpu_on_entry + self.reset(this_cpu_data().cpu_on_entry).unwrap(); + + self.setup_boot_params().unwrap(); + this_cpu_data().activate_gpm(); + + unsafe { self.vmx_launch() }; + loop {} + } + + /// If enable, a VM exit occurs at the beginning of any instruction if + /// `RFLAGS.IF` = 1 and there are no other blocking of interrupts. + /// (see SDM, Vol. 3C, Section 24.4.2) + pub fn set_interrupt_window(&mut self, enable: bool) -> HvResult { + let mut ctrl: u32 = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?; + let bits = PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); + if enable { + ctrl |= bits + } else { + ctrl &= !bits + } + VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(ctrl)?; + Ok(()) } fn activate_vmx(&mut self) -> HvResult { @@ -231,32 +271,278 @@ impl ArchCpu { Ok(()) } + /// Whether the guest interrupts are blocked. (SDM Vol. 
3C, Section 24.4.2, Table 24-3) + fn allow_interrupt(&self) -> bool { + let rflags = VmcsGuestNW::RFLAGS.read().unwrap(); + let block_state = VmcsGuest32::INTERRUPTIBILITY_STATE.read().unwrap(); + rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0 + && block_state == 0 + } + + /// Try to inject a pending event before next VM entry. + fn check_pending_events(&mut self) -> HvResult { + if let Some(event) = self.pending_events.front() { + let allow_interrupt = self.allow_interrupt(); + if event.0 < 32 || allow_interrupt { + // if it's an exception, or an interrupt that is not blocked, inject it directly. + Vmcs::inject_interrupt(event.0, event.1)?; + self.pending_events.pop_front(); + } else { + // interrupts are blocked, enable interrupt-window exiting. + self.set_interrupt_window(true)?; + } + } + Ok(()) + } + + /// Add a virtual interrupt or exception to the pending events list, + /// and try to inject it before later VM entries. + fn inject_interrupt(&mut self, vector: u8, err_code: Option) { + self.pending_events.push_back((vector, err_code)); + } + + fn setup_boot_params(&mut self) -> HvResult { + BootParams::fill( + ROOT_ZONE_SETUP_ADDR, + ROOT_ZONE_INITRD_ADDR, + ROOT_ZONE_CMDLINE_ADDR, + "console=ttyS0 earlyprintk=serial rdinit=/init nokaslr\0", + // "console=ttyS0 earlyprintk=serial nokaslr\0" + )?; + self.guest_regs.rax = this_cpu_data().cpu_on_entry as u64; + self.guest_regs.rsi = ROOT_ZONE_SETUP_ADDR as u64; + Ok(()) + } + + fn set_cr(&mut self, cr_idx: usize, val: u64) -> HvResult { + match cr_idx { + 0 => { + // Retrieve/validate restrictions on CR0 + // + // In addition to what the VMX MSRs tell us, make sure that + // - NW and CD are kept off as they are not updated on VM exit and we + // don't want them enabled for performance reasons while in root mode + // - PE and PG can be freely chosen (by the guest) because we demand + // unrestricted guest mode support anyway + // - ET is ignored + let must0 = 
Msr::IA32_VMX_CR0_FIXED1.read(); + // & !(Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE).bits(); + let must1 = Msr::IA32_VMX_CR0_FIXED0.read() + & !(Cr0Flags::PAGING | Cr0Flags::PROTECTED_MODE_ENABLE).bits(); + VmcsGuestNW::CR0.write(((val & must0) | must1) as _)?; + VmcsControlNW::CR0_READ_SHADOW.write(val as _)?; + VmcsControlNW::CR0_GUEST_HOST_MASK.write((must1 | !must0) as _)?; + } + 3 => VmcsGuestNW::CR3.write(val as _)?, + 4 => { + let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + let cr4_read_shadow = 0; + let val = val | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS.bits(); + VmcsGuestNW::CR4.write(val as _)?; + VmcsControlNW::CR4_GUEST_HOST_MASK.write(cr4_host_owned.bits() as _)?; + VmcsControlNW::CR4_READ_SHADOW.write(cr4_read_shadow)?; + } + _ => unreachable!(), + } + Ok(()) + } + fn setup_vmcs(&mut self, entry: GuestPhysAddr) -> HvResult { self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false)?; + self.msr_bitmap = MsrBitmap::intercept_def()?; - self.msr_bitmap = MsrBitmap::init()?; - self.setup_msr_bitmap()?; + let start_paddr = self.vmcs_region.start_paddr() as usize; + Vmcs::clear(start_paddr)?; + Vmcs::load(start_paddr)?; - unsafe { enable_vmcs(self.vmcs_region.start_paddr() as u64)? }; - setup_vmcs_host(Self::vmx_exit as usize)?; - setup_vmcs_guest(entry)?; - setup_vmcs_control(self.msr_bitmap.phys_addr())?; + self.setup_vmcs_host(&self.host_stack_top as *const _ as usize)?; + self.setup_vmcs_guest(entry, ROOT_ZONE_BOOT_STACK)?; + self.setup_vmcs_control()?; Ok(()) } - #[naked] - unsafe extern "C" fn vmx_launch(&mut self) -> ! { - asm!( - "mov [rdi + {host_stack_top}], rsp", // save current RSP to host_stack_top - "mov rsp, rdi", // set RSP to guest regs area - restore_regs_from_stack!(), - "vmlaunch", - "jmp {failed}", - host_stack_top = const size_of::(), - failed = sym Self::vmx_entry_failed, - options(noreturn), - ) + fn setup_vmcs_control(&mut self) -> HvResult { + // Intercept NMI and external interrupts. 
+ use PinbasedControls as PinCtrl; + Vmcs::set_control( + VmcsControl32::PINBASED_EXEC_CONTROLS, + Msr::IA32_VMX_TRUE_PINBASED_CTLS, + Msr::IA32_VMX_PINBASED_CTLS.read() as u32, + (PinCtrl::NMI_EXITING | PinCtrl::EXTERNAL_INTERRUPT_EXITING).bits(), + 0, + )?; + + // Intercept all I/O instructions, use MSR bitmaps, activate secondary controls, + // disable CR3 load/store interception. + use PrimaryControls as CpuCtrl; + Vmcs::set_control( + VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS, + Msr::IA32_VMX_TRUE_PROCBASED_CTLS, + Msr::IA32_VMX_PROCBASED_CTLS.read() as u32, + ( + // CpuCtrl::RDTSC_EXITING | + CpuCtrl::UNCOND_IO_EXITING | CpuCtrl::USE_MSR_BITMAPS | CpuCtrl::SECONDARY_CONTROLS + ) + .bits(), + (CpuCtrl::CR3_LOAD_EXITING | CpuCtrl::CR3_STORE_EXITING).bits(), + )?; + + // Enable EPT, RDTSCP, INVPCID, and unrestricted guest. + use SecondaryControls as CpuCtrl2; + Vmcs::set_control( + VmcsControl32::SECONDARY_PROCBASED_EXEC_CONTROLS, + Msr::IA32_VMX_PROCBASED_CTLS2, + 0, + (CpuCtrl2::ENABLE_EPT + | CpuCtrl2::ENABLE_RDTSCP + | CpuCtrl2::ENABLE_INVPCID + | CpuCtrl2::UNRESTRICTED_GUEST) + .bits(), + 0, + )?; + + // Load guest IA32_PAT/IA32_EFER on VM entry. + use EntryControls as EntryCtrl; + Vmcs::set_control( + VmcsControl32::VMENTRY_CONTROLS, + Msr::IA32_VMX_TRUE_ENTRY_CTLS, + Msr::IA32_VMX_ENTRY_CTLS.read() as u32, + (EntryCtrl::LOAD_IA32_PAT | EntryCtrl::LOAD_IA32_EFER).bits(), + 0, + )?; + + // Switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit. + use ExitControls as ExitCtrl; + Vmcs::set_control( + VmcsControl32::VMEXIT_CONTROLS, + Msr::IA32_VMX_TRUE_EXIT_CTLS, + Msr::IA32_VMX_EXIT_CTLS.read() as u32, + (ExitCtrl::HOST_ADDRESS_SPACE_SIZE + | ExitCtrl::ACK_INTERRUPT_ON_EXIT + | ExitCtrl::SAVE_IA32_PAT + | ExitCtrl::LOAD_IA32_PAT + | ExitCtrl::SAVE_IA32_EFER + | ExitCtrl::LOAD_IA32_EFER) + .bits(), + 0, + )?; + + // No MSR switches if hypervisor doesn't use and there is only one vCPU. 
+ VmcsControl32::VMEXIT_MSR_STORE_COUNT.write(0)?; + VmcsControl32::VMEXIT_MSR_LOAD_COUNT.write(0)?; + VmcsControl32::VMENTRY_MSR_LOAD_COUNT.write(0)?; + + // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps. + VmcsControl32::EXCEPTION_BITMAP.write(0)?; + VmcsControl64::IO_BITMAP_A_ADDR.write(0)?; + VmcsControl64::IO_BITMAP_B_ADDR.write(0)?; + VmcsControl64::MSR_BITMAPS_ADDR.write(self.msr_bitmap.phys_addr() as _)?; + Ok(()) + } + + fn setup_vmcs_guest(&mut self, entry: GuestPhysAddr, rsp: GuestPhysAddr) -> HvResult { + let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; + let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + + self.set_cr(0, cr0_guest.bits()); + self.set_cr(3, 0); + self.set_cr(4, cr4_guest.bits()); + + macro_rules! set_guest_segment { + ($seg: ident, $access_rights: expr) => {{ + use VmcsGuest16::*; + use VmcsGuest32::*; + use VmcsGuestNW::*; + concat_idents!($seg, _SELECTOR).write(0)?; + concat_idents!($seg, _BASE).write(0)?; + concat_idents!($seg, _LIMIT).write(0xffff)?; + concat_idents!($seg, _ACCESS_RIGHTS).write($access_rights)?; + }}; + } + + set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed + set_guest_segment!(CS, 0x9b); // 16-bit, present, code, exec/read, accessed + set_guest_segment!(SS, 0x93); + set_guest_segment!(DS, 0x93); + set_guest_segment!(FS, 0x93); + set_guest_segment!(GS, 0x93); + set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy + set_guest_segment!(LDTR, 0x82); // present, system, LDT + + VmcsGuestNW::GDTR_BASE.write(0)?; + VmcsGuest32::GDTR_LIMIT.write(0xffff)?; + VmcsGuestNW::IDTR_BASE.write(0)?; + VmcsGuest32::IDTR_LIMIT.write(0xffff)?; + + VmcsGuestNW::DR7.write(0x400)?; + VmcsGuestNW::RSP.write(rsp)?; + VmcsGuestNW::RIP.write(entry)?; + VmcsGuestNW::RFLAGS.write(0x2)?; + VmcsGuestNW::PENDING_DBG_EXCEPTIONS.write(0)?; + VmcsGuestNW::IA32_SYSENTER_ESP.write(0)?; + VmcsGuestNW::IA32_SYSENTER_EIP.write(0)?; + VmcsGuest32::IA32_SYSENTER_CS.write(0)?; 
+ + VmcsGuest32::INTERRUPTIBILITY_STATE.write(0)?; + VmcsGuest32::ACTIVITY_STATE.write(0)?; + VmcsGuest32::VMX_PREEMPTION_TIMER_VALUE.write(0)?; + + VmcsGuest64::LINK_PTR.write(u64::MAX)?; // SDM Vol. 3C, Section 24.4.2 + VmcsGuest64::IA32_DEBUGCTL.write(0)?; + VmcsGuest64::IA32_PAT.write(Msr::IA32_PAT.read())?; + VmcsGuest64::IA32_EFER.write(0)?; + Ok(()) + } + + fn setup_vmcs_host(&mut self, rsp: GuestPhysAddr) -> HvResult { + VmcsHost64::IA32_PAT.write(Msr::IA32_PAT.read())?; + VmcsHost64::IA32_EFER.write(Msr::IA32_EFER.read())?; + + VmcsHostNW::CR0.write(Cr0::read_raw() as _)?; + VmcsHostNW::CR3.write(Cr3::read_raw().0.start_address().as_u64() as _)?; + VmcsHostNW::CR4.write(Cr4::read_raw() as _)?; + + VmcsHost16::ES_SELECTOR.write(x86::segmentation::es().bits())?; + VmcsHost16::CS_SELECTOR.write(x86::segmentation::cs().bits())?; + VmcsHost16::SS_SELECTOR.write(x86::segmentation::ss().bits())?; + VmcsHost16::DS_SELECTOR.write(x86::segmentation::ds().bits())?; + VmcsHost16::FS_SELECTOR.write(x86::segmentation::fs().bits())?; + VmcsHost16::GS_SELECTOR.write(x86::segmentation::gs().bits())?; + VmcsHostNW::FS_BASE.write(Msr::IA32_FS_BASE.read() as _)?; + VmcsHostNW::GS_BASE.write(Msr::IA32_GS_BASE.read() as _)?; + + let tr = unsafe { x86::task::tr() }; + let mut gdtp = DescriptorTablePointer::::default(); + let mut idtp = DescriptorTablePointer::::default(); + unsafe { + dtables::sgdt(&mut gdtp); + dtables::sidt(&mut idtp); + } + VmcsHost16::TR_SELECTOR.write(tr.bits())?; + VmcsHostNW::TR_BASE.write(get_tr_base(tr, &gdtp) as _)?; + VmcsHostNW::GDTR_BASE.write(gdtp.base as _)?; + VmcsHostNW::IDTR_BASE.write(idtp.base as _)?; + VmcsHostNW::RSP.write(rsp)?; + VmcsHostNW::RIP.write(Self::vmx_exit as usize)?; + + VmcsHostNW::IA32_SYSENTER_ESP.write(0)?; + VmcsHostNW::IA32_SYSENTER_EIP.write(0)?; + VmcsHost32::IA32_SYSENTER_CS.write(0)?; + Ok(()) + } + + fn vmexit_handler(&mut self) { + crate::arch::trap::handle_vmexit(self).unwrap(); + // Check if there is an APIC timer 
interrupt + if self.apic_timer.check_interrupt() { + self.inject_interrupt(self.apic_timer.vector(), None); + } + self.check_pending_events().unwrap(); + } + + unsafe fn vmx_entry_failed() -> ! { + panic!("{}", Vmcs::instruction_error().unwrap().as_str()); } #[naked] @@ -278,48 +564,18 @@ impl ArchCpu { ); } - unsafe fn vmx_entry_failed() -> ! { - panic!("VMX instruction error: {}", instruction_error()); - } - - fn vmexit_handler(&mut self) { - crate::arch::trap::handle_vmexit(self).unwrap(); - // Check if there is an APIC timer interrupt - if self.apic_timer.check_interrupt() { - self.inject_event(self.apic_timer.vector(), None); - } - self.check_pending_events().unwrap(); - } - - /// Try to inject a pending event before next VM entry. - fn check_pending_events(&mut self) -> HvResult { - if let Some(event) = self.pending_events.front() { - let allow_interrupt = allow_interrupt()?; - if event.0 < 32 || allow_interrupt { - // if it's an exception, or an interrupt that is not blocked, inject it directly. - inject_event(event.0, event.1)?; - self.pending_events.pop_front(); - } else { - // interrupts are blocked, enable interrupt-window exiting. - set_interrupt_window(true)?; - } - } - Ok(()) - } - - fn setup_msr_bitmap(&mut self) -> HvResult { - // Intercept IA32_APIC_BASE MSR accesses - let msr = IA32_APIC_BASE; - self.msr_bitmap.set_read_intercept(msr, true); - self.msr_bitmap.set_write_intercept(msr, true); - // Intercept all x2APIC MSR accesses - for addr in 0x800_u32..=0x83f_u32 { - if let Ok(msr) = Msr::try_from(addr) { - self.msr_bitmap.set_read_intercept(msr, true); - self.msr_bitmap.set_write_intercept(msr, true); - } - } - Ok(()) + #[naked] + unsafe extern "C" fn vmx_launch(&mut self) -> ! 
{ + asm!( + "mov [rdi + {host_stack_top}], rsp", // save current RSP to host_stack_top + "mov rsp, rdi", // set RSP to guest regs area + restore_regs_from_stack!(), + "vmlaunch", + "jmp {failed}", + host_stack_top = const size_of::(), + failed = sym Self::vmx_entry_failed, + options(noreturn), + ) } } @@ -335,9 +591,16 @@ impl Debug for ArchCpu { (|| -> HvResult { Ok(f.debug_struct("ArchCpu") .field("guest_regs", &self.guest_regs) - .field("rip", &guest_rip()) - .field("rsp", &guest_rsp()) - .field("cr3", &guest_cr3()) + .field("rip", &VmcsGuestNW::RIP.read()?) + .field("rsp", &VmcsGuestNW::RSP.read()?) + .field("rflags", &VmcsGuestNW::RFLAGS.read()?) + .field("cr0", &VmcsGuestNW::CR0.read()?) + .field("cr3", &VmcsGuestNW::CR3.read()?) + .field("cr4", &VmcsGuestNW::CR4.read()?) + .field("cs", &VmcsGuest16::CS_SELECTOR.read()?) + .field("fs_base", &VmcsGuestNW::FS_BASE.read()?) + .field("gs_base", &VmcsGuestNW::GS_BASE.read()?) + .field("tss", &VmcsGuest16::TR_SELECTOR.read()?) .finish()) })() .unwrap() diff --git a/src/arch/x86_64/cpuid.rs b/src/arch/x86_64/cpuid.rs new file mode 100644 index 00000000..7eeae7a1 --- /dev/null +++ b/src/arch/x86_64/cpuid.rs @@ -0,0 +1,190 @@ +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug)] +pub enum CpuIdEax { + VendorInfo = 0x0, + FeatureInfo = 0x1, + StructuredExtendedFeatureInfo = 0x7, + HypervisorInfo = 0x4000_0000, + HypervisorFeatures = 0x4000_0001, +} +} + +bitflags::bitflags! { + /// Copied from https://docs.rs/raw-cpuid/8.1.2/src/raw_cpuid/lib.rs.html#1290-1294 + pub struct FeatureInfoFlags: u64 { + + // ECX flags + + /// Streaming SIMD Extensions 3 (SSE3). A value of 1 indicates the processor supports this technology. + const SSE3 = 1 << 0; + /// PCLMULQDQ. A value of 1 indicates the processor supports the PCLMULQDQ instruction + const PCLMULQDQ = 1 << 1; + /// 64-bit DS Area. A value of 1 indicates the processor supports DS area using 64-bit layout + const DTES64 = 1 << 2; + /// MONITOR/MWAIT. 
A value of 1 indicates the processor supports this feature. + const MONITOR = 1 << 3; + /// CPL Qualified Debug Store. A value of 1 indicates the processor supports the extensions to the Debug Store feature to allow for branch message storage qualified by CPL. + const DSCPL = 1 << 4; + /// Virtual Machine Extensions. A value of 1 indicates that the processor supports this technology. + const VMX = 1 << 5; + /// Safer Mode Extensions. A value of 1 indicates that the processor supports this technology. See Chapter 5, Safer Mode Extensions Reference. + const SMX = 1 << 6; + /// Enhanced Intel SpeedStep® technology. A value of 1 indicates that the processor supports this technology. + const EIST = 1 << 7; + /// Thermal Monitor 2. A value of 1 indicates whether the processor supports this technology. + const TM2 = 1 << 8; + /// A value of 1 indicates the presence of the Supplemental Streaming SIMD Extensions 3 (SSSE3). A value of 0 indicates the instruction extensions are not present in the processor + const SSSE3 = 1 << 9; + /// L1 Context ID. A value of 1 indicates the L1 data cache mode can be set to either adaptive mode or shared mode. A value of 0 indicates this feature is not supported. See definition of the IA32_MISC_ENABLE MSR Bit 24 (L1 Data Cache Context Mode) for details. + const CNXTID = 1 << 10; + /// A value of 1 indicates the processor supports FMA extensions using YMM state. + const FMA = 1 << 12; + /// CMPXCHG16B Available. A value of 1 indicates that the feature is available. See the CMPXCHG8B/CMPXCHG16B Compare and Exchange Bytes section. 14 + const CMPXCHG16B = 1 << 13; + /// Perfmon and Debug Capability: A value of 1 indicates the processor supports the performance and debug feature indication MSR IA32_PERF_CAPABILITIES. + const PDCM = 1 << 15; + /// Process-context identifiers. A value of 1 indicates that the processor supports PCIDs and the software may set CR4.PCIDE to 1. 
+ const PCID = 1 << 17; + /// A value of 1 indicates the processor supports the ability to prefetch data from a memory mapped device. + const DCA = 1 << 18; + /// A value of 1 indicates that the processor supports SSE4.1. + const SSE41 = 1 << 19; + /// A value of 1 indicates that the processor supports SSE4.2. + const SSE42 = 1 << 20; + /// A value of 1 indicates that the processor supports x2APIC feature. + const X2APIC = 1 << 21; + /// A value of 1 indicates that the processor supports MOVBE instruction. + const MOVBE = 1 << 22; + /// A value of 1 indicates that the processor supports the POPCNT instruction. + const POPCNT = 1 << 23; + /// A value of 1 indicates that the processor's local APIC timer supports one-shot operation using a TSC deadline value. + const TSC_DEADLINE = 1 << 24; + /// A value of 1 indicates that the processor supports the AESNI instruction extensions. + const AESNI = 1 << 25; + /// A value of 1 indicates that the processor supports the XSAVE/XRSTOR processor extended states feature, the XSETBV/XGETBV instructions, and XCR0. + const XSAVE = 1 << 26; + /// A value of 1 indicates that the OS has enabled XSETBV/XGETBV instructions to access XCR0, and support for processor extended state management using XSAVE/XRSTOR. + const OSXSAVE = 1 << 27; + /// A value of 1 indicates the processor supports the AVX instruction extensions. + const AVX = 1 << 28; + /// A value of 1 indicates that processor supports 16-bit floating-point conversion instructions. + const F16C = 1 << 29; + /// A value of 1 indicates that processor supports RDRAND instruction. + const RDRAND = 1 << 30; + /// A value of 1 indicates the presence of a hypervisor. + const HYPERVISOR = 1 << 31; + + // EDX flags + + /// Floating Point Unit On-Chip. The processor contains an x87 FPU. + const FPU = 1 << (32 + 0); + /// Virtual 8086 Mode Enhancements.
Virtual 8086 mode enhancements, including CR4.VME for controlling the feature, CR4.PVI for protected mode virtual interrupts, software interrupt indirection, expansion of the TSS with the software indirection bitmap, and EFLAGS.VIF and EFLAGS.VIP flags. + const VME = 1 << (32 + 1); + /// Debugging Extensions. Support for I/O breakpoints, including CR4.DE for controlling the feature, and optional trapping of accesses to DR4 and DR5. + const DE = 1 << (32 + 2); + /// Page Size Extension. Large pages of size 4 MByte are supported, including CR4.PSE for controlling the feature, the defined dirty bit in PDE (Page Directory Entries), optional reserved bit trapping in CR3, PDEs, and PTEs. + const PSE = 1 << (32 + 3); + /// Time Stamp Counter. The RDTSC instruction is supported, including CR4.TSD for controlling privilege. + const TSC = 1 << (32 + 4); + /// Model Specific Registers RDMSR and WRMSR Instructions. The RDMSR and WRMSR instructions are supported. Some of the MSRs are implementation dependent. + const MSR = 1 << (32 + 5); + /// Physical Address Extension. Physical addresses greater than 32 bits are supported: extended page table entry formats, an extra level in the page translation tables is defined, 2-MByte pages are supported instead of 4 Mbyte pages if PAE bit is 1. + const PAE = 1 << (32 + 6); + /// Machine Check Exception. Exception 18 is defined for Machine Checks, including CR4.MCE for controlling the feature. This feature does not define the model-specific implementations of machine-check error logging, reporting, and processor shutdowns. Machine Check exception handlers may have to depend on processor version to do model specific processing of the exception, or test for the presence of the Machine Check feature. + const MCE = 1 << (32 + 7); + /// CMPXCHG8B Instruction. The compare-and-exchange 8 bytes (64 bits) instruction is supported (implicitly locked and atomic). + const CX8 = 1 << (32 + 8); + /// APIC On-Chip. 
The processor contains an Advanced Programmable Interrupt Controller (APIC), responding to memory mapped commands in the physical address range FFFE0000H to FFFE0FFFH (by default - some processors permit the APIC to be relocated). + const APIC = 1 << (32 + 9); + /// SYSENTER and SYSEXIT Instructions. The SYSENTER and SYSEXIT and associated MSRs are supported. + const SEP = 1 << (32 + 11); + /// Memory Type Range Registers. MTRRs are supported. The MTRRcap MSR contains feature bits that describe what memory types are supported, how many variable MTRRs are supported, and whether fixed MTRRs are supported. + const MTRR = 1 << (32 + 12); + /// Page Global Bit. The global bit is supported in paging-structure entries that map a page, indicating TLB entries that are common to different processes and need not be flushed. The CR4.PGE bit controls this feature. + const PGE = 1 << (32 + 13); + /// Machine Check Architecture. The Machine Check Architecture, which provides a compatible mechanism for error reporting in P6 family, Pentium 4, Intel Xeon processors, and future processors, is supported. The MCG_CAP MSR contains feature bits describing how many banks of error reporting MSRs are supported. + const MCA = 1 << (32 + 14); + /// Conditional Move Instructions. The conditional move instruction CMOV is supported. In addition, if x87 FPU is present as indicated by the CPUID.FPU feature bit, then the FCOMI and FCMOV instructions are supported. + const CMOV = 1 << (32 + 15); + /// Page Attribute Table. Page Attribute Table is supported. This feature augments the Memory Type Range Registers (MTRRs), allowing an operating system to specify attributes of memory accessed through a linear address on a 4KB granularity. + const PAT = 1 << (32 + 16); + /// 36-Bit Page Size Extension. 4-MByte pages addressing physical memory beyond 4 GBytes are supported with 32-bit paging.
This feature indicates that upper bits of the physical address of a 4-MByte page are encoded in bits 20:13 of the page-directory entry. Such physical addresses are limited by MAXPHYADDR and may be up to 40 bits in size. + const PSE36 = 1 << (32 + 17); + /// Processor Serial Number. The processor supports the 96-bit processor identification number feature and the feature is enabled. + const PSN = 1 << (32 + 18); + /// CLFLUSH Instruction. CLFLUSH Instruction is supported. + const CLFSH = 1 << (32 + 19); + /// Debug Store. The processor supports the ability to write debug information into a memory resident buffer. This feature is used by the branch trace store (BTS) and precise event-based sampling (PEBS) facilities (see Chapter 23, Introduction to Virtual-Machine Extensions, in the Intel® 64 and IA-32 Architectures Software Developers Manual, Volume 3C). + const DS = 1 << (32 + 21); + /// Thermal Monitor and Software Controlled Clock Facilities. The processor implements internal MSRs that allow processor temperature to be monitored and processor performance to be modulated in predefined duty cycles under software control. + const ACPI = 1 << (32 + 22); + /// Intel MMX Technology. The processor supports the Intel MMX technology. + const MMX = 1 << (32 + 23); + /// FXSAVE and FXRSTOR Instructions. The FXSAVE and FXRSTOR instructions are supported for fast save and restore of the floating point context. Presence of this bit also indicates that CR4.OSFXSR is available for an operating system to indicate that it supports the FXSAVE and FXRSTOR instructions. + const FXSR = 1 << (32 + 24); + /// SSE. The processor supports the SSE extensions. + const SSE = 1 << (32 + 25); + /// SSE2. The processor supports the SSE2 extensions. + const SSE2 = 1 << (32 + 26); + /// Self Snoop. The processor supports the management of conflicting memory types by performing a snoop of its own cache structure for transactions issued to the bus. 
+ const SS = 1 << (32 + 27); + /// Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a single logical processor in the package and software should assume only a single APIC ID is reserved. A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] (the Maximum number of addressable IDs for logical processors in this package) is valid for the package. + const HTT = 1 << (32 + 28); + /// Thermal Monitor. The processor implements the thermal monitor automatic thermal control circuitry (TCC). + const TM = 1 << (32 + 29); + /// Pending Break Enable. The processor supports the use of the FERR#/PBE# pin when the processor is in the stop-clock state (STPCLK# is asserted) to signal the processor that an interrupt is pending and that the processor should return to normal operation to handle the interrupt. Bit 10 (PBE enable) in the IA32_MISC_ENABLE MSR enables this capability. + const PBE = 1 << (32 + 31); + } + + pub struct ExtendedFeaturesEcx: u32 { + /// Bit 0: Prefetch WT1. (Intel® Xeon Phi™ only). + const PREFETCHWT1 = 1 << 0; + // Bit 01: AVX512_VBMI + const AVX512VBMI = 1 << 1; + /// Bit 02: UMIP. Supports user-mode instruction prevention if 1. + const UMIP = 1 << 2; + /// Bit 03: PKU. Supports protection keys for user-mode pages if 1. + const PKU = 1 << 3; + /// Bit 04: OSPKE. If 1, OS has set CR4.PKE to enable protection keys (and the RDPKRU/WRPKRU instructions). + const OSPKE = 1 << 4; + /// Bit 5: WAITPKG + const WAITPKG = 1 << 5; + /// Bit 6: AVX512_VBMI2 + const AVX512VBMI2 = 1 << 6; + /// Bit 7: CET_SS. Supports CET shadow stack features if 1. Processors that set this bit define bits 0..2 of the + /// IA32_U_CET and IA32_S_CET MSRs. Enumerates support for the following MSRs: + /// IA32_INTERRUPT_SPP_TABLE_ADDR, IA32_PL3_SSP, IA32_PL2_SSP, IA32_PL1_SSP, and IA32_PL0_SSP.
+ const CETSS = 1 << 7; + /// Bit 8: GFNI + const GFNI = 1 << 8; + /// Bit 9: VAES + const VAES = 1 << 9; + /// Bit 10: VPCLMULQDQ + const VPCLMULQDQ = 1 << 10; + /// Bit 11: AVX512_VNNI + const AVX512VNNI = 1 << 11; + /// Bit 12: AVX512_BITALG + const AVX512BITALG = 1 << 12; + /// Bit 13: TME_EN. If 1, the following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, + /// IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + const TMEEN = 1 << 13; + /// Bit 14: AVX512_VPOPCNTDQ + const AVX512VPOPCNTDQ = 1 << 14; + + // Bit 15: Reserved. + + /// Bit 16: Supports 57-bit linear addresses and five-level paging if 1. + const LA57 = 1 << 16; + + // Bits 21 - 17: The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode + + /// Bit 22: RDPID. RDPID and IA32_TSC_AUX are available if 1. + const RDPID = 1 << 22; + + // Bits 29 - 23: Reserved. + + /// Bit 30: SGX_LC. Supports SGX Launch Configuration if 1. + const SGX_LC = 1 << 30; + } +} diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index c77afcc6..df2ed013 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -50,11 +50,11 @@ pub unsafe extern "C" fn arch_entry() -> i32 { ); } -fn rust_entry() { +extern "C" fn rust_entry(magic: u32, info_addr: usize) { crate::clear_bss(); unsafe { PHYS_VIRT_OFFSET = X86_PHYS_VIRT_OFFSET }; println!(""); - rust_main(this_cpu_id(), 0); + rust_main(this_cpu_id(), info_addr); } fn rust_entry_secondary() { diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index 113b920b..72331c22 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -1,5 +1,6 @@ use crate::arch::cpu::{self, this_cpu_id}; use alloc::{boxed::Box, collections::btree_map::BTreeMap}; +use bit_field::BitField; use spin::Mutex; use x86_64::{ addr::VirtAddr, @@ -77,3 +78,22 @@ pub fn load_gdt_tss() { gdt.insert(cpuid, GdtStruct::new(tss)); } }*/ + +pub fn get_tr_base( + tr: x86::segmentation::SegmentSelector, + gdt: 
&x86::dtables::DescriptorTablePointer, +) -> u64 { + let index = tr.index() as usize; + let table_len = (gdt.limit as usize + 1) / core::mem::size_of::(); + let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; + let entry = table[index]; + if entry & (1 << 47) != 0 { + // present + let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; + let base_high = table[index + 1] & 0xffff_ffff; + base_low | base_high << 32 + } else { + // no present + 0 + } +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index cb10ec27..3b6b3e20 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,5 +1,8 @@ #![allow(unused)] +pub mod acpi; +pub mod boot; pub mod cpu; +pub mod cpuid; pub mod device; pub mod entry; pub mod gdt; @@ -12,6 +15,7 @@ pub mod paging; pub mod s1pt; pub mod s2pt; pub mod trap; +pub mod vmcs; pub mod vmx; pub mod zone; diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index 966e4856..3319685c 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -1,9 +1,9 @@ -use x86::msr::{rdmsr, wrmsr}; - use crate::{ + arch::msr::Msr::*, error::HvResult, memory::{Frame, HostPhysAddr}, }; +use x86::msr::{rdmsr, wrmsr}; numeric_enum_macro::numeric_enum! { #[repr(u32)] @@ -11,7 +11,7 @@ numeric_enum_macro::numeric_enum! { #[allow(non_camel_case_types)] /// X86 model-specific registers. (SDM Vol. 4) pub enum Msr { - /// APIC Location and Status (R/W) See Table 35-2. See Section 10.4.4, Local APIC Status and Location. + /// APIC Location and Status (R/W) See Table 35-2. See Section 10.4.4, Local APIC Status and Location. IA32_APIC_BASE = 0x1b, IA32_FEATURE_CONTROL = 0x3a, IA32_PAT = 0x277, @@ -33,38 +33,38 @@ pub enum Msr { IA32_VMX_TRUE_EXIT_CTLS = 0x48f, IA32_VMX_TRUE_ENTRY_CTLS = 0x490, - /// x2APIC Msr + /// X2APIC Msr /// ID register. - APICID = 0x802, + IA32_X2APIC_APICID = 0x802, /// Version register. - VERSION = 0x803, + IA32_X2APIC_VERSION = 0x803, /// End-Of-Interrupt register. 
- EOI = 0x80B, + IA32_X2APIC_EOI = 0x80B, /// Logical Destination Register. - LDR = 0x80D, + IA32_X2APIC_LDR = 0x80D, /// Spurious Interrupt Vector register. - SIVR = 0x80F, + IA32_X2APIC_SIVR = 0x80F, /// Interrupt Command register. - ICR = 0x830, + IA32_X2APIC_ICR = 0x830, /// LVT Timer Interrupt register. - LVT_TIMER = 0x832, + IA32_X2APIC_LVT_TIMER = 0x832, /// LVT Thermal Sensor Interrupt register. - LVT_THERMAL = 0x833, + IA32_X2APIC_LVT_THERMAL = 0x833, /// LVT Performance Monitor register. - LVT_PMI = 0x834, + IA32_X2APIC_LVT_PMI = 0x834, /// LVT LINT0 register. - LVT_LINT0 = 0x835, + IA32_X2APIC_LVT_LINT0 = 0x835, /// LVT LINT1 register. - LVT_LINT1 = 0x836, + IA32_X2APIC_LVT_LINT1 = 0x836, /// LVT Error register. - LVT_ERR = 0x837, - /// Initial Count register. - INIT_COUNT = 0x838, + IA32_X2APIC_LVT_ERROR = 0x837, + /// Initial Count register. + IA32_X2APIC_INIT_COUNT = 0x838, /// Current Count register. - CUR_COUNT = 0x839, + IA32_X2APIC_CUR_COUNT = 0x839, /// Divide Configuration register. 
- DIV_CONF = 0x83E, + IA32_X2APIC_DIV_CONF = 0x83E, IA32_EFER = 0xc000_0080, IA32_STAR = 0xc000_0081, @@ -109,12 +109,6 @@ impl MsrBitmap { } } - pub fn init() -> HvResult { - Ok(Self { - frame: Frame::new_zero()?, - }) - } - pub fn passthrough_all() -> HvResult { Ok(Self { frame: Frame::new_zero()?, @@ -127,6 +121,24 @@ impl MsrBitmap { Ok(Self { frame }) } + pub fn intercept_def() -> HvResult { + // Intercept IA32_APIC_BASE MSR accesses + let mut bitmap = Self { + frame: Frame::new_zero()?, + }; + let msr = IA32_APIC_BASE; + bitmap.set_read_intercept(msr, true); + bitmap.set_write_intercept(msr, true); + // Intercept all x2APIC MSR accesses + for addr in 0x800_u32..=0x83f_u32 { + if let Ok(msr) = Msr::try_from(addr) { + bitmap.set_read_intercept(msr, true); + bitmap.set_write_intercept(msr, true); + } + } + Ok(bitmap) + } + pub fn phys_addr(&self) -> HostPhysAddr { self.frame.start_paddr() } diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index cc25deb5..718b5497 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -1,11 +1,18 @@ -use super::paging::{GenericPTE, Level4PageTable, PagingInstr}; -use crate::consts::PAGE_SIZE; -use crate::memory::addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}; -use crate::memory::MemFlags; +use crate::{ + arch::{ + paging::{GenericPTE, Level4PageTable, PagingInstr}, + vmcs::*, + }, + consts::PAGE_SIZE, + error::HvResult, + memory::{ + addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}, + MemFlags, + }, +}; use bit_field::BitField; use bitflags::bitflags; -use core::arch::asm; -use core::fmt; +use core::{arch::asm, fmt}; bitflags! { /// EPT entry flags. (SDM Vol. 
3C, Section 28.3.2) @@ -132,10 +139,12 @@ impl From for DescriptorAttr { attr |= Self::WRITE; } if flags.contains(MemFlags::EXECUTE) { - attr |= Self::EXECUTE; + attr |= Self::EXECUTE | Self::EXECUTE_FOR_USER; } if !flags.contains(MemFlags::IO) { attr.set_mem_type(MemType::WriteBack); + } else { + // attr &= !Self::READ; } attr } @@ -229,7 +238,7 @@ pub struct S2PTInstr; impl PagingInstr for S2PTInstr { unsafe fn activate(root_paddr: HostPhysAddr) { let s2ptp = S2PTPointer::from_table_phys(root_paddr).bits(); - crate::arch::vmx::set_s2ptp(s2ptp).unwrap(); + crate::arch::vmcs::VmcsControl64::EPTP.write(s2ptp).unwrap(); unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) }; } @@ -245,4 +254,26 @@ pub struct Stage2PageFaultInfo { pub fault_guest_paddr: GuestPhysAddr, } +impl Stage2PageFaultInfo { + pub fn new() -> HvResult { + // SDM Vol. 3C, Section 27.2.1, Table 27-7 + let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?; + let fault_guest_paddr = VmcsReadOnly64::GUEST_PHYSICAL_ADDR.read()? 
as usize; + let mut access_flags = MemFlags::empty(); + if qualification.get_bit(0) { + access_flags |= MemFlags::READ; + } + if qualification.get_bit(1) { + access_flags |= MemFlags::WRITE; + } + if qualification.get_bit(2) { + access_flags |= MemFlags::EXECUTE; + } + Ok(Stage2PageFaultInfo { + access_flags, + fault_guest_paddr, + }) + } +} + pub type Stage2PageTable = Level4PageTable; diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index e93a2129..3ed16017 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,15 +1,19 @@ use crate::{ arch::{ cpu::ArchCpu, + cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, device::all_virt_devices, idt::IdtStruct, lapic::{local_apic, vectors::*}, msr::Msr::{self, *}, - vmx::*, + s2pt::Stage2PageFaultInfo, + vmcs::*, + vmx::{VmxCrAccessInfo, VmxExitInfo, VmxExitReason, VmxInterruptInfo, VmxIoExitInfo}, }, device::irqchip::pic::lapic::VirtLocalApic, - error::{HvError, HvResult}, + error::HvResult, }; +use x86_64::registers::control::Cr4Flags; core::arch::global_asm!( include_str!("trap.S"), @@ -77,73 +81,108 @@ fn handle_irq(vector: u8) { fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { use raw_cpuid::{cpuid, CpuIdResult}; + // FIXME: temporary hypervisor hack + let signature = unsafe { &*("ACRNACRNACRN".as_ptr() as *const [u32; 3]) }; + let cr4_flags = Cr4Flags::from_bits_truncate(arch_cpu.cr(4) as _); + let regs = arch_cpu.regs_mut(); + let rax: Result = (regs.rax as u32).try_into(); + let mut res: CpuIdResult = cpuid!(regs.rax, regs.rcx); - const LEAF_FEATURE_INFO: u32 = 0x1; - const LEAF_HYPERVISOR_INFO: u32 = 0x4000_0000; - const LEAF_HYPERVISOR_FEATURE: u32 = 0x4000_0001; - const VENDOR_STR: &[u8; 12] = b"HVISORHVISOR"; - let vendor_regs = unsafe { &*(VENDOR_STR.as_ptr() as *const [u32; 3]) }; + if let Ok(function) = rax { + res = match function { + CpuIdEax::FeatureInfo => { + let mut res = cpuid!(regs.rax, regs.rcx); + let mut ecx = FeatureInfoFlags::from_bits_truncate(res.ecx 
as _); - let regs = arch_cpu.regs_mut(); - let function = regs.rax as u32; - let res = match function { - LEAF_FEATURE_INFO => { - const FEATURE_VMX: u32 = 1 << 5; - const FEATURE_HYPERVISOR: u32 = 1 << 31; - let mut res = cpuid!(regs.rax, regs.rcx); - res.ecx &= !FEATURE_VMX; - res.ecx |= FEATURE_HYPERVISOR; - res - } - LEAF_HYPERVISOR_INFO => CpuIdResult { - eax: LEAF_HYPERVISOR_FEATURE, - ebx: vendor_regs[0], - ecx: vendor_regs[1], - edx: vendor_regs[2], - }, - LEAF_HYPERVISOR_FEATURE => CpuIdResult { - eax: 0, - ebx: 0, - ecx: 0, - edx: 0, - }, - _ => cpuid!(regs.rax, regs.rcx), - }; + ecx.remove(FeatureInfoFlags::VMX); + ecx.remove(FeatureInfoFlags::TSC_DEADLINE); + ecx.remove(FeatureInfoFlags::XSAVE); - debug!( + ecx.insert(FeatureInfoFlags::X2APIC); + ecx.insert(FeatureInfoFlags::HYPERVISOR); + res.ecx = ecx.bits() as _; + + let mut edx = FeatureInfoFlags::from_bits_truncate((res.edx as u64) << 32); + // edx.remove(FeatureInfoFlags::TSC); + res.edx = (edx.bits() >> 32) as _; + + res + } + CpuIdEax::StructuredExtendedFeatureInfo => { + let mut res = cpuid!(regs.rax, regs.rcx); + let mut ecx = ExtendedFeaturesEcx::from_bits_truncate(res.ecx as _); + ecx.remove(ExtendedFeaturesEcx::WAITPKG); + res.ecx = ecx.bits() as _; + + res + } + CpuIdEax::HypervisorInfo => CpuIdResult { + eax: CpuIdEax::HypervisorFeatures as u32, + ebx: signature[0], + ecx: signature[1], + edx: signature[2], + }, + CpuIdEax::HypervisorFeatures => CpuIdResult { + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + }, + _ => cpuid!(regs.rax, regs.rcx), + }; + } + + trace!( "VM exit: CPUID({:#x}, {:#x}): {:?}", - regs.rax, regs.rcx, res + regs.rax, + regs.rcx, + res ); regs.rax = res.eax as _; regs.rbx = res.ebx as _; regs.rcx = res.ecx as _; regs.rdx = res.edx as _; - advance_guest_rip(VM_EXIT_INSTR_LEN_CPUID)?; + arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_CPUID)?; + Ok(()) +} + +fn handle_cr_access(arch_cpu: &mut ArchCpu) -> HvResult { + let cr_access_info = VmxCrAccessInfo::new()?; + panic!( + 
"VM-exit: CR{} access:\n{:#x?}", + cr_access_info.cr_n, arch_cpu + ); + + match cr_access_info.cr_n { + 0 => {} + _ => {} + } + Ok(()) } fn handle_external_interrupt() -> HvResult { - let int_info = interrupt_exit_info()?; + let int_info = VmxInterruptInfo::new()?; trace!("VM-exit: external interrupt: {:#x?}", int_info); assert!(int_info.valid); handle_irq(int_info.vector); Ok(()) } -fn handle_hypercall(arch_cpu: &ArchCpu) -> HvResult { +fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult { let regs = arch_cpu.regs(); debug!( "VM exit: VMCALL({:#x}): {:?}", regs.rax, [regs.rdi, regs.rsi, regs.rdx, regs.rcx] ); - advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL)?; + arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL)?; Ok(()) } fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { - let io_info = io_exit_info()?; + let io_info = VmxIoExitInfo::new()?; trace!( "VM exit: I/O instruction @ {:#x}: {:#x?}", exit_info.guest_rip, @@ -185,37 +224,42 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR dev.write(io_info.port, io_info.access_size, value)?; } } else { - panic!( - "Unsupported I/O port {:#x} access: {:#x?}", - io_info.port, io_info + debug!( + "Unsupported I/O port {:#x} access: {:#x?} \n {:#x?}", + io_info.port, io_info, arch_cpu ) } - advance_guest_rip(exit_info.exit_instruction_length as _)?; + + arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; Ok(()) } fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult { let rcx = arch_cpu.regs().rcx as u32; - let msr = Msr::try_from(rcx).unwrap(); - let res = if msr == IA32_APIC_BASE { - let mut apic_base = unsafe { IA32_APIC_BASE.read() }; - apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC - Ok(apic_base) - } else if VirtLocalApic::msr_range().contains(&rcx) { - VirtLocalApic::rdmsr(arch_cpu, msr) - } else { - hv_result_err!(ENOSYS) - }; + if let Ok(msr) = Msr::try_from(rcx) { + let res = if msr == IA32_APIC_BASE { + let mut 
apic_base = unsafe { IA32_APIC_BASE.read() }; + apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC + Ok(apic_base) + } else if VirtLocalApic::msr_range().contains(&rcx) { + VirtLocalApic::rdmsr(arch_cpu, msr) + } else { + hv_result_err!(ENOSYS) + }; - if let Ok(value) = res { - debug!("VM exit: RDMSR({:#x}) -> {:#x}", rcx, value); - arch_cpu.regs_mut().rax = value & 0xffff_ffff; - arch_cpu.regs_mut().rdx = value >> 32; + if let Ok(value) = res { + debug!("VM exit: RDMSR({:#x}) -> {:#x}", rcx, value); + arch_cpu.regs_mut().rax = value & 0xffff_ffff; + arch_cpu.regs_mut().rdx = value >> 32; + } else { + warn!("Failed to handle RDMSR({:#x}): {:?}", rcx, res); + } } else { - panic!("Failed to handle RDMSR({:#x}): {:?}", rcx, res); + warn!("Unrecognized RDMSR({:#x})", rcx); } - advance_guest_rip(VM_EXIT_INSTR_LEN_RDMSR)?; + + arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_RDMSR)?; Ok(()) } @@ -234,25 +278,30 @@ fn handle_msr_write(arch_cpu: &mut ArchCpu) -> HvResult { }; if res.is_err() { - panic!( - "Failed to handle WRMSR({:#x}) <- {:#x}: {:?}", - rcx, value, res + warn!( + "Failed to handle WRMSR({:#x}) <- {:#x}: {:?}\n{:#x?}", + rcx, value, res, arch_cpu ); } - advance_guest_rip(VM_EXIT_INSTR_LEN_WRMSR)?; + arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_WRMSR)?; Ok(()) } fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, guest_rip: usize) -> HvResult { - let fault_info = s2pt_violation_info()?; + let fault_info = Stage2PageFaultInfo::new()?; panic!( "VM exit: S2PT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?}), {:#x?}", guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags, arch_cpu ); } +fn handle_triple_fault(arch_cpu: &mut ArchCpu, guest_rip: usize) -> HvResult { + panic!("VM exit: Triple fault @ {:#x}", guest_rip); + Ok(()) +} + pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { - let exit_info = exit_info()?; + let exit_info = VmxExitInfo::new()?; debug!("VM exit: {:#x?}", exit_info); if exit_info.entry_failure { @@ -260,14 
+309,32 @@ pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { } let res = match exit_info.exit_reason { - VmxExitReason::CPUID => handle_cpuid(arch_cpu), - VmxExitReason::EPT_VIOLATION => handle_s2pt_violation(arch_cpu, exit_info.guest_rip), VmxExitReason::EXTERNAL_INTERRUPT => handle_external_interrupt(), - VmxExitReason::INTERRUPT_WINDOW => set_interrupt_window(false), + VmxExitReason::TRIPLE_FAULT => handle_triple_fault(arch_cpu, exit_info.guest_rip), + VmxExitReason::INTERRUPT_WINDOW => arch_cpu.set_interrupt_window(false), + VmxExitReason::CPUID => handle_cpuid(arch_cpu), + VmxExitReason::RDTSC => { + // FIXME: temp! + let current_ticks = crate::arch::lapic::current_ticks(); + let regs = arch_cpu.regs_mut(); + regs.rdx = (current_ticks >> 32) & (u32::MAX as u64); + regs.rax = current_ticks & (u32::MAX as u64); + /*info!( + "RDTSC: {:x} rdx: {:x}, rax: {:x}, rip: {:x}", + crate::arch::lapic::current_ticks(), + regs.rdx, + regs.rax, + VmcsGuestNW::RIP.read()?, + );*/ + arch_cpu.advance_guest_rip(2)?; + Ok(()) + } + VmxExitReason::VMCALL => handle_hypercall(arch_cpu), + VmxExitReason::CR_ACCESS => handle_cr_access(arch_cpu), VmxExitReason::IO_INSTRUCTION => handle_io_instruction(arch_cpu, &exit_info), VmxExitReason::MSR_READ => handle_msr_read(arch_cpu), VmxExitReason::MSR_WRITE => handle_msr_write(arch_cpu), - VmxExitReason::VMCALL => handle_hypercall(arch_cpu), + VmxExitReason::EPT_VIOLATION => handle_s2pt_violation(arch_cpu, exit_info.guest_rip), _ => panic!( "Unhandled VM-Exit reason {:?}:\n{:#x?}", exit_info.exit_reason, arch_cpu diff --git a/src/arch/x86_64/vmcs.rs b/src/arch/x86_64/vmcs.rs new file mode 100644 index 00000000..54d04c6b --- /dev/null +++ b/src/arch/x86_64/vmcs.rs @@ -0,0 +1,569 @@ +#![allow(non_camel_case_types)] +use crate::{ + arch::{ + msr::Msr, + s2pt::Stage2PageFaultInfo, + vmx::{ + VmxExitInfo, VmxInstructionError, VmxInterruptInfo, VmxInterruptionType, VmxIoExitInfo, + }, + }, + error::{HvError, HvResult}, + memory::MemFlags, 
+}; +use bit_field::BitField; +use x86::{bits64::vmx, vmx::Result as VmResult, vmx::VmFail}; + +macro_rules! vmcs_read { + ($field_enum: ident, u64) => { + impl $field_enum { + pub fn read(self) -> x86::vmx::Result { + #[cfg(target_pointer_width = "64")] + unsafe { + vmx::vmread(self as u32) + } + #[cfg(target_pointer_width = "32")] + unsafe { + let field = self as u32; + Ok(vmx::vmread(field)? + (vmx::vmread(field + 1)? << 32)) + } + } + } + }; + ($field_enum: ident, $ux: ty) => { + impl $field_enum { + pub fn read(self) -> x86::vmx::Result<$ux> { + unsafe { vmx::vmread(self as u32).map(|v| v as $ux) } + } + } + }; +} + +macro_rules! vmcs_write { + ($field_enum: ident, u64) => { + impl $field_enum { + pub fn write(self, value: u64) -> x86::vmx::Result<()> { + #[cfg(target_pointer_width = "64")] + unsafe { + vmx::vmwrite(self as u32, value) + } + #[cfg(target_pointer_width = "32")] + unsafe { + let field = self as u32; + vmx::vmwrite(field, value & 0xffff_ffff)?; + vmx::vmwrite(field + 1, value >> 32)?; + Ok(()) + } + } + } + }; + ($field_enum: ident, $ux: ty) => { + impl $field_enum { + pub fn write(self, value: $ux) -> x86::vmx::Result<()> { + unsafe { vmx::vmwrite(self as u32, value as u64) } + } + } + }; +} + +/// 16-Bit Control Fields. (SDM Vol. 3D, Appendix B.1.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControl16 { + /// Virtual-processor identifier (VPID). + VPID = 0x0, + /// Posted-interrupt notification vector. + POSTED_INTERRUPT_NOTIFICATION_VECTOR = 0x2, + /// EPTP index. + EPTP_INDEX = 0x4, +} +vmcs_read!(VmcsControl16, u16); +vmcs_write!(VmcsControl16, u16); + +/// 64-Bit Control Fields. (SDM Vol. 3D, Appendix B.2.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControl64 { + /// Address of I/O bitmap A (full). + IO_BITMAP_A_ADDR = 0x2000, + /// Address of I/O bitmap B (full). + IO_BITMAP_B_ADDR = 0x2002, + /// Address of MSR bitmaps (full). + MSR_BITMAPS_ADDR = 0x2004, + /// VM-exit MSR-store address (full). 
+ VMEXIT_MSR_STORE_ADDR = 0x2006, + /// VM-exit MSR-load address (full). + VMEXIT_MSR_LOAD_ADDR = 0x2008, + /// VM-entry MSR-load address (full). + VMENTRY_MSR_LOAD_ADDR = 0x200A, + /// Executive-VMCS pointer (full). + EXECUTIVE_VMCS_PTR = 0x200C, + /// PML address (full). + PML_ADDR = 0x200E, + /// TSC offset (full). + TSC_OFFSET = 0x2010, + /// Virtual-APIC address (full). + VIRT_APIC_ADDR = 0x2012, + /// APIC-access address (full). + APIC_ACCESS_ADDR = 0x2014, + /// Posted-interrupt descriptor address (full). + POSTED_INTERRUPT_DESC_ADDR = 0x2016, + /// VM-function controls (full). + VM_FUNCTION_CONTROLS = 0x2018, + /// EPT pointer (full). + EPTP = 0x201A, + /// EOI-exit bitmap 0 (full). + EOI_EXIT0 = 0x201C, + /// EOI-exit bitmap 1 (full). + EOI_EXIT1 = 0x201E, + /// EOI-exit bitmap 2 (full). + EOI_EXIT2 = 0x2020, + /// EOI-exit bitmap 3 (full). + EOI_EXIT3 = 0x2022, + /// EPTP-list address (full). + EPTP_LIST_ADDR = 0x2024, + /// VMREAD-bitmap address (full). + VMREAD_BITMAP_ADDR = 0x2026, + /// VMWRITE-bitmap address (full). + VMWRITE_BITMAP_ADDR = 0x2028, + /// Virtualization-exception information address (full). + VIRT_EXCEPTION_INFO_ADDR = 0x202A, + /// XSS-exiting bitmap (full). + XSS_EXITING_BITMAP = 0x202C, + /// ENCLS-exiting bitmap (full). + ENCLS_EXITING_BITMAP = 0x202E, + /// Sub-page-permission-table pointer (full). + SUBPAGE_PERM_TABLE_PTR = 0x2030, + /// TSC multiplier (full). + TSC_MULTIPLIER = 0x2032, +} +vmcs_read!(VmcsControl64, u64); +vmcs_write!(VmcsControl64, u64); + +/// 32-Bit Control Fields. (SDM Vol. 3D, Appendix B.3.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControl32 { + /// Pin-based VM-execution controls. + PINBASED_EXEC_CONTROLS = 0x4000, + /// Primary processor-based VM-execution controls. + PRIMARY_PROCBASED_EXEC_CONTROLS = 0x4002, + /// Exception bitmap. + EXCEPTION_BITMAP = 0x4004, + /// Page-fault error-code mask. + PAGE_FAULT_ERR_CODE_MASK = 0x4006, + /// Page-fault error-code match. 
+ PAGE_FAULT_ERR_CODE_MATCH = 0x4008, + /// CR3-target count. + CR3_TARGET_COUNT = 0x400A, + /// VM-exit controls. + VMEXIT_CONTROLS = 0x400C, + /// VM-exit MSR-store count. + VMEXIT_MSR_STORE_COUNT = 0x400E, + /// VM-exit MSR-load count. + VMEXIT_MSR_LOAD_COUNT = 0x4010, + /// VM-entry controls. + VMENTRY_CONTROLS = 0x4012, + /// VM-entry MSR-load count. + VMENTRY_MSR_LOAD_COUNT = 0x4014, + /// VM-entry interruption-information field. + VMENTRY_INTERRUPTION_INFO_FIELD = 0x4016, + /// VM-entry exception error code. + VMENTRY_EXCEPTION_ERR_CODE = 0x4018, + /// VM-entry instruction length. + VMENTRY_INSTRUCTION_LEN = 0x401A, + /// TPR threshold. + TPR_THRESHOLD = 0x401C, + /// Secondary processor-based VM-execution controls. + SECONDARY_PROCBASED_EXEC_CONTROLS = 0x401E, + /// PLE_Gap. + PLE_GAP = 0x4020, + /// PLE_Window. + PLE_WINDOW = 0x4022, +} +vmcs_read!(VmcsControl32, u32); +vmcs_write!(VmcsControl32, u32); + +/// Natural-Width Control Fields. (SDM Vol. 3D, Appendix B.4.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControlNW { + /// CR0 guest/host mask. + CR0_GUEST_HOST_MASK = 0x6000, + /// CR4 guest/host mask. + CR4_GUEST_HOST_MASK = 0x6002, + /// CR0 read shadow. + CR0_READ_SHADOW = 0x6004, + /// CR4 read shadow. + CR4_READ_SHADOW = 0x6006, + /// CR3-target value 0. + CR3_TARGET_VALUE0 = 0x6008, + /// CR3-target value 1. + CR3_TARGET_VALUE1 = 0x600A, + /// CR3-target value 2. + CR3_TARGET_VALUE2 = 0x600C, + /// CR3-target value 3. + CR3_TARGET_VALUE3 = 0x600E, +} +vmcs_read!(VmcsControlNW, usize); +vmcs_write!(VmcsControlNW, usize); + +/// 16-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.1.2) +pub enum VmcsGuest16 { + /// Guest ES selector. + ES_SELECTOR = 0x800, + /// Guest CS selector. + CS_SELECTOR = 0x802, + /// Guest SS selector. + SS_SELECTOR = 0x804, + /// Guest DS selector. + DS_SELECTOR = 0x806, + /// Guest FS selector. + FS_SELECTOR = 0x808, + /// Guest GS selector. + GS_SELECTOR = 0x80a, + /// Guest LDTR selector. 
+ LDTR_SELECTOR = 0x80c, + /// Guest TR selector. + TR_SELECTOR = 0x80e, + /// Guest interrupt status. + INTERRUPT_STATUS = 0x810, + /// PML index. + PML_INDEX = 0x812, +} +vmcs_read!(VmcsGuest16, u16); +vmcs_write!(VmcsGuest16, u16); + +/// 64-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.2.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsGuest64 { + /// VMCS link pointer (full). + LINK_PTR = 0x2800, + /// Guest IA32_DEBUGCTL (full). + IA32_DEBUGCTL = 0x2802, + /// Guest IA32_PAT (full). + IA32_PAT = 0x2804, + /// Guest IA32_EFER (full). + IA32_EFER = 0x2806, + /// Guest IA32_PERF_GLOBAL_CTRL (full). + IA32_PERF_GLOBAL_CTRL = 0x2808, + /// Guest PDPTE0 (full). + PDPTE0 = 0x280A, + /// Guest PDPTE1 (full). + PDPTE1 = 0x280C, + /// Guest PDPTE2 (full). + PDPTE2 = 0x280E, + /// Guest PDPTE3 (full). + PDPTE3 = 0x2810, + /// Guest IA32_BNDCFGS (full). + IA32_BNDCFGS = 0x2812, + /// Guest IA32_RTIT_CTL (full). + IA32_RTIT_CTL = 0x2814, +} +vmcs_read!(VmcsGuest64, u64); +vmcs_write!(VmcsGuest64, u64); + +/// 32-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.3.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsGuest32 { + /// Guest ES limit. + ES_LIMIT = 0x4800, + /// Guest CS limit. + CS_LIMIT = 0x4802, + /// Guest SS limit. + SS_LIMIT = 0x4804, + /// Guest DS limit. + DS_LIMIT = 0x4806, + /// Guest FS limit. + FS_LIMIT = 0x4808, + /// Guest GS limit. + GS_LIMIT = 0x480A, + /// Guest LDTR limit. + LDTR_LIMIT = 0x480C, + /// Guest TR limit. + TR_LIMIT = 0x480E, + /// Guest GDTR limit. + GDTR_LIMIT = 0x4810, + /// Guest IDTR limit. + IDTR_LIMIT = 0x4812, + /// Guest ES access rights. + ES_ACCESS_RIGHTS = 0x4814, + /// Guest CS access rights. + CS_ACCESS_RIGHTS = 0x4816, + /// Guest SS access rights. + SS_ACCESS_RIGHTS = 0x4818, + /// Guest DS access rights. + DS_ACCESS_RIGHTS = 0x481A, + /// Guest FS access rights. + FS_ACCESS_RIGHTS = 0x481C, + /// Guest GS access rights. + GS_ACCESS_RIGHTS = 0x481E, + /// Guest LDTR access rights. 
+ LDTR_ACCESS_RIGHTS = 0x4820, + /// Guest TR access rights. + TR_ACCESS_RIGHTS = 0x4822, + /// Guest interruptibility state. + INTERRUPTIBILITY_STATE = 0x4824, + /// Guest activity state. + ACTIVITY_STATE = 0x4826, + /// Guest SMBASE. + SMBASE = 0x4828, + /// Guest IA32_SYSENTER_CS. + IA32_SYSENTER_CS = 0x482A, + /// VMX-preemption timer value. + VMX_PREEMPTION_TIMER_VALUE = 0x482E, +} +vmcs_read!(VmcsGuest32, u32); +vmcs_write!(VmcsGuest32, u32); + +/// Natural-Width Guest-State Fields. (SDM Vol. 3D, Appendix B.4.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsGuestNW { + /// Guest CR0. + CR0 = 0x6800, + /// Guest CR3. + CR3 = 0x6802, + /// Guest CR4. + CR4 = 0x6804, + /// Guest ES base. + ES_BASE = 0x6806, + /// Guest CS base. + CS_BASE = 0x6808, + /// Guest SS base. + SS_BASE = 0x680A, + /// Guest DS base. + DS_BASE = 0x680C, + /// Guest FS base. + FS_BASE = 0x680E, + /// Guest GS base. + GS_BASE = 0x6810, + /// Guest LDTR base. + LDTR_BASE = 0x6812, + /// Guest TR base. + TR_BASE = 0x6814, + /// Guest GDTR base. + GDTR_BASE = 0x6816, + /// Guest IDTR base. + IDTR_BASE = 0x6818, + /// Guest DR7. + DR7 = 0x681A, + /// Guest RSP. + RSP = 0x681C, + /// Guest RIP. + RIP = 0x681E, + /// Guest RFLAGS. + RFLAGS = 0x6820, + /// Guest pending debug exceptions. + PENDING_DBG_EXCEPTIONS = 0x6822, + /// Guest IA32_SYSENTER_ESP. + IA32_SYSENTER_ESP = 0x6824, + /// Guest IA32_SYSENTER_EIP. + IA32_SYSENTER_EIP = 0x6826, +} +vmcs_read!(VmcsGuestNW, usize); +vmcs_write!(VmcsGuestNW, usize); + +/// 16-Bit Host-State Fields. (SDM Vol. 3D, Appendix B.1.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHost16 { + /// Host ES selector. + ES_SELECTOR = 0xC00, + /// Host CS selector. + CS_SELECTOR = 0xC02, + /// Host SS selector. + SS_SELECTOR = 0xC04, + /// Host DS selector. + DS_SELECTOR = 0xC06, + /// Host FS selector. + FS_SELECTOR = 0xC08, + /// Host GS selector. + GS_SELECTOR = 0xC0A, + /// Host TR selector. 
+ TR_SELECTOR = 0xC0C, +} +vmcs_read!(VmcsHost16, u16); +vmcs_write!(VmcsHost16, u16); + +/// 64-Bit Host-State Fields. (SDM Vol. 3D, Appendix B.2.4) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHost64 { + /// Host IA32_PAT (full). + IA32_PAT = 0x2C00, + /// Host IA32_EFER (full). + IA32_EFER = 0x2C02, + /// Host IA32_PERF_GLOBAL_CTRL (full). + IA32_PERF_GLOBAL_CTRL = 0x2C04, +} +vmcs_read!(VmcsHost64, u64); +vmcs_write!(VmcsHost64, u64); + +/// 32-Bit Host-State Field. (SDM Vol. 3D, Appendix B.3.4) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHost32 { + /// Host IA32_SYSENTER_CS. + IA32_SYSENTER_CS = 0x4C00, +} +vmcs_read!(VmcsHost32, u32); +vmcs_write!(VmcsHost32, u32); + +/// Natural-Width Host-State Fields. (SDM Vol. 3D, Appendix B.4.4) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHostNW { + /// Host CR0. + CR0 = 0x6C00, + /// Host CR3. + CR3 = 0x6C02, + /// Host CR4. + CR4 = 0x6C04, + /// Host FS base. + FS_BASE = 0x6C06, + /// Host GS base. + GS_BASE = 0x6C08, + /// Host TR base. + TR_BASE = 0x6C0A, + /// Host GDTR base. + GDTR_BASE = 0x6C0C, + /// Host IDTR base. + IDTR_BASE = 0x6C0E, + /// Host IA32_SYSENTER_ESP. + IA32_SYSENTER_ESP = 0x6C10, + /// Host IA32_SYSENTER_EIP. + IA32_SYSENTER_EIP = 0x6C12, + /// Host RSP. + RSP = 0x6C14, + /// Host RIP. + RIP = 0x6C16, +} +vmcs_read!(VmcsHostNW, usize); +vmcs_write!(VmcsHostNW, usize); + +/// 64-Bit Read-Only Data Fields. (SDM Vol. 3D, Appendix B.2.2) +#[derive(Clone, Copy, Debug)] +pub enum VmcsReadOnly64 { + /// Guest-physical address (full). + GUEST_PHYSICAL_ADDR = 0x2400, +} +vmcs_read!(VmcsReadOnly64, u64); + +/// 32-Bit Read-Only Data Fields. (SDM Vol. 3D, Appendix B.3.2) +#[derive(Clone, Copy, Debug)] +pub enum VmcsReadOnly32 { + /// VM-instruction error. + VM_INSTRUCTION_ERROR = 0x4400, + /// Exit reason. + EXIT_REASON = 0x4402, + /// VM-exit interruption information. + VMEXIT_INTERRUPTION_INFO = 0x4404, + /// VM-exit interruption error code. 
+ VMEXIT_INTERRUPTION_ERR_CODE = 0x4406, + /// IDT-vectoring information field. + IDT_VECTORING_INFO = 0x4408, + /// IDT-vectoring error code. + IDT_VECTORING_ERR_CODE = 0x440A, + /// VM-exit instruction length. + VMEXIT_INSTRUCTION_LEN = 0x440C, + /// VM-exit instruction information. + VMEXIT_INSTRUCTION_INFO = 0x440E, +} +vmcs_read!(VmcsReadOnly32, u32); + +/// Natural-Width Read-Only Data Fields. (SDM Vol. 3D, Appendix B.4.2) +#[derive(Clone, Copy, Debug)] +pub enum VmcsReadOnlyNW { + /// Exit qualification. + EXIT_QUALIFICATION = 0x6400, + /// I/O RCX. + IO_RCX = 0x6402, + /// I/O RSI. + IO_RSI = 0x6404, + /// I/O RDI. + IO_RDI = 0x6406, + /// I/O RIP. + IO_RIP = 0x6408, + /// Guest-linear address. + GUEST_LINEAR_ADDR = 0x640A, +} +vmcs_read!(VmcsReadOnlyNW, usize); + +pub struct Vmcs; + +impl Vmcs { + pub fn load(paddr: usize) -> VmResult<()> { + unsafe { vmx::vmptrld(paddr as _) } + } + + pub fn clear(paddr: usize) -> VmResult<()> { + unsafe { vmx::vmclear(paddr as _) } + } + + pub fn inject_interrupt(vector: u8, err_code: Option) -> HvResult { + // SDM Vol. 
3C, Section 24.8.3 + let err_code = if VmxInterruptionType::vector_has_error_code(vector) { + err_code.or_else(|| Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read().unwrap())) + } else { + None + }; + let int_info = VmxInterruptInfo::from(vector, err_code); + if let Some(err_code) = int_info.err_code { + VmcsControl32::VMENTRY_EXCEPTION_ERR_CODE.write(err_code)?; + } + if int_info.int_type.is_soft() { + VmcsControl32::VMENTRY_INSTRUCTION_LEN + .write(VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?)?; + } + VmcsControl32::VMENTRY_INTERRUPTION_INFO_FIELD.write(int_info.bits())?; + Ok(()) + } + + pub fn instruction_error() -> HvResult { + Ok(VmcsReadOnly32::VM_INSTRUCTION_ERROR.read()?.into()) + } + + pub fn set_control( + control: VmcsControl32, + capability_msr: Msr, + old_value: u32, + set: u32, + clear: u32, + ) -> HvResult { + let cap = capability_msr.read(); + let allowed0 = cap as u32; + let allowed1 = (cap >> 32) as u32; + assert_eq!(allowed0 & allowed1, allowed0); + debug!( + "set {:?}: {:#x} (+{:#x}, -{:#x})", + control, old_value, set, clear + ); + if (set & clear) != 0 { + return hv_result_err!( + EINVAL, + format!("can not set and clear the same bit in {:?}", control) + ); + } + if (allowed1 & set) != set { + // failed if set 0-bits in allowed1 + return hv_result_err!( + EINVAL, + format!("can not set bits {:#x} in {:?}", set, control) + ); + } + if (allowed0 & clear) != 0 { + // failed if clear 1-bits in allowed0 + return hv_result_err!( + EINVAL, + format!("can not clear bits {:#x} in {:?}", clear, control) + ); + } + // SDM Vol. 
3C, Section 31.5.1, Algorithm 3 + let flexible = !allowed0 & allowed1; // these bits can be either 0 or 1 + let unknown = flexible & !(set | clear); // hypervisor untouched bits + let default = unknown & old_value; // these bits keep unchanged in old value + let fixed1 = allowed0; // these bits are fixed to 1 + control.write(fixed1 | default | set)?; + Ok(()) + } +} + +impl From for HvError { + fn from(err: VmFail) -> Self { + match err { + VmFail::VmFailValid => hv_err!(EFAULT, Vmcs::instruction_error().unwrap().as_str()), + _ => hv_err!(EFAULT, format!("VMX instruction failed: {:?}", err)), + } + } +} diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs index 73410ec3..14ff7975 100644 --- a/src/arch/x86_64/vmx.rs +++ b/src/arch/x86_64/vmx.rs @@ -1,11 +1,17 @@ use crate::{ - arch::{msr::Msr, s2pt::Stage2PageFaultInfo}, + arch::{ + cpu::ArchCpu, + msr::Msr, + s2pt::Stage2PageFaultInfo, + vmcs::{self, *}, + }, consts::PAGE_SIZE, error::{HvError, HvResult}, memory::{Frame, GuestPhysAddr, HostPhysAddr, HostVirtAddr, MemFlags, PhysAddr}, }; use bit_field::BitField; use bitflags::{bitflags, Flags}; +use core::fmt::{Debug, Formatter, Result}; use raw_cpuid::CpuId; use x86::{ bits64::vmx, @@ -14,7 +20,10 @@ use x86::{ segmentation::SegmentSelector, vmx::{vmcs::control::*, vmcs::*, VmFail}, }; -use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}; +use x86_64::{ + registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}, + structures::gdt, +}; bitflags! { pub struct FeatureControlFlags: u64 { @@ -28,15 +37,62 @@ bitflags! { } } -fn vmread(field: u32) -> x86::vmx::Result { - unsafe { vmx::vmread(field as u32) } +/// Exit Qualification for CR access. (SDM Vol. 3C, Section 27.2.1, Table 27-3) +#[derive(Debug)] +pub struct VmxCrAccessInfo { + /// Control register number (CR0/CR3/CR4). + pub cr_n: u8, + /// Access type (0 = MOV to CR; 1 = MOV from CR; 2 = CLTS; 3 = LMSW). + pub access_type: u8, + /// LMSW operand type.
+ pub lmsw_op_type: u8, + /// General register. + pub gpr: u8, + /// LMSW source. + pub lmsw_src: u16, +} + +impl VmxCrAccessInfo { + pub fn new() -> HvResult { + let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?; + Ok(VmxCrAccessInfo { + cr_n: qualification.get_bits(0..=3) as _, + access_type: qualification.get_bits(4..=5) as _, + lmsw_op_type: qualification.get_bit(6) as _, + gpr: qualification.get_bits(8..=11) as _, + lmsw_src: qualification.get_bits(16..=31) as _, + }) + } } -fn vmwrite>(field: u32, value: T) -> x86::vmx::Result<()> { - unsafe { vmx::vmwrite(field as u32, value.into()) } +/// VM-Exit Information. (SDM Vol. 3C, Section 24.9.1) +#[derive(Debug)] +pub struct VmxExitInfo { + /// VM-entry failure. (0 = true VM exit; 1 = VM-entry failure) + pub entry_failure: bool, + /// Basic exit reason. + pub exit_reason: VmxExitReason, + /// For VM exits resulting from instruction execution, this field receives + /// the length in bytes of the instruction whose execution led to the VM exit. + pub exit_instruction_length: u32, + /// Guest `RIP` where the VM exit occurs. + pub guest_rip: usize, } -const ZERO: u64 = 0; +impl VmxExitInfo { + pub fn new() -> HvResult { + let full_reason = VmcsReadOnly32::EXIT_REASON.read()?; + Ok(Self { + exit_reason: full_reason + .get_bits(0..16) + .try_into() + .expect("Unknown VM-exit reason"), + entry_failure: full_reason.get_bit(31), + exit_instruction_length: VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?, + guest_rip: VmcsGuestNW::RIP.read()?, + }) + } +} numeric_enum_macro::numeric_enum! { #[repr(u32)] @@ -114,33 +170,102 @@ pub enum VmxExitReason { } } -/// VM-Exit Informations. (SDM Vol. 3C, Section 24.9.1) -#[derive(Debug)] -pub struct VmxExitInfo { - /// VM-entry failure. (0 = true VM exit; 1 = VM-entry failure) - pub entry_failure: bool, - /// Basic exit reason.
- pub exit_reason: VmxExitReason, - /// For VM exits resulting from instruction execution, this field receives - /// the length in bytes of the instruction whose execution led to the VM exit. - pub exit_instruction_length: u32, - /// Guest `RIP` where the VM exit occurs. - pub guest_rip: usize, +/// VM instruction error numbers. (SDM Vol. 3C, Section 30.4) +pub struct VmxInstructionError(u32); + +impl VmxInstructionError { + pub fn as_str(&self) -> &str { + match self.0 { + 0 => "OK", + 1 => "VMCALL executed in VMX root operation", + 2 => "VMCLEAR with invalid physical address", + 3 => "VMCLEAR with VMXON pointer", + 4 => "VMLAUNCH with non-clear VMCS", + 5 => "VMRESUME with non-launched VMCS", + 6 => "VMRESUME after VMXOFF (VMXOFF and VMXON between VMLAUNCH and VMRESUME)", + 7 => "VM entry with invalid control field(s)", + 8 => "VM entry with invalid host-state field(s)", + 9 => "VMPTRLD with invalid physical address", + 10 => "VMPTRLD with VMXON pointer", + 11 => "VMPTRLD with incorrect VMCS revision identifier", + 12 => "VMREAD/VMWRITE from/to unsupported VMCS component", + 13 => "VMWRITE to read-only VMCS component", + 15 => "VMXON executed in VMX root operation", + 16 => "VM entry with invalid executive-VMCS pointer", + 17 => "VM entry with non-launched executive VMCS", + 18 => "VM entry with executive-VMCS pointer not VMXON pointer (when attempting to deactivate the dual-monitor treatment of SMIs and SMM)", + 19 => "VMCALL with non-clear VMCS (when attempting to activate the dual-monitor treatment of SMIs and SMM)", + 20 => "VMCALL with invalid VM-exit control fields", + 22 => "VMCALL with incorrect MSEG revision identifier (when attempting to activate the dual-monitor treatment of SMIs and SMM)", + 23 => "VMXOFF under dual-monitor treatment of SMIs and SMM", + 24 => "VMCALL with invalid SMM-monitor features (when attempting to activate the dual-monitor treatment of SMIs and SMM)", + 25 => "VM entry with invalid VM-execution control fields in executive VMCS 
(when attempting to return from SMM)", + 26 => "VM entry with events blocked by MOV SS", + 28 => "Invalid operand to INVEPT/INVVPID", + _ => "[INVALID]", + } + } } -/// Exit Qualification for I/O Instructions. (SDM Vol. 3C, Section 27.2.1, Table 27-5) +impl From for VmxInstructionError { + fn from(value: u32) -> Self { + Self(value) + } +} + +impl Debug for VmxInstructionError { + fn fmt(&self, f: &mut Formatter) -> Result { + write!(f, "VmxInstructionError({}, {:?})", self.0, self.as_str()) + } +} + +/// VM-Entry / VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) #[derive(Debug)] -pub struct VmxIoExitInfo { - /// Size of access. - pub access_size: u8, - /// Direction of the attempted access (0 = OUT, 1 = IN). - pub is_in: bool, - /// String instruction (0 = not string; 1 = string). - pub is_string: bool, - /// REP prefixed (0 = not REP; 1 = REP). - pub is_repeat: bool, - /// Port number. (as specified in DX or in an immediate operand) - pub port: u16, +pub struct VmxInterruptInfo { + /// Vector of interrupt or exception. + pub vector: u8, + /// Determines details of how the injection is performed. + pub int_type: VmxInterruptionType, + /// For hardware exceptions that would have delivered an error code on the stack. + pub err_code: Option, + /// Whether the field is valid. + pub valid: bool, +} + +impl VmxInterruptInfo { + pub fn new() -> HvResult { + // SDM Vol. 3C, Section 24.9.2 + let info = VmcsReadOnly32::VMEXIT_INTERRUPTION_INFO.read()?; + Ok(VmxInterruptInfo { + vector: info.get_bits(0..8) as u8, + int_type: VmxInterruptionType::try_from(info.get_bits(8..11) as u8).unwrap(), + err_code: if info.get_bit(11) { + Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?) + } else { + None + }, + valid: info.get_bit(31), + }) + } + + /// Convert from the interrupt vector and the error code. 
+ pub fn from(vector: u8, err_code: Option) -> Self { + Self { + vector, + int_type: VmxInterruptionType::from_vector(vector), + err_code, + valid: true, + } + } + + /// Raw bits for writing to VMCS. + pub fn bits(&self) -> u32 { + let mut bits = self.vector as u32; + bits |= (self.int_type as u32) << 8; + bits.set_bit(11, self.err_code.is_some()); + bits.set_bit(31, self.valid); + bits + } } numeric_enum_macro::numeric_enum! { @@ -209,37 +334,32 @@ impl VmxInterruptionType { } } -/// VM-Entry / VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) +/// Exit Qualification for I/O Instructions. (SDM Vol. 3C, Section 27.2.1, Table 27-5) #[derive(Debug)] -pub struct VmxInterruptInfo { - /// Vector of interrupt or exception. - pub vector: u8, - /// Determines details of how the injection is performed. - pub int_type: VmxInterruptionType, - /// For hardware exceptions that would have delivered an error code on the stack. - pub err_code: Option, - /// Whether the field is valid. - pub valid: bool, +pub struct VmxIoExitInfo { + /// Size of access. + pub access_size: u8, + /// Direction of the attempted access (0 = OUT, 1 = IN). + pub is_in: bool, + /// String instruction (0 = not string; 1 = string). + pub is_string: bool, + /// REP prefixed (0 = not REP; 1 = REP). + pub is_repeat: bool, + /// Port number. (as specified in DX or in an immediate operand) + pub port: u16, } -impl VmxInterruptInfo { - /// Convert from the interrupt vector and the error code. - pub fn from(vector: u8, err_code: Option) -> Self { - Self { - vector, - int_type: VmxInterruptionType::from_vector(vector), - err_code, - valid: true, - } - } - - /// Raw bits for writing to VMCS. - pub fn bits(&self) -> u32 { - let mut bits = self.vector as u32; - bits |= (self.int_type as u32) << 8; - bits.set_bit(11, self.err_code.is_some()); - bits.set_bit(31, self.valid); - bits +impl VmxIoExitInfo { + pub fn new() -> HvResult { + // SDM Vol. 
3C, Section 27.2.1, Table 27-5 + let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?; + Ok(VmxIoExitInfo { + access_size: qualification.get_bits(0..3) as u8 + 1, + is_in: qualification.get_bit(3), + is_string: qualification.get_bit(4), + is_repeat: qualification.get_bit(5), + port: qualification.get_bits(16..32) as u16, + }) } } @@ -278,10 +398,6 @@ pub fn check_vmx_support() -> bool { } } -pub fn is_vmx_enabled() -> bool { - Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS) -} - pub fn enable_vmxon() -> HvResult { let mut ctrl_reg = Msr::IA32_FEATURE_CONTROL; let ctrl_flag = FeatureControlFlags::from_bits_truncate(ctrl_reg.read()); @@ -302,11 +418,6 @@ pub fn enable_vmxon() -> HvResult { Ok(()) } -pub fn get_vmcs_revision_id() -> u32 { - let vmx_basic_flag = Msr::IA32_VMX_BASIC.read(); - vmx_basic_flag.get_bits(0..=30) as u32 -} - pub unsafe fn execute_vmxon(start_paddr: u64) -> HvResult { // enable VMX using the VMXE bit Cr4::write(Cr4::read() | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS); @@ -316,401 +427,11 @@ pub unsafe fn execute_vmxon(start_paddr: u64) -> HvResult { Ok(()) } -pub unsafe fn enable_vmcs(start_paddr: u64) -> HvResult { - vmx::vmclear(start_paddr)?; - vmx::vmptrld(start_paddr)?; - - Ok(()) -} - -pub fn setup_vmcs_host(vmx_exit: HostVirtAddr) -> HvResult { - vmwrite(host::IA32_PAT_FULL, Msr::IA32_PAT.read())?; - vmwrite(host::IA32_EFER_FULL, Msr::IA32_EFER.read())?; - - vmwrite(host::CR0, Cr0::read_raw())?; - vmwrite(host::CR3, Cr3::read_raw().0.start_address().as_u64())?; - vmwrite(host::CR4, Cr4::read_raw())?; - - vmwrite(host::ES_SELECTOR, x86::segmentation::es().bits())?; - vmwrite(host::CS_SELECTOR, x86::segmentation::cs().bits())?; - vmwrite(host::SS_SELECTOR, x86::segmentation::ss().bits())?; - vmwrite(host::DS_SELECTOR, x86::segmentation::ds().bits())?; - vmwrite(host::FS_SELECTOR, x86::segmentation::fs().bits())?; - vmwrite(host::GS_SELECTOR, x86::segmentation::gs().bits())?; - - vmwrite(host::FS_BASE, 
Msr::IA32_FS_BASE.read())?; - vmwrite(host::GS_BASE, Msr::IA32_GS_BASE.read())?; - - let tr = unsafe { x86::task::tr() }; - let mut gdtp = DescriptorTablePointer::::default(); - let mut idtp = DescriptorTablePointer::::default(); - unsafe { - dtables::sgdt(&mut gdtp); - dtables::sidt(&mut idtp); - } - - vmwrite(host::TR_SELECTOR, tr.bits())?; - vmwrite(host::TR_BASE, get_tr_base(tr, &gdtp))?; - vmwrite(host::GDTR_BASE, gdtp.base as u64)?; - vmwrite(host::IDTR_BASE, idtp.base as u64)?; - vmwrite(host::RIP, vmx_exit as u64)?; - - vmwrite(host::IA32_SYSENTER_ESP, ZERO)?; - vmwrite(host::IA32_SYSENTER_EIP, ZERO)?; - vmwrite(host::IA32_SYSENTER_CS, ZERO)?; - - // VmcsHostNW::RSP.write(ZERO)?; // TODO - Ok(()) -} - -pub fn setup_vmcs_guest(entry: GuestPhysAddr) -> HvResult { - // Enable protected mode and paging. - let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; - let cr0_host_owned = - Cr0Flags::NUMERIC_ERROR | Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE; - let cr0_read_shadow = Cr0Flags::NUMERIC_ERROR; - - vmwrite(guest::CR0, cr0_guest.bits())?; - vmwrite(control::CR0_GUEST_HOST_MASK, cr0_host_owned.bits())?; - vmwrite(control::CR0_READ_SHADOW, cr0_read_shadow.bits())?; - - // Enable physical address extensions that required in IA-32e mode. - let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; - let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; - let cr4_read_shadow = ZERO; - - vmwrite(guest::CR4, cr4_guest.bits())?; - vmwrite(control::CR4_GUEST_HOST_MASK, cr4_host_owned.bits())?; - vmwrite(control::CR4_READ_SHADOW, cr4_read_shadow)?; - - macro_rules! 
set_guest_segment { - ($seg: ident, $access_rights: expr) => {{ - use guest::*; - vmwrite(concat_idents!($seg, _SELECTOR), ZERO)?; - vmwrite(concat_idents!($seg, _BASE), ZERO)?; - vmwrite(concat_idents!($seg, _LIMIT), 0xffff_u64)?; - vmwrite(concat_idents!($seg, _ACCESS_RIGHTS), $access_rights as u64)?; - }}; - } - - set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed - set_guest_segment!(CS, 0x9b); // 16-bit, present, code, exec/read, accessed - set_guest_segment!(SS, 0x93); - set_guest_segment!(DS, 0x93); - set_guest_segment!(FS, 0x93); - set_guest_segment!(GS, 0x93); - set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy - set_guest_segment!(LDTR, 0x82); // present, system, LDT - - vmwrite(guest::GDTR_BASE, ZERO)?; - vmwrite(guest::GDTR_LIMIT, 0xffff_u64)?; - vmwrite(guest::IDTR_BASE, ZERO)?; - vmwrite(guest::IDTR_LIMIT, 0xffff_u64)?; - - vmwrite(guest::CR3, ZERO)?; - vmwrite(guest::DR7, 0x400_u64)?; - vmwrite(guest::RSP, ZERO)?; - vmwrite(guest::RIP, entry as u64)?; - vmwrite(guest::RFLAGS, 0x2_u64)?; - vmwrite(guest::PENDING_DBG_EXCEPTIONS, ZERO)?; - vmwrite(guest::IA32_SYSENTER_ESP, ZERO)?; - vmwrite(guest::IA32_SYSENTER_EIP, ZERO)?; - vmwrite(guest::IA32_SYSENTER_CS, ZERO)?; - - vmwrite(guest::INTERRUPTIBILITY_STATE, ZERO)?; - vmwrite(guest::ACTIVITY_STATE, ZERO)?; - vmwrite(guest::VMX_PREEMPTION_TIMER_VALUE, ZERO)?; - - vmwrite(guest::LINK_PTR_FULL, u64::MAX)?; - vmwrite(guest::IA32_DEBUGCTL_FULL, ZERO)?; - vmwrite(guest::IA32_PAT_FULL, Msr::IA32_PAT.read())?; - vmwrite(guest::IA32_EFER_FULL, ZERO)?; - - Ok(()) -} - -pub fn setup_vmcs_control(msr_bitmap: HostPhysAddr) -> HvResult { - // Intercept NMI and external interrupts. 
- set_control( - control::PINBASED_EXEC_CONTROLS, - Msr::IA32_VMX_TRUE_PINBASED_CTLS, - Msr::IA32_VMX_PINBASED_CTLS.read() as u32, - (PinbasedControls::NMI_EXITING | PinbasedControls::EXTERNAL_INTERRUPT_EXITING).bits(), - 0, - )?; - - // Intercept all I/O instructions, use MSR bitmaps, activate secondary controls, - // disable CR3 load/store interception. - set_control( - control::PRIMARY_PROCBASED_EXEC_CONTROLS, - Msr::IA32_VMX_TRUE_PROCBASED_CTLS, - Msr::IA32_VMX_PROCBASED_CTLS.read() as u32, - (PrimaryControls::UNCOND_IO_EXITING - | PrimaryControls::USE_MSR_BITMAPS - | PrimaryControls::SECONDARY_CONTROLS) - .bits(), - (PrimaryControls::CR3_LOAD_EXITING | PrimaryControls::CR3_STORE_EXITING).bits(), - )?; - - // Enable EPT, RDTSCP, INVPCID, and unrestricted guest. - set_control( - control::SECONDARY_PROCBASED_EXEC_CONTROLS, - Msr::IA32_VMX_PROCBASED_CTLS2, - 0, - (SecondaryControls::ENABLE_EPT - | SecondaryControls::ENABLE_RDTSCP - | SecondaryControls::ENABLE_INVPCID - | SecondaryControls::UNRESTRICTED_GUEST) - .bits(), - 0, - )?; - - // Switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit. - set_control( - control::VMEXIT_CONTROLS, - Msr::IA32_VMX_TRUE_EXIT_CTLS, - Msr::IA32_VMX_EXIT_CTLS.read() as u32, - (ExitControls::HOST_ADDRESS_SPACE_SIZE - | ExitControls::ACK_INTERRUPT_ON_EXIT - | ExitControls::SAVE_IA32_PAT - | ExitControls::LOAD_IA32_PAT - | ExitControls::SAVE_IA32_EFER - | ExitControls::LOAD_IA32_EFER) - .bits(), - 0, - )?; - - // Load guest IA32_PAT/IA32_EFER on VM entry. - set_control( - control::VMENTRY_CONTROLS, - Msr::IA32_VMX_TRUE_ENTRY_CTLS, - Msr::IA32_VMX_ENTRY_CTLS.read() as u32, - (EntryControls::LOAD_IA32_PAT | EntryControls::LOAD_IA32_EFER).bits(), - 0, - )?; - - // No MSR switches if hypervisor doesn't use and there is only one vCPU. 
- vmwrite(control::VMEXIT_MSR_STORE_COUNT, ZERO)?; - vmwrite(control::VMEXIT_MSR_LOAD_COUNT, ZERO)?; - vmwrite(control::VMENTRY_MSR_LOAD_COUNT, ZERO)?; - - // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps. - vmwrite(control::EXCEPTION_BITMAP, ZERO)?; - vmwrite(control::IO_BITMAP_A_ADDR_FULL, ZERO)?; - vmwrite(control::IO_BITMAP_B_ADDR_FULL, ZERO)?; - vmwrite(control::MSR_BITMAPS_ADDR_FULL, msr_bitmap as u64)?; - - Ok(()) -} - -fn get_tr_base(tr: SegmentSelector, gdt: &DescriptorTablePointer) -> u64 { - let index = tr.index() as usize; - let table_len = (gdt.limit as usize + 1) / core::mem::size_of::(); - let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; - let entry = table[index]; - if entry & (1 << 47) != 0 { - // present - let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; - let base_high = table[index + 1] & 0xffff_ffff; - base_low | base_high << 32 - } else { - // no present - 0 - } -} - -pub fn set_control( - control: u32, - capability_msr: Msr, - old_value: u32, - set: u32, - clear: u32, -) -> HvResult<()> { - let cap = unsafe { capability_msr.read() }; - let allowed0 = cap as u32; - let allowed1 = (cap >> 32) as u32; - assert_eq!(allowed0 & allowed1, allowed0); - debug!( - "set {:#x}: {:#x} (+{:#x}, -{:#x})", - control, old_value, set, clear - ); - if (set & clear) != 0 { - return hv_result_err!( - EINVAL, - format!("can not set and clear the same bit in {:#x}", control) - ); - } - if (allowed1 & set) != set { - // failed if set 0-bits in allowed1 - return hv_result_err!( - EINVAL, - format!("can not set bits {:#x} in {:#x}", set, control) - ); - } - if (allowed0 & clear) != 0 { - // failed if clear 1-bits in allowed0 - return hv_result_err!( - EINVAL, - format!("can not clear bits {:#x} in {:#x}", clear, control) - ); - } - // SDM Vol. 
3C, Section 31.5.1, Algorithm 3 - let flexible = !allowed0 & allowed1; // therse bits can be either 0 or 1 - let unknown = flexible & !(set | clear); // hypervisor untouched bits - let default = unknown & old_value; // these bits keep unchanged in old value - let fixed1 = allowed0; // these bits are fixed to 1 - vmwrite(control, fixed1 | default | set)?; - Ok(()) -} - -impl From for HvError { - fn from(err: VmFail) -> Self { - hv_err!(EFAULT, format!("VMX instruction failed: {:?}", err)) - } -} - -pub fn advance_guest_rip(instr_len: u8) -> HvResult { - unsafe { - Ok(vmwrite( - guest::RIP, - (vmread(guest::RIP)? + instr_len as u64), - )?) - } -} - -pub fn instruction_error() -> u32 { - vmread(ro::VM_INSTRUCTION_ERROR).unwrap() as u32 -} - -pub fn set_host_rsp(rsp: HostPhysAddr) -> HvResult { - Ok(vmwrite(host::RSP, rsp as u64)?) -} - -pub fn set_guest_page_table(cr3: GuestPhysAddr) -> HvResult { - Ok(vmwrite(guest::CR3, cr3 as u64)?) -} - -pub fn set_guest_stack_pointer(rsp: GuestPhysAddr) -> HvResult { - Ok(vmwrite(guest::RSP, rsp as u64)?) -} - -pub fn set_s2ptp(s2ptp: u64) -> HvResult { - Ok(vmwrite(control::EPTP_FULL, s2ptp as u64)?) -} - -pub fn guest_rip() -> u64 { - vmread(guest::RIP).unwrap() as u64 -} - -pub fn guest_rsp() -> u64 { - vmread(guest::RSP).unwrap() as u64 -} - -pub fn guest_cr3() -> u64 { - vmread(guest::CR3).unwrap() as u64 -} - -pub fn exit_info() -> HvResult { - let full_reason = vmread(ro::EXIT_REASON)? as u32; - Ok(VmxExitInfo { - exit_reason: full_reason - .get_bits(0..16) - .try_into() - .expect("Unknown VM-exit reason"), - entry_failure: full_reason.get_bit(31), - exit_instruction_length: vmread(ro::VMEXIT_INSTRUCTION_LEN)? as u32, - guest_rip: vmread(guest::RIP)? as usize, - }) -} - -pub fn s2pt_violation_info() -> HvResult { - // SDM Vol. 3C, Section 27.2.1, Table 27-7 - let qualification = vmread(ro::EXIT_QUALIFICATION)? as u64; - let fault_guest_paddr = vmread(ro::GUEST_PHYSICAL_ADDR_FULL)? 
as usize; - let mut access_flags = MemFlags::empty(); - if qualification.get_bit(0) { - access_flags |= MemFlags::READ; - } - if qualification.get_bit(1) { - access_flags |= MemFlags::WRITE; - } - if qualification.get_bit(2) { - access_flags |= MemFlags::EXECUTE; - } - Ok(Stage2PageFaultInfo { - access_flags, - fault_guest_paddr, - }) -} - -pub fn io_exit_info() -> HvResult { - // SDM Vol. 3C, Section 27.2.1, Table 27-5 - let qualification = vmread(ro::EXIT_QUALIFICATION)?; - Ok(VmxIoExitInfo { - access_size: qualification.get_bits(0..3) as u8 + 1, - is_in: qualification.get_bit(3), - is_string: qualification.get_bit(4), - is_repeat: qualification.get_bit(5), - port: qualification.get_bits(16..32) as u16, - }) -} - -pub fn allow_interrupt() -> HvResult { - let rflags = vmread(guest::RFLAGS)?; - let block_state = vmread(guest::INTERRUPTIBILITY_STATE)?; - Ok( - rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0 - && block_state == 0, - ) -} - -pub fn inject_event(vector: u8, err_code: Option) -> HvResult { - // SDM Vol. 3C, Section 24.8.3 - let err_code = if VmxInterruptionType::vector_has_error_code(vector) { - err_code.or_else(|| Some(vmread(ro::VMEXIT_INTERRUPTION_ERR_CODE).unwrap() as u32)) - } else { - None - }; - let int_info = VmxInterruptInfo::from(vector, err_code); - if let Some(err_code) = int_info.err_code { - vmwrite(control::VMENTRY_EXCEPTION_ERR_CODE, err_code)?; - } - if int_info.int_type.is_soft() { - vmwrite( - control::VMENTRY_INSTRUCTION_LEN, - vmread(ro::VMEXIT_INSTRUCTION_LEN)?, - )?; - } - vmwrite(control::VMENTRY_INTERRUPTION_INFO_FIELD, int_info.bits())?; - Ok(()) -} - -/// If enable, a VM exit occurs at the beginning of any instruction if -/// `RFLAGS.IF` = 1 and there are no other blocking of interrupts. -/// (see SDM, Vol. 3C, Section 24.4.2) -pub fn set_interrupt_window(enable: bool) -> HvResult { - let mut ctrl = vmread(control::PRIMARY_PROCBASED_EXEC_CONTROLS)? 
as u32; - let bits = PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); - if enable { - ctrl |= bits - } else { - ctrl &= !bits - } - vmwrite(control::PRIMARY_PROCBASED_EXEC_CONTROLS, ctrl)?; - Ok(()) +pub fn get_vmcs_revision_id() -> u32 { + let vmx_basic_flag = Msr::IA32_VMX_BASIC.read(); + vmx_basic_flag.get_bits(0..=30) as u32 } -pub fn interrupt_exit_info() -> HvResult { - // SDM Vol. 3C, Section 24.9.2 - let info = vmread(ro::VMEXIT_INTERRUPTION_INFO)?; - Ok(VmxInterruptInfo { - vector: info.get_bits(0..8) as u8, - int_type: VmxInterruptionType::try_from(info.get_bits(8..11) as u8).unwrap(), - err_code: if info.get_bit(11) { - Some(vmread(ro::VMEXIT_INTERRUPTION_ERR_CODE)? as u32) - } else { - None - }, - valid: info.get_bit(31), - }) +pub fn is_vmx_enabled() -> bool { + Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS) } diff --git a/src/device/irqchip/pic/i8259.rs b/src/device/irqchip/pic/i8259.rs index 9dd10a0a..bf86e4fb 100644 --- a/src/device/irqchip/pic/i8259.rs +++ b/src/device/irqchip/pic/i8259.rs @@ -10,7 +10,8 @@ impl PortIoDevice for VirtI8259Pic { } fn read(&self, port: u16, access_size: u8) -> HvResult { - hv_result_err!(EIO) // report error for read + Ok(0) + // hv_result_err!(EIO) report error for read } fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult { diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 52a44f34..c33c077d 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -23,10 +23,9 @@ pub enum TimerMode { TscDeadline = 0b10, } -pub struct VirtLocalApic; - /// A virtual local APIC timer. (SDM Vol. 
3C, Section 10.5.4) pub struct VirtApicTimer { + is_enabled: u8, lvt_timer_bits: u32, divide_shift: u8, initial_count: u32, @@ -34,53 +33,10 @@ pub struct VirtApicTimer { deadline_ns: u64, } -impl VirtLocalApic { - pub const fn msr_range() -> core::ops::Range { - 0x800..0x840 - } - - pub fn rdmsr(arch_cpu: &mut ArchCpu, msr: Msr) -> HvResult { - let apic_timer = arch_cpu.apic_timer_mut(); - match msr { - SIVR => Ok(0x1ff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit) - LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => { - Ok(0x1_0000) // SDM Vol. 3A, Section 10.5.1, Figure 10-8 (with Mask bit) - } - LVT_TIMER => Ok(apic_timer.lvt_timer() as u64), - INIT_COUNT => Ok(apic_timer.initial_count() as u64), - DIV_CONF => Ok(apic_timer.divide() as u64), - CUR_COUNT => Ok(apic_timer.current_counter() as u64), - _ => hv_result_err!(ENOSYS), - } - } - - pub fn wrmsr(arch_cpu: &mut ArchCpu, msr: Msr, value: u64) -> HvResult { - if msr != ICR && (value >> 32) != 0 { - return hv_result_err!(EINVAL); // all registers except ICR are 32-bits - } - let apic_timer = arch_cpu.apic_timer_mut(); - match msr { - EOI => { - if value != 0 { - hv_result_err!(EINVAL) // write a non-zero value causes #GP - } else { - Ok(()) - } - } - SIVR | LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => { - Ok(()) // ignore these register writes - } - LVT_TIMER => apic_timer.set_lvt_timer(value as u32), - INIT_COUNT => apic_timer.set_initial_count(value as u32), - DIV_CONF => apic_timer.set_divide(value as u32), - _ => hv_result_err!(ENOSYS), - } - } -} - impl VirtApicTimer { pub const fn new() -> Self { Self { + is_enabled: 1, lvt_timer_bits: 0x1_0000, // masked divide_shift: 0, initial_count: 0, @@ -89,20 +45,25 @@ impl VirtApicTimer { } } + pub fn set_enable(&mut self, is_enabled: u8) { + self.is_enabled = is_enabled; + } + /// Check if an interrupt generated. if yes, update it's states. 
pub fn check_interrupt(&mut self) -> bool { if self.deadline_ns == 0 { - false + return false; } else if current_time_nanos() >= self.deadline_ns { if self.is_periodic() { self.deadline_ns += self.interval_ns(); } else { self.deadline_ns = 0; } - !self.is_masked() - } else { - false + if self.is_enabled != 0 { + return !self.is_masked(); + } } + false } /// Whether the timer interrupt is masked. @@ -153,9 +114,10 @@ impl VirtApicTimer { /// Set LVT Timer Register. pub fn set_lvt_timer(&mut self, bits: u32) -> HvResult { let timer_mode = bits.get_bits(17..19); - if timer_mode == TimerMode::TscDeadline as _ { + /*if timer_mode == TimerMode::TscDeadline as _ { return hv_result_err!(EINVAL); // TSC deadline mode was not supported - } else if timer_mode == 0b11 { + } else */ + if timer_mode == 0b11 { return hv_result_err!(EINVAL); // reserved } self.lvt_timer_bits = bits; @@ -191,3 +153,66 @@ impl VirtApicTimer { } } } + +pub struct VirtLocalApic; + +impl VirtLocalApic { + pub const fn msr_range() -> core::ops::Range { + 0x800..0x840 + } + + pub fn rdmsr(arch_cpu: &mut ArchCpu, msr: Msr) -> HvResult { + let apic_timer = arch_cpu.apic_timer_mut(); + trace!("lapic rdmsr: {:?}", msr,); + match msr { + IA32_X2APIC_APICID => Ok(arch_cpu.cpuid as u64), + IA32_X2APIC_VERSION => Ok(0x50014), // Max LVT Entry: 0x5, Version: 0x14 + IA32_X2APIC_LDR => Ok(0x0), // TODO: IPI + IA32_X2APIC_SIVR => Ok(((apic_timer.is_enabled as u64 & 0x1) << 8) | 0xff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit) + IA32_X2APIC_LVT_TIMER => Ok(apic_timer.lvt_timer() as u64), + IA32_X2APIC_LVT_THERMAL + | IA32_X2APIC_LVT_PMI + | IA32_X2APIC_LVT_LINT0 + | IA32_X2APIC_LVT_LINT1 + | IA32_X2APIC_LVT_ERROR => { + Ok(0x1_0000) // SDM Vol. 
3A, Section 10.5.1, Figure 10-8 (with Mask bit) + } + IA32_X2APIC_INIT_COUNT => Ok(apic_timer.initial_count() as u64), + IA32_X2APIC_CUR_COUNT => Ok(apic_timer.current_counter() as u64), + IA32_X2APIC_DIV_CONF => Ok(apic_timer.divide() as u64), + _ => hv_result_err!(ENOSYS), + } + } + + pub fn wrmsr(arch_cpu: &mut ArchCpu, msr: Msr, value: u64) -> HvResult { + if msr != IA32_X2APIC_ICR && (value >> 32) != 0 { + return hv_result_err!(EINVAL); // all registers except ICR are 32-bits + } + let apic_timer = arch_cpu.apic_timer_mut(); + trace!("lapic wrmsr: {:?}, value: {:x}", msr, value); + match msr { + IA32_X2APIC_EOI => { + if value != 0 { + hv_result_err!(EINVAL) // write a non-zero value causes #GP + } else { + Ok(()) + } + } + IA32_X2APIC_SIVR => { + apic_timer.set_enable(((value >> 8) & 1) as _); + Ok(()) + } + IA32_X2APIC_LVT_THERMAL + | IA32_X2APIC_LVT_PMI + | IA32_X2APIC_LVT_LINT0 + | IA32_X2APIC_LVT_LINT1 + | IA32_X2APIC_LVT_ERROR => { + Ok(()) // ignore these register writes + } + IA32_X2APIC_LVT_TIMER => apic_timer.set_lvt_timer(value as u32), + IA32_X2APIC_INIT_COUNT => apic_timer.set_initial_count(value as u32), + IA32_X2APIC_DIV_CONF => apic_timer.set_divide(value as u32), + _ => hv_result_err!(ENOSYS), + } + } +} diff --git a/src/device/uart/uart16550.rs b/src/device/uart/uart16550.rs index 4c954499..35cd862c 100644 --- a/src/device/uart/uart16550.rs +++ b/src/device/uart/uart16550.rs @@ -1,7 +1,4 @@ -use crate::{ - arch::device::PortIoDevice, - error::{HvError, HvResult}, -}; +use crate::{arch::device::PortIoDevice, error::HvResult}; use spin::Mutex; use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; @@ -29,10 +26,9 @@ lazy_static::lazy_static! { bitflags::bitflags! 
{ /// Line status flags struct LineStatusFlags: u8 { - const INPUT_FULL = 1; - // 1 to 4 unknown - const OUTPUT_EMPTY = 1 << 5; - // 6 and 7 unknown + const RECEIVER_DATA_READY = 1; + const TRANSMIT_HOLD_REG_EMPTY = 1 << 5; + const TRANSMITTER_EMPTY = 1 << 6; } } @@ -86,11 +82,6 @@ struct Uart16550 { lsr: PortReadOnly, // line status } -pub struct VirtUart16550 { - base_port: u16, - fifo: Mutex>, -} - impl Uart16550 { const fn new(base_port: u16) -> Self { Self { @@ -130,14 +121,14 @@ impl Uart16550 { fn putchar(&mut self, c: u8) { unsafe { - while self.lsr.read() & LineStatusFlags::OUTPUT_EMPTY.bits() == 0 {} + while self.lsr.read() & LineStatusFlags::TRANSMIT_HOLD_REG_EMPTY.bits() == 0 {} self.thr.write(c); } } fn getchar(&mut self) -> Option { unsafe { - if self.lsr.read() & LineStatusFlags::INPUT_FULL.bits() != 0 { + if self.lsr.read() & LineStatusFlags::RECEIVER_DATA_READY.bits() != 0 { Some(self.rhr.read()) } else { None @@ -146,6 +137,11 @@ impl Uart16550 { } } +pub struct VirtUart16550 { + base_port: u16, + fifo: Mutex>, +} + impl VirtUart16550 { pub fn new(base_port: u16) -> Self { Self { @@ -183,15 +179,18 @@ impl PortIoDevice for VirtUart16550 { fifo.push(c); } } - let mut lsr = LineStatusFlags::OUTPUT_EMPTY; + let mut lsr = + LineStatusFlags::TRANSMIT_HOLD_REG_EMPTY | LineStatusFlags::TRANSMITTER_EMPTY; if !fifo.is_empty() { - lsr |= LineStatusFlags::INPUT_FULL; + lsr |= LineStatusFlags::RECEIVER_DATA_READY; } lsr.bits() } - INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | MODEM_STATUS_REG - | SCRATCH_REG => { - info!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented + FIFO_CTRL_REG => { + 0xc0 // FIFO enabled + } + INT_EN_REG | LINE_CTRL_REG | MODEM_CTRL_REG | MODEM_STATUS_REG | SCRATCH_REG => { + debug!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented 0 } _ => unreachable!(), @@ -207,7 +206,7 @@ impl PortIoDevice for VirtUart16550 { match port - self.base_port { DATA_REG => console_putchar(value as 
u8), INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | SCRATCH_REG => { - info!("Unimplemented serial port I/O write: {:#x}", port); // unimplemented + debug!("Unimplemented serial port I/O write: {:#x}", port); // unimplemented } LINE_STATUS_REG => {} // ignore _ => unreachable!(), diff --git a/src/main.rs b/src/main.rs index ca579bc6..4d2ab1ca 100644 --- a/src/main.rs +++ b/src/main.rs @@ -40,6 +40,8 @@ mod percpu; mod platform; mod zone; +#[cfg(target_arch = "x86_64")] +use crate::arch::boot::MultibootInfo; #[cfg(target_arch = "aarch64")] use crate::arch::mm::setup_parange; use crate::consts::MAX_CPU_NUM; @@ -151,6 +153,8 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { is_primary = true; memory::heap::init(); memory::heap::test(); + #[cfg(target_arch = "x86_64")] + MultibootInfo::init(host_dtb); } let cpu = PerCpu::new(cpuid); @@ -163,6 +167,7 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { cpu.arch_cpu.gdt.load(); // load gdt and tss if is_primary { + // cpu.arch_cpu.cmdline = Some(&cmdline); wakeup_secondary_cpus(cpu.id, host_dtb); } @@ -178,9 +183,6 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { if is_primary { primary_init_early(); // create root zone here - - // TODO: tmp - cpu.boot_cpu = true; } else { wait_for_counter(&INIT_EARLY_OK, 1); } diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index 87d4207e..609c5ff0 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -5,18 +5,40 @@ use crate::{ }; pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; -pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x120_0000; -pub const ROOT_ZONE_ENTRY: u64 = 0x100_8000; +pub const ROOT_ZONE_ENTRY: u64 = 0x8000; // 0x10_0000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // 0x500_0000; +pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; +pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; +pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; +pub const ROOT_ZONE_CMDLINE_ADDR: 
GuestPhysAddr = 0xc000; pub const ROOT_ZONE_CPUS: u64 = (1 << 0); pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 4] = [ +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x100_0000, + physical_start: 0x500_0000, virtual_start: 0x0, - size: 0x100_0000, + size: 0x1_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x501_0000, + virtual_start: 0x1_0000, + size: 0x14ff_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x2020_0000, + virtual_start: 0x1520_0000, + size: 0x4000_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x2000_0000, + virtual_start: 0x1500_0000, + size: 0x20_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_IO, @@ -44,12 +66,12 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig {}; // TODO: temp pub const GUEST_PT1: GuestPhysAddr = 0x1000; pub const GUEST_PT2: GuestPhysAddr = 0x2000; -pub const GUEST_ENTRY: GuestPhysAddr = 0x8000; +pub const GUEST_ENTRY: GuestPhysAddr = 0x10_0000; pub const GUEST_STACK_TOP: GuestPhysAddr = 0x7000; pub const GUEST_PHYS_MEMORY_START: HostPhysAddr = 0x100_0000; pub fn gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { - let offset = GUEST_PHYS_MEMORY_START as usize; + let offset = ROOT_ZONE_KERNEL_ADDR as usize; let host_vaddr = guest_paddr + offset; host_vaddr as *mut u8 } From d2bdba5ea5fad6cfd2cd706e4aff9b3c0854221b Mon Sep 17 00:00:00 2001 From: Solicey Date: Mon, 10 Mar 2025 11:13:30 +0800 Subject: [PATCH 07/29] enable vPIC, fix vUART output delay --- scripts/x86_64/acpi/madt.asl | 4 +- src/arch/x86_64/{lapic.rs => apic.rs} | 41 ++- src/arch/x86_64/cpu.rs | 16 +- src/arch/x86_64/device.rs | 42 ++- src/arch/x86_64/mod.rs | 2 +- src/arch/x86_64/multiboot.S | 6 +- src/arch/x86_64/trap.rs | 21 +- src/device/irqchip/pic/i8259.rs | 26 -- 
src/device/irqchip/pic/i8259a.rs | 321 +++++++++++++++++++++++ src/device/irqchip/pic/lapic.rs | 2 +- src/device/irqchip/pic/mod.rs | 2 +- src/device/uart/mod.rs | 4 +- src/device/uart/uart16550.rs | 224 ---------------- src/device/uart/uart16550a.rs | 351 ++++++++++++++++++++++++++ src/main.rs | 4 +- src/platform/qemu_x86_64.rs | 2 + 16 files changed, 775 insertions(+), 293 deletions(-) rename src/arch/x86_64/{lapic.rs => apic.rs} (70%) delete mode 100644 src/device/irqchip/pic/i8259.rs create mode 100644 src/device/irqchip/pic/i8259a.rs delete mode 100644 src/device/uart/uart16550.rs create mode 100644 src/device/uart/uart16550a.rs diff --git a/scripts/x86_64/acpi/madt.asl b/scripts/x86_64/acpi/madt.asl index ce48d07f..3d413454 100644 --- a/scripts/x86_64/acpi/madt.asl +++ b/scripts/x86_64/acpi/madt.asl @@ -44,11 +44,11 @@ Trigger Mode : 0 */ /* Local APIC NMI Structure */ -/* Connected to LINT1 on all CPUs */ +/* Connected to LINT1 on all CPUs [0001] Subtable Type : 04 [0001] Length : 06 [0001] Processor ID : ff [0002] Flags (decoded below) : 0000 Polarity : 0 Trigger Mode : 0 -[0001] Interrupt Input LINT : 01 \ No newline at end of file +[0001] Interrupt Input LINT : 01 */ \ No newline at end of file diff --git a/src/arch/x86_64/lapic.rs b/src/arch/x86_64/apic.rs similarity index 70% rename from src/arch/x86_64/lapic.rs rename to src/arch/x86_64/apic.rs index 27c0ae97..23ab43fe 100644 --- a/src/arch/x86_64/lapic.rs +++ b/src/arch/x86_64/apic.rs @@ -1,16 +1,26 @@ +use self::irqs::*; use self::vectors::*; use crate::device::irqchip::pic::enable_irq; use core::time::Duration; use raw_cpuid::CpuId; -use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}; +use spin::Mutex; +use x2apic::{ + ioapic::{IoApic, IrqFlags, IrqMode}, + lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}, +}; use x86_64::instructions::port::Port; type TimeValue = Duration; +pub mod irqs { + pub const UART_COM1_IRQ: u8 = 0x4; +} + pub mod vectors { pub const 
APIC_TIMER_VECTOR: u8 = 0xf0; pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; pub const APIC_ERROR_VECTOR: u8 = 0xf2; + pub const UART_COM1_VECTOR: u8 = 0xf3; } static mut LOCAL_APIC: Option = None; @@ -18,6 +28,9 @@ static mut CPU_FREQ_MHZ: u64 = 4_000; const LAPIC_TICKS_PER_SEC: u64 = 1_000_000_000; // TODO: need to calibrate const TICKS_PER_SEC: u64 = 100; +const IO_APIC_BASE: u64 = 0xfec00000; +static mut IO_APIC: Option = None; + pub fn local_apic<'a>() -> &'a mut LocalApic { // It's safe as LAPIC is per-cpu. unsafe { LOCAL_APIC.as_mut().unwrap() } @@ -49,11 +62,31 @@ fn busy_wait_until(deadline: TimeValue) { } } -pub fn init_primary() { +// FIXME: temporary +unsafe fn configure_gsi(io_apic: &mut IoApic, gsi: u8, vector: u8) { + let mut entry = io_apic.table_entry(gsi); + entry.set_dest(0); // ! + entry.set_vector(vector); + entry.set_mode(IrqMode::Fixed); + entry.set_flags(IrqFlags::MASKED); + io_apic.set_table_entry(gsi, entry); + io_apic.enable_irq(gsi); +} + +pub fn init_ioapic() { + unsafe { + let mut io_apic = IoApic::new(IO_APIC_BASE); + configure_gsi(&mut io_apic, UART_COM1_IRQ, UART_COM1_VECTOR); + IO_APIC = Some(io_apic); + } +} + +pub fn init_lapic() { println!("Initializing Local APIC..."); unsafe { // Disable 8259A interrupt controllers + // TODO: only cpu0 does this Port::::new(0x20).write(0xff); Port::::new(0xA0).write(0xff); } @@ -86,9 +119,7 @@ pub fn init_primary() { lapic.set_timer_initial((LAPIC_TICKS_PER_SEC / TICKS_PER_SEC) as u32); } - unsafe { - LOCAL_APIC = Some(lapic); - } + unsafe { LOCAL_APIC = Some(lapic) }; enable_irq(); } diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index a058ec3f..98f7888d 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,8 +1,8 @@ use crate::{ arch::{ + apic::{busy_wait, local_apic}, boot::BootParams, gdt::{get_tr_base, GdtStruct}, - lapic::{busy_wait, local_apic}, msr::{Msr, Msr::*, MsrBitmap}, vmcs::*, vmx::*, @@ -205,8 +205,10 @@ impl ArchCpu { loop {} } - pub fn 
inject_fault(&mut self) -> HvResult { - Ok(()) + /// Add a virtual interrupt or exception to the pending events list, + /// and try to inject it before later VM entries. + pub fn inject_interrupt(&mut self, vector: u8, err_code: Option) { + self.pending_events.push_back((vector, err_code)); } /// Guest general-purpose registers. @@ -295,18 +297,12 @@ impl ArchCpu { Ok(()) } - /// Add a virtual interrupt or exception to the pending events list, - /// and try to inject it before later VM entries. - fn inject_interrupt(&mut self, vector: u8, err_code: Option) { - self.pending_events.push_back((vector, err_code)); - } - fn setup_boot_params(&mut self) -> HvResult { BootParams::fill( ROOT_ZONE_SETUP_ADDR, ROOT_ZONE_INITRD_ADDR, ROOT_ZONE_CMDLINE_ADDR, - "console=ttyS0 earlyprintk=serial rdinit=/init nokaslr\0", + ROOT_ZONE_CMDLINE, // "console=ttyS0 earlyprintk=serial nokaslr\0" )?; self.guest_regs.rax = this_cpu_data().cpu_on_entry as u64; diff --git a/src/arch/x86_64/device.rs b/src/arch/x86_64/device.rs index dd3d1973..5d1e6a23 100644 --- a/src/arch/x86_64/device.rs +++ b/src/arch/x86_64/device.rs @@ -1,13 +1,23 @@ use crate::{ - device::{irqchip::pic::i8259::VirtI8259Pic, uart::VirtUart16550}, + device::{irqchip::pic::i8259a::VirtDualI8259a, uart::VirtUart16550a}, error::HvResult, }; use alloc::{sync::Arc, vec, vec::Vec}; +pub const PIC_MASTER_BASE_PORT: u16 = 0x20; +pub const PIC_SLAVE_BASE_PORT: u16 = 0xa0; +pub const UART_COM1_BASE_PORT: u16 = 0x3f8; + +#[allow(non_snake_case)] +pub mod DeviceMsg { + pub const UPDATE_IRQ_LOW: u8 = 0x0; + pub const UPDATE_IRQ_HIGH: u8 = 0x1; +} + pub trait PortIoDevice: Send + Sync { - fn port_range(&self) -> core::ops::Range; - fn read(&self, port: u16, access_size: u8) -> HvResult; - fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult; + fn port_range(&self) -> &Vec>; + fn read(&self, port: u16, msg: u8) -> HvResult; + fn write(&self, port: u16, value: u32, msg: u8) -> HvResult; } pub struct VirtDeviceList { @@ 
-16,18 +26,30 @@ pub struct VirtDeviceList { impl VirtDeviceList { pub fn find_port_io_device(&self, port: u16) -> Option<&Arc> { - self.port_io_devices - .iter() - .find(|dev| dev.port_range().contains(&port)) + self.port_io_devices.iter().find(|dev| { + dev.port_range() + .iter() + .find(|range| range.contains(&port)) + .is_some() + }) + } + + pub fn send_msg(&self, port: u16, value: u32, msg: u8) { + if let Some(device) = self.find_port_io_device(port) { + /*info!( + "SEND MSG! port: {:x}, value: {:x}, msg: {:x}", + port, value, msg + );*/ + device.write(port, value, msg).unwrap(); + } } } lazy_static::lazy_static! { static ref VIRT_DEVICES : VirtDeviceList = VirtDeviceList { port_io_devices: vec![ - Arc::new(VirtUart16550::new(0x3f8)), // COM1 - Arc::new(VirtI8259Pic::new(0x20)), // PIC1 - Arc::new(VirtI8259Pic::new(0xA0)), // PIC2 + Arc::new(VirtDualI8259a::new(PIC_MASTER_BASE_PORT, PIC_SLAVE_BASE_PORT)), // Dual PIC + Arc::new(VirtUart16550a::new(UART_COM1_BASE_PORT)), // COM1 ], }; } diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 3b6b3e20..744ff62c 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,5 +1,6 @@ #![allow(unused)] pub mod acpi; +pub mod apic; pub mod boot; pub mod cpu; pub mod cpuid; @@ -8,7 +9,6 @@ pub mod entry; pub mod gdt; pub mod idt; pub mod ipi; -pub mod lapic; pub mod mm; pub mod msr; pub mod paging; diff --git a/src/arch/x86_64/multiboot.S b/src/arch/x86_64/multiboot.S index 47623cb8..ffaafd34 100644 --- a/src/arch/x86_64/multiboot.S +++ b/src/arch/x86_64/multiboot.S @@ -130,7 +130,7 @@ ap_entry64: .balign 4096 .Ltmp_pml4: - // 0x0000_0000 ~ 0x8000_0000 + // 0x0000_0000 ~ 0x1_0000_0000 .quad .Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) .zero 8 * 510 // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_8000_0000 @@ -139,7 +139,9 @@ ap_entry64: .Ltmp_pdpt_low: .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | 
paddr(0x4000_0000) - .zero 8 * 510 + .quad 0x80000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) + .quad 0xc0000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) + .zero 8 * 508 .Ltmp_pdpt_high: .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 3ed16017..c74c9613 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,20 +1,22 @@ use crate::{ arch::{ + apic::{local_apic, vectors::*}, cpu::ArchCpu, cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, device::all_virt_devices, idt::IdtStruct, - lapic::{local_apic, vectors::*}, msr::Msr::{self, *}, s2pt::Stage2PageFaultInfo, vmcs::*, vmx::{VmxCrAccessInfo, VmxExitInfo, VmxExitReason, VmxInterruptInfo, VmxIoExitInfo}, }, - device::irqchip::pic::lapic::VirtLocalApic, + device::{irqchip::pic::lapic::VirtLocalApic, uart::UartReg}, error::HvResult, }; use x86_64::registers::control::Cr4Flags; +use super::device::UART_COM1_BASE_PORT; + core::arch::global_asm!( include_str!("trap.S"), sym arch_handle_trap @@ -69,14 +71,17 @@ pub fn arch_handle_trap(tf: &mut TrapFrame) { fn handle_irq(vector: u8) { match vector { - APIC_TIMER_VECTOR => { - // println!("Timer"); - unsafe { local_apic().end_of_interrupt() }; + APIC_TIMER_VECTOR => {} + UART_COM1_VECTOR => { + if let Some(device) = all_virt_devices().find_port_io_device(UART_COM1_BASE_PORT) { + device.read(UART_COM1_BASE_PORT + UartReg::LINE_STATUS, 0); + } } _ => { println!("Unhandled irq {}", vector); } } + unsafe { local_apic().end_of_interrupt() }; } fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { @@ -199,7 +204,7 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR if let Some(dev) = all_virt_devices().find_port_io_device(io_info.port) { if io_info.is_in { - let value = dev.read(io_info.port, io_info.access_size)?; + let value = dev.read(io_info.port, 0)?; let rax = &mut arch_cpu.regs_mut().rax; 
// SDM Vol. 1, Section 3.4.1.1: // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the @@ -221,7 +226,7 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR 4 => rax, _ => unreachable!(), } as u32; - dev.write(io_info.port, io_info.access_size, value)?; + dev.write(io_info.port, value, 0)?; } } else { debug!( @@ -315,7 +320,7 @@ pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { VmxExitReason::CPUID => handle_cpuid(arch_cpu), VmxExitReason::RDTSC => { // FIXME: temp! - let current_ticks = crate::arch::lapic::current_ticks(); + let current_ticks = crate::arch::apic::current_ticks(); let regs = arch_cpu.regs_mut(); regs.rdx = (current_ticks >> 32) & (u32::MAX as u64); regs.rax = current_ticks & (u32::MAX as u64); diff --git a/src/device/irqchip/pic/i8259.rs b/src/device/irqchip/pic/i8259.rs deleted file mode 100644 index bf86e4fb..00000000 --- a/src/device/irqchip/pic/i8259.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::{arch::device::PortIoDevice, error::HvResult}; - -pub struct VirtI8259Pic { - port_base: u16, -} - -impl PortIoDevice for VirtI8259Pic { - fn port_range(&self) -> core::ops::Range { - self.port_base..self.port_base + 2 - } - - fn read(&self, port: u16, access_size: u8) -> HvResult { - Ok(0) - // hv_result_err!(EIO) report error for read - } - - fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult { - Ok(()) // ignore write - } -} - -impl VirtI8259Pic { - pub const fn new(port_base: u16) -> Self { - Self { port_base } - } -} diff --git a/src/device/irqchip/pic/i8259a.rs b/src/device/irqchip/pic/i8259a.rs new file mode 100644 index 00000000..8ad3a0be --- /dev/null +++ b/src/device/irqchip/pic/i8259a.rs @@ -0,0 +1,321 @@ +use crate::{ + arch::device::{ + all_virt_devices, DeviceMsg, PortIoDevice, PIC_MASTER_BASE_PORT, PIC_SLAVE_BASE_PORT, + }, + error::HvResult, + percpu::this_cpu_data, +}; +use alloc::vec::Vec; +use core::ops::Range; +use spin::{Mutex, MutexGuard}; + 
+pub const SEND_IRQ_KEY: u8 = 0x82; +pub const SEND_VECTOR_KEY: u8 = 0x59; + +struct VirtI8259aUnlocked { + base_port: u16, + isr: u8, + irr: u8, + imr: u8, + init_state: u8, + vector_base: u8, + auto_eoi: bool, + icw4_needed: bool, + is_master: bool, + poll_cmd: bool, + read_isr: bool, + special_fully_nested_mode: bool, + special_mask: bool, +} + +impl VirtI8259aUnlocked { + fn new(base_port: u16, is_master: bool) -> Self { + Self { + base_port, + isr: 0, + irr: 0, + imr: 0, + init_state: 0, + vector_base: 0, + auto_eoi: false, + icw4_needed: false, + is_master, + poll_cmd: false, + read_isr: false, + special_fully_nested_mode: false, + special_mask: false, + } + } + + fn ack_irq(&mut self, irq: i32) { + // TODO: auto eoi + self.isr |= (1 << irq); + // TODO: elcr + self.irr &= !(1 << irq); + // VirtDualI8259aUnlocked::update_irq(self, pic2); + } + + fn get_priority(&mut self, mask: u8) -> u8 { + if mask == 0 { + return 8; + } + + let mut priority: u8 = 0; + // TODO: priority add + while mask & (1 << (priority & 7)) == 0 { + priority += 1; + } + + priority + } + + fn get_irq(&mut self) -> i32 { + let mut mask = self.irr & !self.imr; + let priority = self.get_priority(mask); + if priority == 8 { + return -1; + } + + mask = self.isr; + if self.special_mask { + mask &= !self.imr; + } + if self.special_fully_nested_mode && self.is_master { + mask &= !(1u8 << 2); // ignore in service slave irq + } + let cur_priority = self.get_priority(mask); + if priority < cur_priority { + return (priority as i32) & 7; + } + + -1 + } + + fn set_irq(&mut self, irq: i32) { + let mask: u8 = 1 << irq; + self.irr |= mask; + } +} + +pub struct VirtDualI8259aUnlocked { + pics: Vec, +} + +impl VirtDualI8259aUnlocked { + fn new(master_base_port: u16, slave_base_port: u16) -> Self { + Self { + pics: vec![ + VirtI8259aUnlocked::new(master_base_port, true), + VirtI8259aUnlocked::new(slave_base_port, false), + ], + } + } + + fn ack_irq(&mut self, irq: i32, id: usize) { + 
self.pics[id].ack_irq(irq); + self.update_irq(id); + } + + fn set_irq(&mut self, irq: i32, id: usize) { + self.pics[id].set_irq(irq); + self.update_irq(id); + } + + fn update_irq(&mut self, id: usize) { + let mut pic = &mut self.pics[id]; + let irq = pic.get_irq(); + if irq < 0 { + return; + } + + if pic.is_master { + let mut vector = pic.vector_base + (irq as u8); + if irq == 2 { + let mut irq2 = self.pics[1].get_irq(); + if irq2 >= 0 { + self.ack_irq(irq2, 1); + } else { + irq2 = 7; // spurious irq + } + vector = self.pics[1].vector_base + (irq2 as u8); + } + self.ack_irq(irq, 0); + // TODO: inject irq + this_cpu_data().arch_cpu.inject_interrupt(vector, None); + } else { + self.set_irq(2, 0); + } + } + + fn read(&mut self, id: usize, port: u16) -> HvResult { + let mut pic = &mut self.pics[id]; + if pic.poll_cmd { + pic.poll_cmd = false; + let mut irq = pic.get_irq(); + if irq >= 0 { + self.ack_irq(irq, id); + irq |= 0x80; + } else { + irq = 0; + } + return Ok(irq as u32); + } + + let mut ret = 0; + let offset = port - pic.base_port; + if offset == 0 { + if pic.read_isr { + ret = pic.isr; + } else { + ret = pic.irr; + } + } else { + ret = pic.imr; + } + + Ok(ret as u32) + } + + fn write(&mut self, id: usize, port: u16, value: u32) -> HvResult { + let mut pic = &mut self.pics[id]; + let offset = port - pic.base_port; + let value: u8 = value as u8; + if offset == 0 { + if value & 0x10 != 0 { + // ICW1 + if value & 0x08 != 0 { + error!("I8259A: level-triggered not supported!"); + } + if value & 0x02 != 0 { + error!("I8259A: single pic not supported!"); + } + pic.init_state = 1; + pic.icw4_needed = (value & 0x01) != 0; + } else if value & 0x08 != 0 { + // OCW3 + if value & 0x02 != 0 { + pic.read_isr = value & 1 != 0; + } + if value & 0x04 != 0 { + pic.poll_cmd = true; + } + if value & 0x40 != 0 { + pic.special_mask = (value >> 5) & 1 != 0; + } + } else { + // OCW2 + let cmd = value >> 5; + match cmd { + 0 | 4 => { + // TODO: rotate auto eoi + } + 1 | 5 => { + // non 
specific eoi + let isr = pic.isr; + let priority = pic.get_priority(isr); + if priority != 8 { + let irq = priority & 7; + pic.isr &= !(1 << irq); + self.update_irq(id); + } + } + 3 | 7 => { + // specific eoi + let irq = value & 7; + pic.isr &= !(1 << irq); + self.update_irq(id); + } + _ => {} + } + } + } else { + match pic.init_state { + 0 => { + pic.imr = value; + self.update_irq(id); + } + 1 => { + // ICW2 + pic.vector_base = value & 0xf8; + info!("I8259A: vector base: {:x}", pic.vector_base); + pic.init_state = 2; + } + 2 => { + // ICW3 + // master: 0x4, slave: 0x2 + pic.init_state = match pic.icw4_needed { + true => 3, + false => 0, + } + } + 3 => { + // ICW4 + pic.special_fully_nested_mode = (value >> 4) & 1 != 0; + pic.auto_eoi = (value >> 1) & 1 != 0; + pic.init_state = 0; + } + _ => {} + } + } + Ok(()) + } +} + +pub struct VirtDualI8259a { + port_range: Vec>, + dual_pic: Mutex, +} + +impl VirtDualI8259a { + pub fn new(master_base_port: u16, slave_base_port: u16) -> Self { + Self { + port_range: vec![ + master_base_port..master_base_port + 2, + slave_base_port..slave_base_port + 2, + ], + dual_pic: Mutex::new(VirtDualI8259aUnlocked::new( + master_base_port, + slave_base_port, + )), + } + } +} + +impl PortIoDevice for VirtDualI8259a { + fn port_range(&self) -> &Vec> { + &self.port_range + } + + fn read(&self, port: u16, msg: u8) -> HvResult { + // info!("I8259A read, port: {:x}", port); + let mut dual_pic = self.dual_pic.lock(); + + if self.port_range[0].contains(&port) { + dual_pic.read(0, port) + } else { + dual_pic.read(1, port) + } + } + + fn write(&self, port: u16, value: u32, msg: u8) -> HvResult { + // info!("I8259A write, port: {:x} value: {:x}", port, value); + let mut dual_pic = self.dual_pic.lock(); + let is_master = self.port_range[0].contains(&port); + let id = match is_master { + true => 0, + false => 1, + }; + + if msg != 0 { + match msg { + DeviceMsg::UPDATE_IRQ_HIGH => { + dual_pic.set_irq(value as i32, id); + } + _ => {} + } + return 
Ok(()); + } + + dual_pic.write(id, port, value) + } +} diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index c33c077d..2baf92b3 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -1,7 +1,7 @@ use crate::{ arch::{ + apic::current_time_nanos, cpu::ArchCpu, - lapic::current_time_nanos, msr::Msr::{self, *}, }, error::HvResult, diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 8a9bc620..3a554c49 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,4 +1,4 @@ -pub mod i8259; +pub mod i8259a; pub mod lapic; use crate::zone::Zone; diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index fdefb85e..3da2f253 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -20,7 +20,7 @@ mod ns16440a; pub use ns16440a::{console_getchar, console_putchar}; #[cfg(target_arch = "x86_64")] -mod uart16550; +mod uart16550a; #[cfg(target_arch = "x86_64")] -pub use uart16550::{console_getchar, console_putchar, VirtUart16550}; +pub use uart16550a::{console_getchar, console_putchar, UartReg, VirtUart16550a}; diff --git a/src/device/uart/uart16550.rs b/src/device/uart/uart16550.rs deleted file mode 100644 index 35cd862c..00000000 --- a/src/device/uart/uart16550.rs +++ /dev/null @@ -1,224 +0,0 @@ -use crate::{arch::device::PortIoDevice, error::HvResult}; -use spin::Mutex; -use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; - -const DATA_REG: u16 = 0; -const INT_EN_REG: u16 = 1; -const FIFO_CTRL_REG: u16 = 2; -const LINE_CTRL_REG: u16 = 3; -const MODEM_CTRL_REG: u16 = 4; -const LINE_STATUS_REG: u16 = 5; -const MODEM_STATUS_REG: u16 = 6; -const SCRATCH_REG: u16 = 7; - -const UART_CLOCK_FACTOR: usize = 16; -const UART_FIFO_CAPACITY: usize = 16; -const OSC_FREQ: usize = 1_843_200; - -lazy_static::lazy_static! 
{ - static ref COM1: Mutex = { - let mut uart = Uart16550::new(0x3f8); - uart.init(115200); - Mutex::new(uart) - }; -} - -bitflags::bitflags! { - /// Line status flags - struct LineStatusFlags: u8 { - const RECEIVER_DATA_READY = 1; - const TRANSMIT_HOLD_REG_EMPTY = 1 << 5; - const TRANSMITTER_EMPTY = 1 << 6; - } -} - -/// FIFO queue for caching bytes read. -struct Fifo { - buf: [u8; CAP], - head: usize, - num: usize, -} - -impl Fifo { - const fn new() -> Self { - Self { - buf: [0; CAP], - head: 0, - num: 0, - } - } - - fn is_empty(&self) -> bool { - self.num == 0 - } - - fn is_full(&self) -> bool { - self.num == CAP - } - - fn push(&mut self, value: u8) { - assert!(self.num < CAP); - self.buf[(self.head + self.num) % CAP] = value; - self.num += 1; - } - - fn pop(&mut self) -> u8 { - assert!(self.num > 0); - let ret = self.buf[self.head]; - self.head += 1; - self.head %= CAP; - self.num -= 1; - ret - } -} - -struct Uart16550 { - rhr: PortReadOnly, // receive holding - thr: PortWriteOnly, // transmit holding - ier: PortWriteOnly, // interrupt enable - fcr: PortWriteOnly, // fifo control - lcr: PortWriteOnly, // line control - mcr: PortWriteOnly, // modem control - lsr: PortReadOnly, // line status -} - -impl Uart16550 { - const fn new(base_port: u16) -> Self { - Self { - rhr: PortReadOnly::new(base_port + DATA_REG), - thr: PortWriteOnly::new(base_port + DATA_REG), - ier: PortWriteOnly::new(base_port + INT_EN_REG), - fcr: PortWriteOnly::new(base_port + FIFO_CTRL_REG), - lcr: PortWriteOnly::new(base_port + LINE_CTRL_REG), - mcr: PortWriteOnly::new(base_port + MODEM_CTRL_REG), - lsr: PortReadOnly::new(base_port + LINE_STATUS_REG), - } - } - - fn init(&mut self, baud_rate: usize) { - unsafe { - // disable interrupts - self.ier.write(0x00); - - // enable DLAB, set baud rate - let divisor = OSC_FREQ / (baud_rate * UART_CLOCK_FACTOR); - self.lcr.write(0x80); - self.thr.write((divisor & 0xff) as u8); - self.ier.write((divisor >> 8) as u8); - - // disable DLAB, set word 
length to 8 bits - self.lcr.write(0x03); - - // enable fifo, clear tx/rx queues - // set interrupt level to 14 bytes - self.fcr.write(0xC7); - - // data terminal ready, request to send - // enable option 2 output (used as interrupt line for CPU) - self.mcr.write(0x0B); - } - } - - fn putchar(&mut self, c: u8) { - unsafe { - while self.lsr.read() & LineStatusFlags::TRANSMIT_HOLD_REG_EMPTY.bits() == 0 {} - self.thr.write(c); - } - } - - fn getchar(&mut self) -> Option { - unsafe { - if self.lsr.read() & LineStatusFlags::RECEIVER_DATA_READY.bits() != 0 { - Some(self.rhr.read()) - } else { - None - } - } - } -} - -pub struct VirtUart16550 { - base_port: u16, - fifo: Mutex>, -} - -impl VirtUart16550 { - pub fn new(base_port: u16) -> Self { - Self { - base_port, - fifo: Mutex::new(Fifo::new()), - } - } -} - -impl PortIoDevice for VirtUart16550 { - fn port_range(&self) -> core::ops::Range { - self.base_port..self.base_port + 8 - } - - fn read(&self, port: u16, access_size: u8) -> HvResult { - if access_size != 1 { - error!("Invalid serial port I/O read size: {} != 1", access_size); - return hv_result_err!(EIO); - } - let ret = match port - self.base_port { - DATA_REG => { - // read a byte from FIFO - let mut fifo = self.fifo.lock(); - if fifo.is_empty() { - 0 - } else { - fifo.pop() - } - } - LINE_STATUS_REG => { - // check if the physical serial port has an available byte, and push it to FIFO. 
- let mut fifo = self.fifo.lock(); - if !fifo.is_full() { - if let Some(c) = console_getchar() { - fifo.push(c); - } - } - let mut lsr = - LineStatusFlags::TRANSMIT_HOLD_REG_EMPTY | LineStatusFlags::TRANSMITTER_EMPTY; - if !fifo.is_empty() { - lsr |= LineStatusFlags::RECEIVER_DATA_READY; - } - lsr.bits() - } - FIFO_CTRL_REG => { - 0xc0 // FIFO enabled - } - INT_EN_REG | LINE_CTRL_REG | MODEM_CTRL_REG | MODEM_STATUS_REG | SCRATCH_REG => { - debug!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented - 0 - } - _ => unreachable!(), - }; - Ok(ret as u32) - } - - fn write(&self, port: u16, access_size: u8, value: u32) -> HvResult { - if access_size != 1 { - error!("Invalid serial port I/O write size: {} != 1", access_size); - return hv_result_err!(EIO); - } - match port - self.base_port { - DATA_REG => console_putchar(value as u8), - INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | SCRATCH_REG => { - debug!("Unimplemented serial port I/O write: {:#x}", port); // unimplemented - } - LINE_STATUS_REG => {} // ignore - _ => unreachable!(), - } - Ok(()) - } -} - -pub fn console_putchar(c: u8) { - COM1.lock().putchar(c); -} - -pub fn console_getchar() -> Option { - COM1.lock().getchar() -} diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs new file mode 100644 index 00000000..a0465aa2 --- /dev/null +++ b/src/device/uart/uart16550a.rs @@ -0,0 +1,351 @@ +use crate::{ + arch::device::{ + all_virt_devices, DeviceMsg, PortIoDevice, PIC_MASTER_BASE_PORT, UART_COM1_BASE_PORT, + }, + error::HvResult, +}; +use alloc::vec::Vec; +use core::ops::Range; +use spin::Mutex; +use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; + +#[allow(non_snake_case)] +pub mod UartReg { + pub const RCVR_BUFFER: u16 = 0; + pub const XMIT_BUFFER: u16 = 0; + pub const INTR_ENABLE: u16 = 1; + pub const INTR_IDENT: u16 = 2; + pub const FIFO_CTRL: u16 = 2; + pub const LINE_CTRL: u16 = 3; + pub const MODEM_CTRL: u16 = 4; + pub const LINE_STATUS: u16 
= 5; + pub const MODEM_STATUS: u16 = 6; + pub const SCRATCH: u16 = 7; +} + +const UART_COM1_IRQ: u32 = 4; +const UART_CLOCK_FACTOR: usize = 16; +const UART_FIFO_CAPACITY: usize = 64; +const OSC_FREQ: usize = 1_843_200; + +lazy_static::lazy_static! { + static ref COM1: Mutex = { + let mut uart = Uart16550a::new(UART_COM1_BASE_PORT); + uart.init(115200); + Mutex::new(uart) + }; +} + +bitflags::bitflags! { + struct InterruptEnableFlags: u8 { + const ENABLE_RCVR_DATA_AVAIL_INTR = 1 << 0; + const ENABLE_XMIT_HOLD_REG_EMPTY_INTR = 1 << 1; + const _ = !0; + } + + struct InterruptIdentFlags: u8 { + const NO_INTR_IS_PENDING = 1 << 0; + const XMIT_HOLD_REG_EMPTY = 0x2; + const RCVR_DATA_AVAIL = 0x4; + const FIFO_ENABLED_16550_MODE = 0xc0; + const _ = !0; + } + + struct LineControlFlags: u8 { + const WORD_LENGTH_SELECT_8_BITS = 0x3; + const DIVISOR_LATCH_ACCESS_BIT = 1 << 7; + const _ = !0; + } + + struct LineStatusFlags: u8 { + const RCVR_DATA_READY = 1; + const XMIT_HOLD_REG_EMPTY = 1 << 5; + const XMIT_EMPTY = 1 << 6; + const _ = !0; + } +} + +/// FIFO queue for caching bytes read. 
+struct Fifo { + buf: [u8; CAP], + head: usize, + num: usize, +} + +impl Fifo { + const fn new() -> Self { + Self { + buf: [0; CAP], + head: 0, + num: 0, + } + } + + fn is_empty(&self) -> bool { + self.num == 0 + } + + fn is_full(&self) -> bool { + self.num == CAP + } + + fn push(&mut self, value: u8) { + assert!(self.num < CAP); + self.buf[(self.head + self.num) % CAP] = value; + self.num += 1; + } + + fn pop(&mut self) -> u8 { + assert!(self.num > 0); + let ret = self.buf[self.head]; + self.head += 1; + self.head %= CAP; + self.num -= 1; + ret + } +} + +struct Uart16550a { + rhr: PortReadOnly, // receive holding + thr: PortWriteOnly, // transmit holding + ier: PortWriteOnly, // interrupt enable + fcr: PortWriteOnly, // fifo control + lcr: PortWriteOnly, // line control + mcr: PortWriteOnly, // modem control + lsr: PortReadOnly, // line status +} + +impl Uart16550a { + const fn new(base_port: u16) -> Self { + Self { + rhr: PortReadOnly::new(base_port + UartReg::RCVR_BUFFER), + thr: PortWriteOnly::new(base_port + UartReg::XMIT_BUFFER), + ier: PortWriteOnly::new(base_port + UartReg::INTR_ENABLE), + fcr: PortWriteOnly::new(base_port + UartReg::FIFO_CTRL), + lcr: PortWriteOnly::new(base_port + UartReg::LINE_CTRL), + mcr: PortWriteOnly::new(base_port + UartReg::MODEM_CTRL), + lsr: PortReadOnly::new(base_port + UartReg::LINE_STATUS), + } + } + + fn init(&mut self, baud_rate: usize) { + unsafe { + // enable read available interrupts + self.ier + .write(InterruptEnableFlags::ENABLE_RCVR_DATA_AVAIL_INTR.bits()); + + // enable DLAB, set baud rate + let divisor = OSC_FREQ / (baud_rate * UART_CLOCK_FACTOR); + self.lcr + .write(LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits()); + self.thr.write((divisor & 0xff) as u8); + self.ier.write((divisor >> 8) as u8); + + // disable DLAB, set word length to 8 bits + self.lcr + .write(LineControlFlags::WORD_LENGTH_SELECT_8_BITS.bits()); + + // enable fifo, clear tx/rx queues + // set interrupt level to 14 bytes + self.fcr.write(0xC7); + 
+ // data terminal ready, request to send + // enable option 2 output (used as interrupt line for CPU) + self.mcr.write(0x0B); + } + } + + fn putchar(&mut self, c: u8) { + unsafe { + while self.lsr.read() & LineStatusFlags::XMIT_HOLD_REG_EMPTY.bits() == 0 {} + self.thr.write(c); + } + } + + fn getchar(&mut self) -> Option { + unsafe { + if self.lsr.read() & LineStatusFlags::RCVR_DATA_READY.bits() != 0 { + Some(self.rhr.read()) + } else { + None + } + } + } +} + +pub struct VirtUart16550aUnlocked { + iir: u8, + ier: u8, + lcr: u8, + lsr: u8, + irq_state: u8, + fifo: Fifo, +} + +impl VirtUart16550aUnlocked { + fn new() -> Self { + Self { + iir: 0, + ier: 0, + lcr: 0, + lsr: (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(), + irq_state: 0, + fifo: Fifo::new(), + } + } + + fn update_irq(&mut self) { + let mut iir: u8 = 0; + + if self.ier & InterruptEnableFlags::ENABLE_RCVR_DATA_AVAIL_INTR.bits() != 0 + && self.lsr & LineStatusFlags::RCVR_DATA_READY.bits() != 0 + { + iir |= InterruptIdentFlags::RCVR_DATA_AVAIL.bits(); + } + + if self.ier & InterruptEnableFlags::ENABLE_XMIT_HOLD_REG_EMPTY_INTR.bits() != 0 + && self.lsr & LineStatusFlags::XMIT_EMPTY.bits() != 0 + { + iir |= InterruptIdentFlags::XMIT_HOLD_REG_EMPTY.bits(); + } + + if iir == 0 { + self.iir = InterruptIdentFlags::NO_INTR_IS_PENDING.bits(); + if self.irq_state != 0 { + all_virt_devices().send_msg( + PIC_MASTER_BASE_PORT, + UART_COM1_IRQ, + DeviceMsg::UPDATE_IRQ_LOW, + ); + } + } else { + self.iir = iir; + if self.irq_state == 0 { + all_virt_devices().send_msg( + PIC_MASTER_BASE_PORT, + UART_COM1_IRQ, + DeviceMsg::UPDATE_IRQ_HIGH, + ); + } + } + self.irq_state = iir; + } +} + +pub struct VirtUart16550a { + base_port: u16, + port_range: Vec>, + uart: Mutex, +} + +impl VirtUart16550a { + pub fn new(base_port: u16) -> Self { + Self { + base_port, + port_range: vec![base_port..base_port + 8], + uart: Mutex::new(VirtUart16550aUnlocked::new()), + } + } +} + +impl PortIoDevice for 
VirtUart16550a { + fn port_range(&self) -> &Vec> { + &self.port_range + } + + fn read(&self, port: u16, msg: u8) -> HvResult { + let mut uart = self.uart.lock(); + + let ret = match port - self.base_port { + UartReg::RCVR_BUFFER => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + 1 // dll + } else { + // read a byte from FIFO + if uart.fifo.is_empty() { + 0 + } else { + uart.fifo.pop() + } + } + } + UartReg::INTR_ENABLE => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + 0 //dlm + } else { + uart.ier + } + } + UartReg::INTR_IDENT => { + // info!("IIR read, {:x}", uart.iir); + uart.iir | InterruptIdentFlags::FIFO_ENABLED_16550_MODE.bits() + } + UartReg::LINE_CTRL => uart.lcr, + UartReg::LINE_STATUS => { + // check if the physical serial port has an available byte, and push it to FIFO. + if !uart.fifo.is_full() { + if let Some(c) = console_getchar() { + uart.fifo.push(c); + } + } + if !uart.fifo.is_empty() { + uart.lsr |= LineStatusFlags::RCVR_DATA_READY.bits(); + } else { + uart.lsr &= (!LineStatusFlags::RCVR_DATA_READY).bits(); + } + uart.lsr + } + UartReg::MODEM_CTRL | UartReg::MODEM_STATUS | UartReg::SCRATCH => { + debug!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented + 0 + } + _ => unreachable!(), + }; + + uart.update_irq(); + Ok(ret as u32) + } + + fn write(&self, port: u16, value: u32, msg: u8) -> HvResult { + let mut uart = self.uart.lock(); + let value: u8 = value as u8; + + match port - self.base_port { + UartReg::XMIT_BUFFER => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + // dll + } else { + uart.lsr |= + (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(); + console_putchar(value as u8); + } + } + UartReg::INTR_ENABLE => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + // dlm + } else { + uart.ier = value & 0x0f; + } + } + UartReg::LINE_CTRL => { + uart.lcr = value; + } + UartReg::FIFO_CTRL | 
UartReg::MODEM_CTRL | UartReg::SCRATCH => { + debug!("Unimplemented serial port I/O write: {:#x}", port); + } + UartReg::LINE_STATUS => {} // ignore + _ => unreachable!(), + } + + uart.update_irq(); + Ok(()) + } +} + +pub fn console_putchar(c: u8) { + COM1.lock().putchar(c); +} + +pub fn console_getchar() -> Option { + COM1.lock().getchar() +} diff --git a/src/main.rs b/src/main.rs index 4d2ab1ca..20341c23 100644 --- a/src/main.rs +++ b/src/main.rs @@ -133,7 +133,9 @@ fn per_cpu_init(cpu: &mut PerCpu) { fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { #[cfg(target_arch = "x86_64")] - arch::lapic::init_primary(); + arch::apic::init_lapic(); + #[cfg(target_arch = "x86_64")] + arch::apic::init_ioapic(); for cpu_id in 0..MAX_CPU_NUM { if cpu_id == this_id { diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index 609c5ff0..180f42c7 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -14,6 +14,8 @@ pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; pub const ROOT_ZONE_CPUS: u64 = (1 << 0); pub const ROOT_ZONE_NAME: &str = "root-linux"; +pub const ROOT_ZONE_CMDLINE: &str = + "console=ttyS0 earlyprintk=serial rdinit=/init nokaslr noapic\0"; pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ HvConfigMemoryRegion { From 476dbb6f241d8da1881aea5bd3111fd167f57ded Mon Sep 17 00:00:00 2001 From: Solicey Date: Tue, 18 Mar 2025 14:48:16 +0800 Subject: [PATCH 08/29] enable SMP on guest linux --- scripts/qemu-x86_64.mk | 2 +- scripts/x86_64/acpi/madt.asl | 16 ++ scripts/x86_64/acpi/rsdt.asl | 2 +- scripts/x86_64/acpi/xsdt.asl | 2 +- src/arch/x86_64/apic.rs | 49 ++++-- src/arch/x86_64/cpu.rs | 160 +++++++++----------- src/arch/x86_64/cpuid.rs | 1 + src/arch/x86_64/device.rs | 8 +- src/arch/x86_64/idt.rs | 9 ++ src/arch/x86_64/ipi.rs | 163 +++++++++++++++++++- src/arch/x86_64/mmio.rs | 0 src/arch/x86_64/msr.rs | 53 ++++++- src/arch/x86_64/trap.rs | 65 ++++---- src/arch/x86_64/vmcs.rs | 30 +++- 
src/device/irqchip/mod.rs | 2 +- src/device/irqchip/pic/hpet.rs | 211 ++++++++++++++++++++++++++ src/device/irqchip/pic/i8254.rs | 155 +++++++++++++++++++ src/device/irqchip/pic/i8259a.rs | 14 +- src/device/irqchip/pic/lapic.rs | 249 ++++++++++++++++++++++--------- src/device/irqchip/pic/mod.rs | 76 +++++++++- src/device/uart/uart16550a.rs | 31 ++-- src/main.rs | 10 +- src/platform/qemu_x86_64.rs | 2 +- 23 files changed, 1070 insertions(+), 240 deletions(-) create mode 100644 src/arch/x86_64/mmio.rs create mode 100644 src/device/irqchip/pic/hpet.rs create mode 100644 src/device/irqchip/pic/i8254.rs diff --git a/scripts/qemu-x86_64.mk b/scripts/qemu-x86_64.mk index 8abf8ebb..d94efbff 100644 --- a/scripts/qemu-x86_64.mk +++ b/scripts/qemu-x86_64.mk @@ -19,7 +19,7 @@ aml_rsdt := $(acpi_aml_dir)/rsdt.aml aml_xsdt := $(acpi_aml_dir)/xsdt.aml QEMU_ARGS := -machine q35 -QEMU_ARGS += -cpu host,+x2apic -accel kvm +QEMU_ARGS += -cpu host,+x2apic,+invtsc -accel kvm QEMU_ARGS += -smp 4 QEMU_ARGS += -serial mon:stdio QEMU_ARGS += -m 2G diff --git a/scripts/x86_64/acpi/madt.asl b/scripts/x86_64/acpi/madt.asl index 3d413454..4c8e1309 100644 --- a/scripts/x86_64/acpi/madt.asl +++ b/scripts/x86_64/acpi/madt.asl @@ -24,6 +24,22 @@ Processor Enabled : 1 Runtime Online Capable : 0 +[0001] Subtable Type : 00 +[0001] Length : 08 +[0001] Processor ID : 01 +[0001] Local Apic ID : 01 +[0004] Flags (decoded below) : 00000001 + Processor Enabled : 1 + Runtime Online Capable : 0 + +[0001] Subtable Type : 00 +[0001] Length : 08 +[0001] Processor ID : 02 +[0001] Local Apic ID : 02 +[0004] Flags (decoded below) : 00000001 + Processor Enabled : 1 + Runtime Online Capable : 0 + /* IO APIC */ [0001] Subtable Type : 01 [0001] Length : 0C diff --git a/scripts/x86_64/acpi/rsdt.asl b/scripts/x86_64/acpi/rsdt.asl index 4d778ae7..a86c0c8a 100644 --- a/scripts/x86_64/acpi/rsdt.asl +++ b/scripts/x86_64/acpi/rsdt.asl @@ -15,4 +15,4 @@ /* MADT */ [0004] ACPI Table Address : 000f2500 /* HPET */ -[0004] ACPI 
Table Address : 000f2740 \ No newline at end of file +// [0004] ACPI Table Address : 000f2740 \ No newline at end of file diff --git a/scripts/x86_64/acpi/xsdt.asl b/scripts/x86_64/acpi/xsdt.asl index f00ddf50..6b71e6df 100644 --- a/scripts/x86_64/acpi/xsdt.asl +++ b/scripts/x86_64/acpi/xsdt.asl @@ -15,4 +15,4 @@ /* MADT */ [0004] ACPI Table Address : 000f2500 /* HPET */ -[0004] ACPI Table Address : 000f2740 \ No newline at end of file +// [0004] ACPI Table Address : 000f2740 \ No newline at end of file diff --git a/src/arch/x86_64/apic.rs b/src/arch/x86_64/apic.rs index 23ab43fe..d4f56b6e 100644 --- a/src/arch/x86_64/apic.rs +++ b/src/arch/x86_64/apic.rs @@ -1,7 +1,8 @@ use self::irqs::*; -use self::vectors::*; use crate::device::irqchip::pic::enable_irq; +use crate::device::irqchip::pic::hpet; use core::time::Duration; +use core::u32; use raw_cpuid::CpuId; use spin::Mutex; use x2apic::{ @@ -15,8 +16,10 @@ type TimeValue = Duration; pub mod irqs { pub const UART_COM1_IRQ: u8 = 0x4; } +static mut IO_APIC: Option = None; +const IO_APIC_BASE: u64 = 0xfec00000; -pub mod vectors { +/*pub mod vectors { pub const APIC_TIMER_VECTOR: u8 = 0xf0; pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; pub const APIC_ERROR_VECTOR: u8 = 0xf2; @@ -28,8 +31,6 @@ static mut CPU_FREQ_MHZ: u64 = 4_000; const LAPIC_TICKS_PER_SEC: u64 = 1_000_000_000; // TODO: need to calibrate const TICKS_PER_SEC: u64 = 100; -const IO_APIC_BASE: u64 = 0xfec00000; -static mut IO_APIC: Option = None; pub fn local_apic<'a>() -> &'a mut LocalApic { // It's safe as LAPIC is per-cpu. 
@@ -60,7 +61,7 @@ fn busy_wait_until(deadline: TimeValue) { while current_time() < deadline { core::hint::spin_loop(); } -} +}*/ // FIXME: temporary unsafe fn configure_gsi(io_apic: &mut IoApic, gsi: u8, vector: u8) { @@ -74,14 +75,18 @@ unsafe fn configure_gsi(io_apic: &mut IoApic, gsi: u8, vector: u8) { } pub fn init_ioapic() { + println!("Initializing I/O APIC..."); unsafe { + Port::::new(0x20).write(0xff); + Port::::new(0xA0).write(0xff); + let mut io_apic = IoApic::new(IO_APIC_BASE); - configure_gsi(&mut io_apic, UART_COM1_IRQ, UART_COM1_VECTOR); + configure_gsi(&mut io_apic, UART_COM1_IRQ, 0xf3); IO_APIC = Some(io_apic); } } -pub fn init_lapic() { +/*pub fn init_lapic() { println!("Initializing Local APIC..."); unsafe { @@ -108,18 +113,42 @@ pub fn init_lapic() { } } + unsafe { + lapic.enable(); + } + + let mut best_freq_hz = 0; + for _ in 0..5 { + unsafe { lapic.set_timer_initial(u32::MAX) }; + let hpet_start = hpet::current_ticks(); + hpet::wait_millis(10); + let ticks = u32::MAX - unsafe { lapic.timer_current() }; + let hpet_end = hpet::current_ticks(); + + let nanos = hpet::ticks_to_nanos(hpet_end.wrapping_sub(hpet_start)); + let ticks_per_sec = (ticks as u64 * 1_000_000_000 / nanos) as u32; + + if ticks_per_sec > best_freq_hz { + best_freq_hz = ticks_per_sec; + } + } + println!( + "Calibrated LAPIC frequency: {}.{:03} MHz", + best_freq_hz / 1_000_000, + best_freq_hz % 1_000_000 / 1_000, + ); + /*if let Some(sth) = CpuId::new().get_processor_brand_string() { println!("{:?}", sth); }*/ unsafe { - lapic.enable(); lapic.set_timer_mode(TimerMode::Periodic); lapic.set_timer_divide(TimerDivide::Div256); - lapic.set_timer_initial((LAPIC_TICKS_PER_SEC / TICKS_PER_SEC) as u32); + lapic.set_timer_initial((best_freq_hz as u64 / TICKS_PER_SEC) as u32); } unsafe { LOCAL_APIC = Some(lapic) }; enable_irq(); -} +}*/ diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 98f7888d..92899480 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ 
-1,25 +1,32 @@ use crate::{ arch::{ - apic::{busy_wait, local_apic}, boot::BootParams, gdt::{get_tr_base, GdtStruct}, - msr::{Msr, Msr::*, MsrBitmap}, + ipi, + msr::{ + Msr::{self, *}, + MsrBitmap, + }, vmcs::*, vmx::*, }, - consts::{core_end, PER_CPU_SIZE}, - device::irqchip::pic::lapic::VirtApicTimer, + consts::{core_end, MAX_CPU_NUM, PER_CPU_SIZE}, + device::irqchip::pic::{ + check_pending_vectors, hpet, + lapic::{VirtLocalApic, VirtLocalApicTimer}, + }, error::{HvError, HvResult}, memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, percpu::this_cpu_data, platform::qemu_x86_64::*, }; -use alloc::{boxed::Box, collections::vec_deque::VecDeque}; +use alloc::boxed::Box; use core::{ arch::{asm, global_asm}, fmt::{Debug, Formatter, Result}, mem::size_of, ptr::copy_nonoverlapping, + sync::atomic::{AtomicU32, Ordering}, time::Duration, }; use raw_cpuid::CpuId; @@ -37,6 +44,8 @@ use x86_64::{ const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; +static VM_LAUNCH_READY: AtomicU32 = AtomicU32::new(0); + global_asm!( include_str!("ap_start.S"), ap_start_page_paddr = const AP_START_PAGE_PADDR, @@ -108,13 +117,13 @@ unsafe fn setup_ap_start_page(cpuid: usize) { pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { unsafe { setup_ap_start_page(cpuid) }; - let lapic = local_apic(); + let lapic = VirtLocalApic::phys_local_apic(); // Intel SDM Vol 3C, Section 8.4.4, MP Initialization Example unsafe { lapic.send_init_ipi(cpuid as u32) }; - busy_wait(Duration::from_millis(10)); // 10ms + hpet::busy_wait(Duration::from_millis(10)); // 10ms unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; - busy_wait(Duration::from_micros(200)); // 200us + hpet::busy_wait(Duration::from_micros(200)); // 200us unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; } @@ -148,12 +157,11 @@ pub struct ArchCpu { pub cpuid: usize, pub power_on: bool, pub gdt: GdtStruct, + pub virt_lapic: 
VirtLocalApic, vmcs_revision_id: u32, vmxon_region: VmxRegion, vmcs_region: VmxRegion, msr_bitmap: MsrBitmap, - apic_timer: VirtApicTimer, - pending_events: VecDeque<(u8, Option)>, } impl ArchCpu { @@ -161,17 +169,16 @@ impl ArchCpu { let boxed = Box::new(TaskStateSegment::new()); let tss = Box::leak(boxed); Self { + guest_regs: GeneralRegisters::default(), + host_stack_top: 0, cpuid, power_on: false, gdt: GdtStruct::new(tss), + virt_lapic: VirtLocalApic::new(), vmcs_revision_id: 0, vmxon_region: VmxRegion::uninit(), vmcs_region: VmxRegion::uninit(), - guest_regs: GeneralRegisters::default(), - host_stack_top: 0, msr_bitmap: MsrBitmap::uninit(), - apic_timer: VirtApicTimer::new(), - pending_events: VecDeque::with_capacity(8), } } @@ -180,11 +187,6 @@ impl ArchCpu { Ok(VmcsGuestNW::RIP.write(VmcsGuestNW::RIP.read()? + instr_len as usize)?) } - /// Returns the mutable reference of [`VirtApicTimer`]. - pub fn apic_timer_mut(&mut self) -> &mut VirtApicTimer { - &mut self.apic_timer - } - pub fn cr(&self, cr_idx: usize) -> usize { (|| -> HvResult { Ok(match cr_idx { @@ -201,14 +203,11 @@ impl ArchCpu { pub fn idle(&mut self) -> ! { assert!(this_cpu_id() == self.cpuid); - // unsafe { self.reset(0, this_cpu_data().dtb_ipa) }; - loop {} - } - /// Add a virtual interrupt or exception to the pending events list, - /// and try to inject it before later VM entries. - pub fn inject_interrupt(&mut self, vector: u8, err_code: Option) { - self.pending_events.push_back((vector, err_code)); + self.activate_vmx().unwrap(); + VM_LAUNCH_READY.fetch_add(1, Ordering::SeqCst); + + loop {} } /// Guest general-purpose registers. @@ -221,79 +220,52 @@ impl ArchCpu { &mut self.guest_regs } - pub fn reset(&mut self, entry: GuestPhysAddr) -> HvResult { - self.activate_vmx()?; - self.setup_vmcs(entry)?; - Ok(()) - } - pub fn run(&mut self) -> ! 
{ assert!(this_cpu_id() == self.cpuid); - // this_cpu_data().cpu_on_entry - self.reset(this_cpu_data().cpu_on_entry).unwrap(); + let mut per_cpu = this_cpu_data(); - self.setup_boot_params().unwrap(); - this_cpu_data().activate_gpm(); + if per_cpu.boot_cpu { + // only bsp does this + self.activate_vmx().unwrap(); + self.setup_boot_params().unwrap(); + } else { + // ap start up never returns to irq handler + unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; + if let Some(ipi_info) = ipi::get_ipi_info(self.cpuid) { + per_cpu.cpu_on_entry = ipi_info.lock().start_up_addr; + } + // VmcsGuestNW::RIP.write(per_cpu.cpu_on_entry).unwrap(); + // info!("AP start up! addr: {:x}", per_cpu.cpu_on_entry); + } - unsafe { self.vmx_launch() }; - loop {} - } + self.setup_vmcs(per_cpu.cpu_on_entry, per_cpu.boot_cpu) + .unwrap(); + per_cpu.activate_gpm(); - /// If enable, a VM exit occurs at the beginning of any instruction if - /// `RFLAGS.IF` = 1 and there are no other blocking of interrupts. - /// (see SDM, Vol. 3C, Section 24.4.2) - pub fn set_interrupt_window(&mut self, enable: bool) -> HvResult { - let mut ctrl: u32 = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?; - let bits = PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); - if enable { - ctrl |= bits - } else { - ctrl &= !bits + while VM_LAUNCH_READY.load(Ordering::Acquire) < MAX_CPU_NUM as u32 - 1 { + core::hint::spin_loop(); } - VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(ctrl)?; - Ok(()) + + unsafe { self.vmx_launch() }; + loop {} } fn activate_vmx(&mut self) -> HvResult { assert!(check_vmx_support()); - assert!(!is_vmx_enabled()); + // assert!(!is_vmx_enabled()); // enable VMXON - unsafe { enable_vmxon()? 
}; + unsafe { enable_vmxon().unwrap() }; // TODO: check related registers // get VMCS revision identifier in IA32_VMX_BASIC MSR self.vmcs_revision_id = get_vmcs_revision_id(); - self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false)?; - - unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64)? }; - - Ok(()) - } + self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false).unwrap(); - /// Whether the guest interrupts are blocked. (SDM Vol. 3C, Section 24.4.2, Table 24-3) - fn allow_interrupt(&self) -> bool { - let rflags = VmcsGuestNW::RFLAGS.read().unwrap(); - let block_state = VmcsGuest32::INTERRUPTIBILITY_STATE.read().unwrap(); - rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0 - && block_state == 0 - } + unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap() }; - /// Try to inject a pending event before next VM entry. - fn check_pending_events(&mut self) -> HvResult { - if let Some(event) = self.pending_events.front() { - let allow_interrupt = self.allow_interrupt(); - if event.0 < 32 || allow_interrupt { - // if it's an exception, or an interrupt that is not blocked, inject it directly. - Vmcs::inject_interrupt(event.0, event.1)?; - self.pending_events.pop_front(); - } else { - // interrupts are blocked, enable interrupt-window exiting. 
- self.set_interrupt_window(true)?; - } - } Ok(()) } @@ -343,7 +315,7 @@ impl ArchCpu { Ok(()) } - fn setup_vmcs(&mut self, entry: GuestPhysAddr) -> HvResult { + fn setup_vmcs(&mut self, entry: GuestPhysAddr, set_rip: bool) -> HvResult { self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false)?; self.msr_bitmap = MsrBitmap::intercept_def()?; @@ -352,7 +324,7 @@ impl ArchCpu { Vmcs::load(start_paddr)?; self.setup_vmcs_host(&self.host_stack_top as *const _ as usize)?; - self.setup_vmcs_guest(entry, ROOT_ZONE_BOOT_STACK)?; + self.setup_vmcs_guest(entry, set_rip, ROOT_ZONE_BOOT_STACK)?; self.setup_vmcs_control()?; Ok(()) @@ -378,7 +350,10 @@ impl ArchCpu { Msr::IA32_VMX_PROCBASED_CTLS.read() as u32, ( // CpuCtrl::RDTSC_EXITING | - CpuCtrl::UNCOND_IO_EXITING | CpuCtrl::USE_MSR_BITMAPS | CpuCtrl::SECONDARY_CONTROLS + CpuCtrl::HLT_EXITING + | CpuCtrl::UNCOND_IO_EXITING + | CpuCtrl::USE_MSR_BITMAPS + | CpuCtrl::SECONDARY_CONTROLS ) .bits(), (CpuCtrl::CR3_LOAD_EXITING | CpuCtrl::CR3_STORE_EXITING).bits(), @@ -437,7 +412,12 @@ impl ArchCpu { Ok(()) } - fn setup_vmcs_guest(&mut self, entry: GuestPhysAddr, rsp: GuestPhysAddr) -> HvResult { + fn setup_vmcs_guest( + &mut self, + entry: GuestPhysAddr, + set_rip: bool, + rsp: GuestPhysAddr, + ) -> HvResult { let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; @@ -488,6 +468,13 @@ impl ArchCpu { VmcsGuest64::IA32_DEBUGCTL.write(0)?; VmcsGuest64::IA32_PAT.write(Msr::IA32_PAT.read())?; VmcsGuest64::IA32_EFER.write(0)?; + + // for AP start up, set CS_BASE to entry address, and RIP to 0. 
+ if !set_rip { + VmcsGuestNW::RIP.write(0)?; + VmcsGuestNW::CS_BASE.write(entry)?; + } + Ok(()) } @@ -530,11 +517,8 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); - // Check if there is an APIC timer interrupt - if self.apic_timer.check_interrupt() { - self.inject_interrupt(self.apic_timer.vector(), None); - } - self.check_pending_events().unwrap(); + self.virt_lapic.check_timer_interrupt(); + check_pending_vectors(this_cpu_id()); } unsafe fn vmx_entry_failed() -> ! { diff --git a/src/arch/x86_64/cpuid.rs b/src/arch/x86_64/cpuid.rs index 7eeae7a1..71783033 100644 --- a/src/arch/x86_64/cpuid.rs +++ b/src/arch/x86_64/cpuid.rs @@ -5,6 +5,7 @@ pub enum CpuIdEax { VendorInfo = 0x0, FeatureInfo = 0x1, StructuredExtendedFeatureInfo = 0x7, + ProcessorFrequencyInfo = 0x16, HypervisorInfo = 0x4000_0000, HypervisorFeatures = 0x4000_0001, } diff --git a/src/arch/x86_64/device.rs b/src/arch/x86_64/device.rs index 5d1e6a23..703885e2 100644 --- a/src/arch/x86_64/device.rs +++ b/src/arch/x86_64/device.rs @@ -1,11 +1,16 @@ use crate::{ - device::{irqchip::pic::i8259a::VirtDualI8259a, uart::VirtUart16550a}, + device::{ + irqchip::pic::{i8254::VirtI8254, i8259a::VirtDualI8259a}, + uart::VirtUart16550a, + }, error::HvResult, }; use alloc::{sync::Arc, vec, vec::Vec}; pub const PIC_MASTER_BASE_PORT: u16 = 0x20; pub const PIC_SLAVE_BASE_PORT: u16 = 0xa0; +pub const PIT_BASE_PORT: u16 = 0x40; +pub const PIT_SPEAKER_PORT: u16 = 0x61; pub const UART_COM1_BASE_PORT: u16 = 0x3f8; #[allow(non_snake_case)] @@ -49,6 +54,7 @@ lazy_static::lazy_static! 
{ static ref VIRT_DEVICES : VirtDeviceList = VirtDeviceList { port_io_devices: vec![ Arc::new(VirtDualI8259a::new(PIC_MASTER_BASE_PORT, PIC_SLAVE_BASE_PORT)), // Dual PIC + Arc::new(VirtI8254::new(PIT_BASE_PORT, PIT_SPEAKER_PORT)), Arc::new(VirtUart16550a::new(UART_COM1_BASE_PORT)), // COM1 ], }; diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index d67f937b..964b7c6c 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -2,6 +2,15 @@ use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; const NUM_INT: usize = 256; +#[allow(non_snake_case)] +pub mod IdtVector { + pub const VIRT_IPI_VECTOR: u8 = 0xe0; + pub const APIC_TIMER_VECTOR: u8 = 0xf0; + pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; + pub const APIC_ERROR_VECTOR: u8 = 0xf2; + pub const UART_COM1_VECTOR: u8 = 0xf3; +} + pub struct IdtStruct { table: InterruptDescriptorTable, } diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index 0bb0769d..8afec9e4 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -1 +1,162 @@ -pub fn arch_send_event(cpu_id: u64, sgi_num: u64) {} +use crate::{ + device::irqchip::inject_vector, + error::HvResult, + event, + percpu::{this_cpu_data, this_zone, CpuSet}, +}; +use alloc::{collections::vec_deque::VecDeque, vec::Vec}; +use bit_field::BitField; +use spin::{Mutex, Once}; + +use super::{cpu::this_cpu_id, idt::IdtVector}; + +#[allow(non_snake_case)] +pub mod IpiDeliveryMode { + pub const FIXED: u8 = 0; + pub const NMI: u8 = 4; + pub const INIT: u8 = 5; + pub const START_UP: u8 = 6; +} + +#[allow(non_snake_case)] +pub mod IpiDestShorthand { + pub const NO_SHORTHAND: u8 = 0; + pub const SELF: u8 = 1; + pub const ALL_INCLUDING_SELF: u8 = 2; + pub const ALL_EXCLUDING_SELF: u8 = 3; +} + +pub struct IpiInfo { + pub start_up_addr: usize, + pub has_start_up: bool, +} + +impl IpiInfo { + fn new() -> Self { + Self { + start_up_addr: 0, + has_start_up: false, + } + } +} + +static IPI_MANAGER: Once = Once::new(); +struct 
IpiManager { + pub inner: Vec>, +} + +impl IpiManager { + fn new(max_cpus: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_cpus { + let v = Mutex::new(IpiInfo::new()); + vs.push(v) + } + Self { inner: vs } + } + + fn get_ipi_info<'a>(&'a self, cpu: usize) -> Option<&'a Mutex> { + self.inner.get(cpu) + } +} + +pub fn init(max_cpus: usize) { + IPI_MANAGER.call_once(|| IpiManager::new(max_cpus)); +} + +pub fn get_ipi_info<'a>(cpu: usize) -> Option<&'a Mutex> { + IPI_MANAGER.get().unwrap().get_ipi_info(cpu) +} + +pub fn send_ipi(value: u64) -> HvResult { + let vector = value.get_bits(0..=7) as u8; + let delivery_mode: u8 = value.get_bits(8..=10) as u8; + let dest_shorthand = value.get_bits(18..=19) as u8; + let dest = value.get_bits(32..=39) as usize; + let cnt = value.get_bits(40..=63) as u32; + + let mut cpu_set = this_zone().read().cpu_set; + let cpu_id = this_cpu_id(); + let mut dest_set = CpuSet::new(cpu_set.max_cpu_id, 0); + + match dest_shorthand { + IpiDestShorthand::NO_SHORTHAND => { + dest_set.set_bit(dest); + } + IpiDestShorthand::SELF => { + dest_set.set_bit(cpu_id); + } + IpiDestShorthand::ALL_INCLUDING_SELF => { + dest_set = cpu_set; + } + IpiDestShorthand::ALL_EXCLUDING_SELF => { + dest_set = cpu_set; + dest_set.clear_bit(cpu_id); + } + _ => {} + } + + dest_set.iter().for_each(|dest| { + match delivery_mode { + IpiDeliveryMode::FIXED => { + inject_vector(dest, vector, None, true); + arch_send_event(dest as _, IdtVector::VIRT_IPI_VECTOR as _); + } + IpiDeliveryMode::NMI => { + inject_vector(dest, 2, None, true); + arch_send_event(dest as _, IdtVector::VIRT_IPI_VECTOR as _); + } + IpiDeliveryMode::INIT => {} + IpiDeliveryMode::START_UP => { + // TODO: target + let mut ipi_info = get_ipi_info(dest).unwrap().lock(); + if !ipi_info.has_start_up { + // we only start up once + ipi_info.has_start_up = true; + ipi_info.start_up_addr = (vector as usize) << 12; + event::send_event( + dest, + IdtVector::VIRT_IPI_VECTOR as _, + event::IPI_EVENT_WAKEUP, + ); 
+ } + } + _ => {} + } + }); + + Ok(()) +} + +pub fn arch_send_event(dest: u64, vector: u64) { + unsafe { + this_cpu_data() + .arch_cpu + .virt_lapic + .phys_lapic + .send_ipi(vector as _, dest as _) + }; +} + +pub fn handle_virt_ipi() { + // this may never return! + if event::check_events() { + return; + } + + // inject ipi + /*let mut vectors = &mut get_ipi_info(this_cpu_id()).unwrap().lock().fixed_vectors; + if vectors.len() > 1 { + // info!("handle_virt_ipi vectors len: {:x}", vectors.len()); + } + + while vectors.len() != 0 { + if let Some(vector) = vectors.pop_front() { + // info!("handle_virt_ipi vector: {:x}", vector); + this_cpu_data() + .arch_cpu + .virt_lapic + .inject_event((vector & 0xff) as u8, None); + } + }*/ +} diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index 3319685c..220dbaeb 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -1,5 +1,6 @@ use crate::{ arch::msr::Msr::*, + device::irqchip::pic::lapic::VirtLocalApic, error::HvResult, memory::{Frame, HostPhysAddr}, }; @@ -35,6 +36,9 @@ pub enum Msr { /// X2APIC Msr + /// TSC Target of Local APIC s TSC Deadline Mode (R/W) See Table 35-2 + IA32_TSC_DEADLINE = 0x6e0, + /// ID register. IA32_X2APIC_APICID = 0x802, /// Version register. @@ -45,6 +49,43 @@ pub enum Msr { IA32_X2APIC_LDR = 0x80D, /// Spurious Interrupt Vector register. IA32_X2APIC_SIVR = 0x80F, + + /// In-Service register bits [31:0]. + IA32_X2APIC_ISR0 = 0x810, + /// In-Service register bits [63:32]. + IA32_X2APIC_ISR1 = 0x811, + /// In-Service register bits [95:64]. + IA32_X2APIC_ISR2 = 0x812, + /// In-Service register bits [127:96]. + IA32_X2APIC_ISR3 = 0x813, + /// In-Service register bits [159:128]. + IA32_X2APIC_ISR4 = 0x814, + /// In-Service register bits [159:128]. + IA32_X2APIC_ISR5 = 0x815, + /// In-Service register bits [191:160]. 
+ IA32_X2APIC_ISR6 = 0x816, + /// In-Service register bits [223:192]. + IA32_X2APIC_ISR7 = 0x817, + + /// Interrupt Request register bits [31:0]. + IA32_X2APIC_IRR0 = 0x820, + /// Interrupt Request register bits [63:32]. + IA32_X2APIC_IRR1 = 0x821, + /// Interrupt Request register bits [95:64]. + IA32_X2APIC_IRR2 = 0x822, + /// Interrupt Request register bits [127:96]. + IA32_X2APIC_IRR3 = 0x823, + /// Interrupt Request register bits [159:128]. + IA32_X2APIC_IRR4 = 0x824, + /// Interrupt Request register bits [159:128]. + IA32_X2APIC_IRR5 = 0x825, + /// Interrupt Request register bits [191:160]. + IA32_X2APIC_IRR6 = 0x826, + /// Interrupt Request register bits [223:192]. + IA32_X2APIC_IRR7 = 0x827, + + /// Error Status register. + IA32_X2APIC_ESR = 0x828, /// Interrupt Command register. IA32_X2APIC_ICR = 0x830, /// LVT Timer Interrupt register. @@ -126,11 +167,15 @@ impl MsrBitmap { let mut bitmap = Self { frame: Frame::new_zero()?, }; - let msr = IA32_APIC_BASE; - bitmap.set_read_intercept(msr, true); - bitmap.set_write_intercept(msr, true); + + bitmap.set_read_intercept(IA32_APIC_BASE, true); + bitmap.set_write_intercept(IA32_APIC_BASE, true); + + bitmap.set_read_intercept(IA32_TSC_DEADLINE, true); + bitmap.set_write_intercept(IA32_TSC_DEADLINE, true); + // Intercept all x2APIC MSR accesses - for addr in 0x800_u32..=0x83f_u32 { + for addr in VirtLocalApic::msr_range() { if let Ok(msr) = Msr::try_from(addr) { bitmap.set_read_intercept(msr, true); bitmap.set_write_intercept(msr, true); diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index c74c9613..42257907 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,21 +1,24 @@ use crate::{ arch::{ - apic::{local_apic, vectors::*}, cpu::ArchCpu, cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, device::all_virt_devices, idt::IdtStruct, + ipi, msr::Msr::{self, *}, s2pt::Stage2PageFaultInfo, vmcs::*, vmx::{VmxCrAccessInfo, VmxExitInfo, VmxExitReason, VmxInterruptInfo, 
VmxIoExitInfo}, }, - device::{irqchip::pic::lapic::VirtLocalApic, uart::UartReg}, + device::{ + irqchip::pic::{hpet, lapic::VirtLocalApic}, + uart::UartReg, + }, error::HvResult, }; use x86_64::registers::control::Cr4Flags; -use super::device::UART_COM1_BASE_PORT; +use super::{device::UART_COM1_BASE_PORT, idt::IdtVector}; core::arch::global_asm!( include_str!("trap.S"), @@ -26,6 +29,7 @@ const IRQ_VECTOR_START: u8 = 0x20; const IRQ_VECTOR_END: u8 = 0xff; const VM_EXIT_INSTR_LEN_CPUID: u8 = 2; +const VM_EXIT_INSTR_LEN_HLT: u8 = 1; const VM_EXIT_INSTR_LEN_RDMSR: u8 = 2; const VM_EXIT_INSTR_LEN_WRMSR: u8 = 2; const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; @@ -71,8 +75,9 @@ pub fn arch_handle_trap(tf: &mut TrapFrame) { fn handle_irq(vector: u8) { match vector { - APIC_TIMER_VECTOR => {} - UART_COM1_VECTOR => { + IdtVector::VIRT_IPI_VECTOR => ipi::handle_virt_ipi(), + IdtVector::APIC_TIMER_VECTOR => {} + IdtVector::UART_COM1_VECTOR => { if let Some(device) = all_virt_devices().find_port_io_device(UART_COM1_BASE_PORT) { device.read(UART_COM1_BASE_PORT + UartReg::LINE_STATUS, 0); } @@ -81,7 +86,7 @@ fn handle_irq(vector: u8) { println!("Unhandled irq {}", vector); } } - unsafe { local_apic().end_of_interrupt() }; + unsafe { VirtLocalApic::phys_local_apic().end_of_interrupt() }; } fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { @@ -100,7 +105,7 @@ fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { let mut ecx = FeatureInfoFlags::from_bits_truncate(res.ecx as _); ecx.remove(FeatureInfoFlags::VMX); - ecx.remove(FeatureInfoFlags::TSC_DEADLINE); + // ecx.remove(FeatureInfoFlags::TSC_DEADLINE); ecx.remove(FeatureInfoFlags::XSAVE); ecx.insert(FeatureInfoFlags::X2APIC); @@ -121,6 +126,18 @@ fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { res } + CpuIdEax::ProcessorFrequencyInfo => { + if let Some(freq_mhz) = hpet::get_tsc_freq_mhz() { + CpuIdResult { + eax: freq_mhz, + ebx: freq_mhz, + ecx: freq_mhz, + edx: 0, + } + } else { + cpuid!(regs.rax, regs.rcx) + } + } 
CpuIdEax::HypervisorInfo => CpuIdResult { eax: CpuIdEax::HypervisorFeatures as u32, ebx: signature[0], @@ -248,7 +265,7 @@ fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult { apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC Ok(apic_base) } else if VirtLocalApic::msr_range().contains(&rcx) { - VirtLocalApic::rdmsr(arch_cpu, msr) + arch_cpu.virt_lapic.rdmsr(msr) } else { hv_result_err!(ENOSYS) }; @@ -276,8 +293,8 @@ fn handle_msr_write(arch_cpu: &mut ArchCpu) -> HvResult { let res = if msr == IA32_APIC_BASE { Ok(()) // ignore - } else if VirtLocalApic::msr_range().contains(&rcx) { - VirtLocalApic::wrmsr(arch_cpu, msr, value) + } else if VirtLocalApic::msr_range().contains(&rcx) || msr == IA32_TSC_DEADLINE { + arch_cpu.virt_lapic.wrmsr(msr, value) } else { hv_result_err!(ENOSYS) }; @@ -300,8 +317,12 @@ fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, guest_rip: usize) -> HvResult { ); } -fn handle_triple_fault(arch_cpu: &mut ArchCpu, guest_rip: usize) -> HvResult { - panic!("VM exit: Triple fault @ {:#x}", guest_rip); +fn handle_triple_fault(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { + panic!( + "VM exit: Triple fault @ {:#x}, instr length: {:x}", + exit_info.guest_rip, exit_info.exit_instruction_length + ); + // arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; Ok(()) } @@ -315,23 +336,11 @@ pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { let res = match exit_info.exit_reason { VmxExitReason::EXTERNAL_INTERRUPT => handle_external_interrupt(), - VmxExitReason::TRIPLE_FAULT => handle_triple_fault(arch_cpu, exit_info.guest_rip), - VmxExitReason::INTERRUPT_WINDOW => arch_cpu.set_interrupt_window(false), + VmxExitReason::TRIPLE_FAULT => handle_triple_fault(arch_cpu, &exit_info), + VmxExitReason::INTERRUPT_WINDOW => Vmcs::set_interrupt_window(false), VmxExitReason::CPUID => handle_cpuid(arch_cpu), - VmxExitReason::RDTSC => { - // FIXME: temp! 
- let current_ticks = crate::arch::apic::current_ticks(); - let regs = arch_cpu.regs_mut(); - regs.rdx = (current_ticks >> 32) & (u32::MAX as u64); - regs.rax = current_ticks & (u32::MAX as u64); - /*info!( - "RDTSC: {:x} rdx: {:x}, rax: {:x}, rip: {:x}", - crate::arch::lapic::current_ticks(), - regs.rdx, - regs.rax, - VmcsGuestNW::RIP.read()?, - );*/ - arch_cpu.advance_guest_rip(2)?; + VmxExitReason::HLT => { + arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_HLT)?; Ok(()) } VmxExitReason::VMCALL => handle_hypercall(arch_cpu), diff --git a/src/arch/x86_64/vmcs.rs b/src/arch/x86_64/vmcs.rs index 54d04c6b..047dec7b 100644 --- a/src/arch/x86_64/vmcs.rs +++ b/src/arch/x86_64/vmcs.rs @@ -11,7 +11,10 @@ use crate::{ memory::MemFlags, }; use bit_field::BitField; -use x86::{bits64::vmx, vmx::Result as VmResult, vmx::VmFail}; +use x86::{ + bits64::vmx, + vmx::{vmcs::control::PrimaryControls, Result as VmResult, VmFail}, +}; macro_rules! vmcs_read { ($field_enum: ident, u64) => { @@ -491,6 +494,16 @@ impl Vmcs { unsafe { vmx::vmclear(paddr as _) } } + /// Whether the guest interrupts are blocked. (SDM Vol. 3C, Section 24.4.2, Table 24-3) + pub fn allow_interrupt() -> HvResult { + let rflags = VmcsGuestNW::RFLAGS.read().unwrap(); + let block_state = VmcsGuest32::INTERRUPTIBILITY_STATE.read().unwrap(); + Ok( + rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0 + && block_state == 0, + ) + } + pub fn inject_interrupt(vector: u8, err_code: Option) -> HvResult { // SDM Vol. 3C, Section 24.8.3 let err_code = if VmxInterruptionType::vector_has_error_code(vector) { @@ -514,6 +527,21 @@ impl Vmcs { Ok(VmcsReadOnly32::VM_INSTRUCTION_ERROR.read()?.into()) } + /// If enable, a VM exit occurs at the beginning of any instruction if + /// `RFLAGS.IF` = 1 and there are no other blocking of interrupts. + /// (see SDM, Vol. 
3C, Section 24.4.2) + pub fn set_interrupt_window(enable: bool) -> HvResult { + let mut ctrl: u32 = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?; + let bits = PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); + if enable { + ctrl |= bits + } else { + ctrl &= !bits + } + VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(ctrl)?; + Ok(()) + } + pub fn set_control( control: VmcsControl32, capability_msr: Msr, diff --git a/src/device/irqchip/mod.rs b/src/device/irqchip/mod.rs index e79376bb..d3b0f7ce 100644 --- a/src/device/irqchip/mod.rs +++ b/src/device/irqchip/mod.rs @@ -20,4 +20,4 @@ pub use plic::{inject_irq, percpu_init, primary_init_early, primary_init_late}; pub use ls7a2000::{inject_irq, percpu_init, primary_init_early, primary_init_late}; #[cfg(target_arch = "x86_64")] -pub use pic::{inject_irq, percpu_init, primary_init_early, primary_init_late}; +pub use pic::{inject_irq, inject_vector, percpu_init, primary_init_early, primary_init_late}; diff --git a/src/device/irqchip/pic/hpet.rs b/src/device/irqchip/pic/hpet.rs new file mode 100644 index 00000000..15cdb5f7 --- /dev/null +++ b/src/device/irqchip/pic/hpet.rs @@ -0,0 +1,211 @@ +use crate::memory::VirtAddr; +use bit_field::BitField; +use core::{arch::x86_64::_rdtsc, time::Duration, u32}; +use spin::Mutex; +use tock_registers::{ + interfaces::{Readable, Writeable}, + register_structs, + registers::{ReadOnly, ReadWrite}, +}; + +type TimeValue = Duration; + +lazy_static::lazy_static! { + static ref HPET: Hpet = { + let mut hpet = Hpet::new(0xfed0_0000); + hpet.init(); + hpet + }; +} + +bitflags::bitflags! { + struct TimerConfigCaps: u64 { + /// 0 - this timer generates edge-triggered interrupts. 1 - this timer + /// generates level-triggered interrupts. + const TN_INT_TYPE_CNF = 1 << 1; + /// Setting this bit to 1 enables triggering of interrupts. + const TN_INT_ENB_CNF = 1 << 2; + /// If Tn_PER_INT_CAP is 1, then writing 1 to this field enables periodic + /// timer. 
+ const TN_TYPE_CNF = 1 << 3; + /// If this read-only bit is set to 1, this timer supports periodic mode. + const TN_PER_INT_CAP = 1 << 4; + /// If this read-only bit is set to 1, the size of the timer is 64-bit. + const TN_SIZE_CAP = 1 << 5; + /// This field is used to allow software to directly set periodic timer's + /// accumulator. + const TN_VAL_SET_CNF = 1 << 6; + /// For 64-bit timer, if this field is set, the timer will be forced to + /// work in 32-bit mode. + const TN_32MODE_CNF = 1 << 8; + } +} + +register_structs! { + HpetRegs { + /// General Capabilities and ID Register. + (0x000 => general_caps: ReadOnly), + (0x008 => _reserved_0), + /// General Configuration Register. + (0x010 => general_config: ReadWrite), + (0x018 => _reserved_1), + /// General Interrupt Status Register. + (0x020 => general_intr_status: ReadWrite), + (0x028 => _reserved_2), + /// Main Counter Value Register. + (0x0f0 => main_counter_value: ReadWrite), + (0x0f8 => _reserved_3), + (0x100 => @END), + } +} + +register_structs! { + HpetTimerRegs { + /// Timer N Configuration and Capability Register. + (0x0 => config_caps: ReadWrite), + /// Timer N Comparator Value Register. + (0x8 => comparator_value: ReadWrite), + /// Timer N FSB Interrupt Route Register. 
+ (0x10 => fsb_int_route: ReadWrite), + (0x18 => _reserved_0), + (0x20 => @END), + } +} + +struct Hpet { + base_vaddr: VirtAddr, + num_timers: u8, + period_fs: u64, + freq_hz: u64, + freq_mhz: u64, + ticks_per_ms: u64, + is_64_bit: bool, +} + +impl Hpet { + const fn new(base_vaddr: VirtAddr) -> Self { + Self { + base_vaddr, + num_timers: 0, + period_fs: 0, + freq_hz: 0, + freq_mhz: 0, + ticks_per_ms: 0, + is_64_bit: false, + } + } + + const fn regs(&self) -> &HpetRegs { + unsafe { &*(self.base_vaddr as *const HpetRegs) } + } + + const fn timer_regs(&self, n: u8) -> &HpetTimerRegs { + assert!(n < self.num_timers); + unsafe { &*((self.base_vaddr + 0x100 + n as usize * 0x20) as *const HpetTimerRegs) } + } + + fn init(&mut self) { + println!("Initializing HPET..."); + let cap = self.regs().general_caps.get(); + let num_timers = cap.get_bits(8..=12) as u8 + 1; + let period_fs = cap.get_bits(32..); + let is_64_bit = cap.get_bit(13); + let freq_hz = 1_000_000_000_000_000 / period_fs; + println!( + "HPET: {}.{:06} MHz, {}-bit, {} timers", + freq_hz / 1_000_000, + freq_hz % 1_000_000, + if is_64_bit { 64 } else { 32 }, + num_timers + ); + + self.num_timers = num_timers; + self.period_fs = period_fs; + self.freq_hz = freq_hz; + self.freq_mhz = freq_hz / 1_000_000; + self.ticks_per_ms = freq_hz / 1000; + self.is_64_bit = is_64_bit; + + self.set_enable(false); + for i in 0..num_timers { + // disable timer interrupts + let config_caps = + unsafe { TimerConfigCaps::from_bits_retain(self.timer_regs(i).config_caps.get()) }; + self.timer_regs(i) + .config_caps + .set((config_caps - TimerConfigCaps::TN_INT_ENB_CNF).bits()); + } + self.set_enable(true); + } + + fn set_enable(&mut self, enable: bool) { + const LEG_RT_CNF: u64 = 1 << 1; // Legacy replacement mapping will disable PIT IRQs + const ENABLE_CNF: u64 = 1 << 0; + let config = &self.regs().general_config; + if enable { + config.set(LEG_RT_CNF | ENABLE_CNF); + } else { + config.set(0); + } + } + + fn wait_millis(&self, millis: 
u64) { + let main_counter_value = &self.regs().main_counter_value; + let ticks = millis * self.ticks_per_ms; + let init = main_counter_value.get(); + while main_counter_value.get().wrapping_sub(init) < ticks {} + } +} + +pub fn busy_wait(duration: Duration) { + busy_wait_until(current_time() + duration); +} + +fn busy_wait_until(deadline: TimeValue) { + while current_time() < deadline { + core::hint::spin_loop(); + } +} + +pub fn current_time() -> TimeValue { + TimeValue::from_nanos(current_time_nanos()) +} + +pub fn current_ticks() -> u64 { + HPET.regs().main_counter_value.get() +} + +pub fn ticks_to_nanos(ticks: u64) -> u64 { + ticks * 1_000 / HPET.freq_mhz +} + +pub fn current_time_nanos() -> u64 { + ticks_to_nanos(current_ticks()) +} + +pub fn wait_millis(millis: u64) { + HPET.wait_millis(millis); +} + +pub fn get_tsc_freq_mhz() -> Option { + let mut best_freq_mhz = u32::MAX; + for _ in 0..5 { + let tsc_start = unsafe { _rdtsc() }; + let hpet_start = current_ticks(); + wait_millis(10); + let tsc_end = unsafe { _rdtsc() }; + let hpet_end = current_ticks(); + + let nanos = ticks_to_nanos(hpet_end.wrapping_sub(hpet_start)); + let freq_mhz = ((tsc_end - tsc_start) * 1_000 / nanos) as u32; + + if freq_mhz < best_freq_mhz { + best_freq_mhz = freq_mhz; + } + } + if best_freq_mhz != u32::MAX { + Some(best_freq_mhz) + } else { + None + } +} diff --git a/src/device/irqchip/pic/i8254.rs b/src/device/irqchip/pic/i8254.rs new file mode 100644 index 00000000..4de21534 --- /dev/null +++ b/src/device/irqchip/pic/i8254.rs @@ -0,0 +1,155 @@ +use crate::{arch::device::PortIoDevice, device::irqchip::pic::hpet, error::HvResult}; +use alloc::vec::Vec; +use bit_field::BitField; +use core::ops::Range; +use spin::Mutex; + +const VIRT_PIT_FREQ_HZ: u64 = 1193182; + +#[allow(non_snake_case)] +pub mod ReadWriteState { + pub const LSB: u8 = 1; + pub const MSB: u8 = 2; + pub const WORD_0: u8 = 3; + pub const WORD_1: u8 = 4; +} + +#[derive(Debug, Default, Clone)] +struct VirtI8254Channel { + 
count: i32, + mode: u8, + rw_mode: u8, + read_state: u8, + write_state: u8, + write_latch: u32, + count_set_time: u64, +} + +impl VirtI8254Channel { + fn get_count(&self) -> i32 { + let delta = + (hpet::current_time_nanos() - self.count_set_time) * VIRT_PIT_FREQ_HZ / 1_000_000_000; + let mut count = self.count; + match self.mode { + 0 => count = (self.count - (delta as i32)) & 0xffff, + _ => {} + } + count + } + + fn set_count(&mut self, mut value: u32) { + if value == 0 { + value = 0x1_0000; + } + self.count_set_time = hpet::current_time_nanos(); + self.count = value as _; + } +} + +pub struct VirtI8254 { + base_port: u16, + speaker_port: u16, + port_range: Vec>, + channels: Vec>, +} + +impl VirtI8254 { + pub fn new(base_port: u16, speaker_port: u16) -> Self { + Self { + base_port, + speaker_port, + port_range: vec![base_port..base_port + 4, speaker_port..speaker_port + 1], + channels: vec![ + Mutex::new(VirtI8254Channel::default()), + Mutex::new(VirtI8254Channel::default()), + Mutex::new(VirtI8254Channel::default()), + ], + } + } +} + +impl PortIoDevice for VirtI8254 { + fn port_range(&self) -> &Vec> { + &self.port_range + } + + fn read(&self, port: u16, msg: u8) -> HvResult { + // info!("i8254 read: {:x}", port); + + /*if port == self.speaker_port { + if let Some(channel) = self.channels.get(2) { + let mut channel = channel.lock(); + let cnt = channel.get_count(); + return Ok(0); + } + }*/ + + let chan_id = ((port - self.base_port) & 3) as usize; + if let Some(channel) = self.channels.get(chan_id) { + let mut channel = channel.lock(); + + let ret = match channel.read_state { + ReadWriteState::LSB => 0, + ReadWriteState::MSB => 0, + ReadWriteState::WORD_0 => { + channel.read_state = ReadWriteState::WORD_1; + channel.get_count() & 0xff + } + ReadWriteState::WORD_1 => { + channel.read_state = ReadWriteState::WORD_0; + (channel.get_count() >> 8) & 0xff + } + _ => 0, + }; + return Ok(ret as u32); + } + + Ok(0) + } + + fn write(&self, port: u16, value: u32, msg: u8) 
-> HvResult { + // info!("i8254 write: {:x}, {:x}", port, value); + + let offset: usize = (port - self.base_port) as _; + match offset { + 3 => { + let chan_id: usize = value.get_bits(6..=7) as _; + if chan_id == 3 { + } else if let Some(channel) = self.channels.get(chan_id) { + let mut channel = channel.lock(); + let access: u8 = value.get_bits(4..=5) as _; + + if access != 0 { + channel.rw_mode = access; + channel.read_state = access; + channel.write_state = access; + + channel.mode = value.get_bits(1..=3) as _; + } + } + } + 0 | 1 | 2 => { + if let Some(channel) = self.channels.get(offset) { + let mut channel = channel.lock(); + match channel.write_state { + ReadWriteState::LSB => {} + ReadWriteState::MSB => {} + ReadWriteState::WORD_0 => { + channel.write_latch = value; + channel.write_state = ReadWriteState::WORD_1; + } + ReadWriteState::WORD_1 => { + let low = channel.write_latch; + channel.set_count(low | (value << 8)); + channel.write_state = ReadWriteState::WORD_0; + } + _ => {} + } + } + } + _ => {} + }; + + Ok(()) + } +} diff --git a/src/device/irqchip/pic/i8259a.rs b/src/device/irqchip/pic/i8259a.rs index 8ad3a0be..7a015e78 100644 --- a/src/device/irqchip/pic/i8259a.rs +++ b/src/device/irqchip/pic/i8259a.rs @@ -2,8 +2,9 @@ use crate::{ arch::device::{ all_virt_devices, DeviceMsg, PortIoDevice, PIC_MASTER_BASE_PORT, PIC_SLAVE_BASE_PORT, }, + device::irqchip::inject_vector, error::HvResult, - percpu::this_cpu_data, + percpu::{get_cpu_data, this_cpu_data}, }; use alloc::vec::Vec; use core::ops::Range; @@ -118,7 +119,10 @@ impl VirtDualI8259aUnlocked { fn set_irq(&mut self, irq: i32, id: usize) { self.pics[id].set_irq(irq); - self.update_irq(id); + // isr bit empty, can serve + if self.pics[id].isr & (1 << irq) == 0 { + self.update_irq(id); + } } fn update_irq(&mut self, id: usize) { @@ -140,8 +144,8 @@ impl VirtDualI8259aUnlocked { vector = self.pics[1].vector_base + (irq2 as u8); } self.ack_irq(irq, 0); - // TODO: inject irq - 
this_cpu_data().arch_cpu.inject_interrupt(vector, None); + // TODO: single core? smp? + inject_vector(0, vector, None, true); } else { self.set_irq(2, 0); } @@ -237,7 +241,7 @@ impl VirtDualI8259aUnlocked { 1 => { // ICW2 pic.vector_base = value & 0xf8; - info!("I8259A: vector base: {:x}", pic.vector_base); + // info!("I8259A: vector base: {:x}", pic.vector_base); pic.init_state = 2; } 2 => { diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 2baf92b3..5ae514dc 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -1,69 +1,83 @@ use crate::{ arch::{ - apic::current_time_nanos, - cpu::ArchCpu, + cpu::{this_cpu_id, ArchCpu}, + idt::IdtVector, + ipi::{self, IpiDeliveryMode}, msr::Msr::{self, *}, + vmcs::Vmcs, }, + device::irqchip::{inject_vector, pic::hpet}, error::HvResult, + percpu::{this_cpu_data, this_zone, CpuSet}, }; +use alloc::collections::vec_deque::VecDeque; use bit_field::BitField; +use core::{arch::x86_64::_rdtsc, u32}; +use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}; -const APIC_FREQ_MHZ: u64 = 1000; // 1000 MHz -const APIC_CYCLE_NANOS: u64 = 1000 / APIC_FREQ_MHZ; - -/// Local APIC timer modes. -#[derive(Debug, Copy, Clone)] -#[repr(u8)] -pub enum TimerMode { - /// Timer only fires once. - OneShot = 0b00, - /// Timer fires periodically. - Periodic = 0b01, - /// Timer fires at an absolute time. - TscDeadline = 0b10, -} +const PHYS_LAPIC_TIMER_INTR_FREQ: u64 = 100; +const VIRT_LAPIC_TIMER_FREQ_MHZ: u64 = 1000; // 1000 MHz +const VIRT_LAPIC_TIMER_NANOS_PER_TICK: u64 = 1000 / VIRT_LAPIC_TIMER_FREQ_MHZ; /// A virtual local APIC timer. (SDM Vol. 
3C, Section 10.5.4) -pub struct VirtApicTimer { - is_enabled: u8, +pub struct VirtLocalApicTimer { + is_enabled: bool, lvt_timer_bits: u32, divide_shift: u8, initial_count: u32, last_start_ns: u64, deadline_ns: u64, + deadline_tsc: u64, + timer_mode: TimerMode, } -impl VirtApicTimer { +impl VirtLocalApicTimer { pub const fn new() -> Self { Self { - is_enabled: 1, + is_enabled: true, lvt_timer_bits: 0x1_0000, // masked divide_shift: 0, initial_count: 0, last_start_ns: 0, deadline_ns: 0, + deadline_tsc: 0, + timer_mode: TimerMode::TscDeadline, } } pub fn set_enable(&mut self, is_enabled: u8) { - self.is_enabled = is_enabled; + self.is_enabled = is_enabled != 0; } /// Check if an interrupt generated. if yes, update it's states. pub fn check_interrupt(&mut self) -> bool { - if self.deadline_ns == 0 { + if !self.is_enabled || self.is_masked() { return false; - } else if current_time_nanos() >= self.deadline_ns { - if self.is_periodic() { - self.deadline_ns += self.interval_ns(); - } else { - self.deadline_ns = 0; + } + + match self.timer_mode { + TimerMode::OneShot => { + if self.deadline_ns != 0 && hpet::current_time_nanos() >= self.deadline_ns { + self.deadline_ns = 0; + return true; + } + } + TimerMode::Periodic => { + let hpet_ns = hpet::current_time_nanos(); + if self.deadline_ns != 0 && hpet_ns >= self.deadline_ns { + self.deadline_ns += self.interval_ns(); + return true; + } } - if self.is_enabled != 0 { - return !self.is_masked(); + TimerMode::TscDeadline => { + if (self.deadline_tsc != 0) && unsafe { _rdtsc() } >= self.deadline_tsc { + self.deadline_tsc = 0; + return true; + } } + _ => {} } - false + return false; } /// Whether the timer interrupt is masked. @@ -100,35 +114,43 @@ impl VirtApicTimer { /// Current Count Register. 
pub fn current_counter(&self) -> u32 { - let elapsed_ns = current_time_nanos() - self.last_start_ns; - let elapsed_cycles = (elapsed_ns / APIC_CYCLE_NANOS) >> self.divide_shift; - if self.is_periodic() { - self.initial_count - (elapsed_cycles % self.initial_count as u64) as u32 - } else if elapsed_cycles < self.initial_count as u64 { - self.initial_count - elapsed_cycles as u32 - } else { - 0 + let elapsed_ns = hpet::current_time_nanos() - self.last_start_ns; + let elapsed_cycles = (elapsed_ns / VIRT_LAPIC_TIMER_NANOS_PER_TICK) >> self.divide_shift; + + match self.timer_mode { + TimerMode::OneShot => { + if elapsed_cycles < self.initial_count as u64 { + return self.initial_count - elapsed_cycles as u32; + } + } + TimerMode::Periodic => { + if self.initial_count != 0 { + return self.initial_count + - (elapsed_cycles % self.initial_count as u64) as u32; + } + } + _ => {} } + return 0; } /// Set LVT Timer Register. pub fn set_lvt_timer(&mut self, bits: u32) -> HvResult { let timer_mode = bits.get_bits(17..19); - /*if timer_mode == TimerMode::TscDeadline as _ { - return hv_result_err!(EINVAL); // TSC deadline mode was not supported - } else */ - if timer_mode == 0b11 { - return hv_result_err!(EINVAL); // reserved - } + self.timer_mode = match timer_mode { + 0 => TimerMode::OneShot, + 1 => TimerMode::Periodic, + _ => TimerMode::TscDeadline, + }; self.lvt_timer_bits = bits; - self.start_timer(); + self.set_deadline(); Ok(()) } /// Set Initial Count Register. 
pub fn set_initial_count(&mut self, initial: u32) -> HvResult { self.initial_count = initial; - self.start_timer(); + self.set_deadline(); Ok(()) } @@ -136,17 +158,22 @@ impl VirtApicTimer { pub fn set_divide(&mut self, dcr: u32) -> HvResult { let shift = (dcr & 0b11) | ((dcr & 0b1000) >> 1); self.divide_shift = (shift + 1) as u8 & 0b111; - self.start_timer(); + self.set_deadline(); + Ok(()) + } + + pub fn set_tsc_deadline(&mut self, ddl: u64) -> HvResult { + self.deadline_tsc = ddl; Ok(()) } const fn interval_ns(&self) -> u64 { - (self.initial_count as u64 * APIC_CYCLE_NANOS) << self.divide_shift + (self.initial_count as u64 * VIRT_LAPIC_TIMER_NANOS_PER_TICK) << self.divide_shift } - fn start_timer(&mut self) { + fn set_deadline(&mut self) { if self.initial_count != 0 { - self.last_start_ns = current_time_nanos(); + self.last_start_ns = hpet::current_time_nanos(); self.deadline_ns = self.last_start_ns + self.interval_ns(); } else { self.deadline_ns = 0; @@ -154,22 +181,86 @@ impl VirtApicTimer { } } -pub struct VirtLocalApic; +pub struct VirtLocalApic { + pub phys_lapic: LocalApic, + pub virt_lapic_timer: VirtLocalApicTimer, +} impl VirtLocalApic { + pub fn new() -> Self { + let mut lapic = LocalApicBuilder::new() + .timer_vector(IdtVector::APIC_TIMER_VECTOR as _) + .error_vector(IdtVector::APIC_ERROR_VECTOR as _) + .spurious_vector(IdtVector::APIC_SPURIOUS_VECTOR as _) + .build() + .unwrap(); + + unsafe { lapic.enable() }; + + // calibrate phys lapic timer + let mut best_freq_hz = 0; + for _ in 0..5 { + unsafe { lapic.set_timer_initial(u32::MAX) }; + let hpet_start = hpet::current_ticks(); + hpet::wait_millis(10); + let ticks = u32::MAX - unsafe { lapic.timer_current() }; + let hpet_end = hpet::current_ticks(); + + let nanos = hpet::ticks_to_nanos(hpet_end.wrapping_sub(hpet_start)); + let ticks_per_sec = (ticks as u64 * 1_000_000_000 / nanos) as u32; + + if ticks_per_sec > best_freq_hz { + best_freq_hz = ticks_per_sec; + } + } + println!( + "Calibrated LAPIC 
frequency: {}.{:03} MHz", + best_freq_hz / 1_000_000, + best_freq_hz % 1_000_000 / 1_000, + ); + + unsafe { + lapic.set_timer_mode(TimerMode::Periodic); + lapic.set_timer_divide(TimerDivide::Div256); + lapic.set_timer_initial((best_freq_hz as u64 / PHYS_LAPIC_TIMER_INTR_FREQ) as u32); + } + + Self { + phys_lapic: lapic, + virt_lapic_timer: VirtLocalApicTimer::new(), + } + } + pub const fn msr_range() -> core::ops::Range { 0x800..0x840 } - pub fn rdmsr(arch_cpu: &mut ArchCpu, msr: Msr) -> HvResult { - let apic_timer = arch_cpu.apic_timer_mut(); - trace!("lapic rdmsr: {:?}", msr,); + pub fn phys_local_apic<'a>() -> &'a mut LocalApic { + &mut this_cpu_data().arch_cpu.virt_lapic.phys_lapic + } + + pub fn rdmsr(&mut self, msr: Msr) -> HvResult { + if msr != IA32_X2APIC_CUR_COUNT { + // info!("lapic rdmsr: {:?}", msr,); + } + match msr { - IA32_X2APIC_APICID => Ok(arch_cpu.cpuid as u64), + IA32_X2APIC_APICID => Ok(this_cpu_id() as u64), IA32_X2APIC_VERSION => Ok(0x50014), // Max LVT Entry: 0x5, Version: 0x14 - IA32_X2APIC_LDR => Ok(0x0), // TODO: IPI - IA32_X2APIC_SIVR => Ok(((apic_timer.is_enabled as u64 & 0x1) << 8) | 0xff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit) - IA32_X2APIC_LVT_TIMER => Ok(apic_timer.lvt_timer() as u64), + IA32_X2APIC_LDR => Ok(this_cpu_id() as u64), // logical apic id + IA32_X2APIC_SIVR => Ok(((self.virt_lapic_timer.is_enabled as u64 & 0x1) << 8) | 0xff), // SDM Vol. 
3A, Section 10.9, Figure 10-23 (with Software Enable bit) + IA32_X2APIC_ISR0 | IA32_X2APIC_ISR1 | IA32_X2APIC_ISR2 | IA32_X2APIC_ISR3 + | IA32_X2APIC_ISR4 | IA32_X2APIC_ISR5 | IA32_X2APIC_ISR6 | IA32_X2APIC_ISR7 => { + // info!("read ISR"); + Ok(0x0) + } + IA32_X2APIC_IRR0 | IA32_X2APIC_IRR1 | IA32_X2APIC_IRR2 | IA32_X2APIC_IRR3 + | IA32_X2APIC_IRR4 | IA32_X2APIC_IRR5 | IA32_X2APIC_IRR6 | IA32_X2APIC_IRR7 => { + // info!("read IRR"); + Ok(0x0) + } + IA32_X2APIC_ESR => Ok(0x0), + IA32_X2APIC_LVT_TIMER => Ok(self.virt_lapic_timer.lvt_timer() as u64), IA32_X2APIC_LVT_THERMAL | IA32_X2APIC_LVT_PMI | IA32_X2APIC_LVT_LINT0 @@ -177,19 +268,25 @@ impl VirtLocalApic { | IA32_X2APIC_LVT_ERROR => { Ok(0x1_0000) // SDM Vol. 3A, Section 10.5.1, Figure 10-8 (with Mask bit) } - IA32_X2APIC_INIT_COUNT => Ok(apic_timer.initial_count() as u64), - IA32_X2APIC_CUR_COUNT => Ok(apic_timer.current_counter() as u64), - IA32_X2APIC_DIV_CONF => Ok(apic_timer.divide() as u64), + IA32_X2APIC_INIT_COUNT => Ok(self.virt_lapic_timer.initial_count() as u64), + IA32_X2APIC_CUR_COUNT => Ok(self.virt_lapic_timer.current_counter() as u64), + IA32_X2APIC_DIV_CONF => Ok(self.virt_lapic_timer.divide() as u64), _ => hv_result_err!(ENOSYS), } } - pub fn wrmsr(arch_cpu: &mut ArchCpu, msr: Msr, value: u64) -> HvResult { - if msr != IA32_X2APIC_ICR && (value >> 32) != 0 { + pub fn wrmsr(&mut self, msr: Msr, value: u64) -> HvResult { + if (msr != IA32_X2APIC_ICR && msr != IA32_TSC_DEADLINE) && (value >> 32) != 0 { return hv_result_err!(EINVAL); // all registers except ICR are 32-bits } - let apic_timer = arch_cpu.apic_timer_mut(); - trace!("lapic wrmsr: {:?}, value: {:x}", msr, value); + if msr == IA32_TSC_DEADLINE { + self.virt_lapic_timer.set_tsc_deadline(value); + return Ok(()); + } + + if msr == IA32_X2APIC_INIT_COUNT { + //info!("{:?}, value: {:x}", msr, value); + } match msr { IA32_X2APIC_EOI => { if value != 0 { @@ -199,20 +296,32 @@ impl VirtLocalApic { } } IA32_X2APIC_SIVR => { - 
apic_timer.set_enable(((value >> 8) & 1) as _); + self.virt_lapic_timer.set_enable(((value >> 8) & 1) as _); Ok(()) } - IA32_X2APIC_LVT_THERMAL + IA32_X2APIC_ICR => { + // info!("ICR value: {:x}", value); + ipi::send_ipi(value); + Ok(()) + } + IA32_X2APIC_ESR + | IA32_X2APIC_LVT_THERMAL | IA32_X2APIC_LVT_PMI | IA32_X2APIC_LVT_LINT0 | IA32_X2APIC_LVT_LINT1 | IA32_X2APIC_LVT_ERROR => { Ok(()) // ignore these register writes } - IA32_X2APIC_LVT_TIMER => apic_timer.set_lvt_timer(value as u32), - IA32_X2APIC_INIT_COUNT => apic_timer.set_initial_count(value as u32), - IA32_X2APIC_DIV_CONF => apic_timer.set_divide(value as u32), + IA32_X2APIC_LVT_TIMER => self.virt_lapic_timer.set_lvt_timer(value as u32), + IA32_X2APIC_INIT_COUNT => self.virt_lapic_timer.set_initial_count(value as u32), + IA32_X2APIC_DIV_CONF => self.virt_lapic_timer.set_divide(value as u32), _ => hv_result_err!(ENOSYS), } } + + pub fn check_timer_interrupt(&mut self) { + if self.virt_lapic_timer.check_interrupt() { + inject_vector(this_cpu_id(), self.virt_lapic_timer.vector(), None, false); + } + } } diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 3a554c49..156feaf5 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,8 +1,79 @@ +pub mod hpet; +pub mod i8254; pub mod i8259a; pub mod lapic; -use crate::zone::Zone; +use crate::{ + arch::{cpu::this_cpu_id, ipi, vmcs::Vmcs}, + consts::MAX_CPU_NUM, + zone::Zone, +}; +use alloc::{collections::vec_deque::VecDeque, vec::Vec}; use core::arch::asm; +use spin::{Mutex, Once}; + +static PENDING_VECTORS: Once = Once::new(); + +struct PendingVectors { + inner: Vec)>>>, +} + +impl PendingVectors { + fn new(max_cpus: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_cpus { + let v = Mutex::new(VecDeque::new()); + vs.push(v) + } + Self { inner: vs } + } + + fn add_vector(&self, cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { + match self.inner.get(cpu_id) { + Some(pending_vectors) 
=> { + let mut vectors = pending_vectors.lock(); + if vectors.len() > 2 { + info!("len: {:x}", vectors.len()); + } + if allow_repeat || !vectors.contains(&(vector, err_code)) { + vectors.push_back((vector, err_code)); + } + } + _ => {} + } + } + + fn check_pending_vectors(&self, cpu_id: usize) { + match self.inner.get(cpu_id) { + Some(pending_vectors) => { + let mut vectors = pending_vectors.lock(); + if let Some(vector) = vectors.front() { + let allow_interrupt = Vmcs::allow_interrupt().unwrap(); + if vector.0 < 32 || allow_interrupt { + // if it's an exception, or an interrupt that is not blocked, inject it directly. + Vmcs::inject_interrupt(vector.0, vector.1).unwrap(); + vectors.pop_front(); + } else { + // interrupts are blocked, enable interrupt-window exiting. + Vmcs::set_interrupt_window(true).unwrap(); + } + } + } + _ => {} + } + } +} + +pub fn inject_vector(cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { + PENDING_VECTORS + .get() + .unwrap() + .add_vector(cpu_id, vector, err_code, allow_repeat); +} + +pub fn check_pending_vectors(cpu_id: usize) { + PENDING_VECTORS.get().unwrap().check_pending_vectors(cpu_id); +} pub fn enable_irq() { unsafe { asm!("sti") }; @@ -17,7 +88,8 @@ pub fn inject_irq(_irq: usize, _is_hardware: bool) {} pub fn percpu_init() {} pub fn primary_init_early() { - warn!("x86_64: irqchip: primary_init_early do nothing"); + ipi::init(MAX_CPU_NUM); + PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); } pub fn primary_init_late() {} diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs index a0465aa2..e37c4002 100644 --- a/src/device/uart/uart16550a.rs +++ b/src/device/uart/uart16550a.rs @@ -178,7 +178,6 @@ pub struct VirtUart16550aUnlocked { ier: u8, lcr: u8, lsr: u8, - irq_state: u8, fifo: Fifo, } @@ -189,7 +188,6 @@ impl VirtUart16550aUnlocked { ier: 0, lcr: 0, lsr: (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(), - irq_state: 0, fifo: Fifo::new(), } } @@ 
-204,31 +202,21 @@ impl VirtUart16550aUnlocked { } if self.ier & InterruptEnableFlags::ENABLE_XMIT_HOLD_REG_EMPTY_INTR.bits() != 0 - && self.lsr & LineStatusFlags::XMIT_EMPTY.bits() != 0 + && self.lsr & LineStatusFlags::XMIT_HOLD_REG_EMPTY.bits() != 0 { iir |= InterruptIdentFlags::XMIT_HOLD_REG_EMPTY.bits(); } if iir == 0 { self.iir = InterruptIdentFlags::NO_INTR_IS_PENDING.bits(); - if self.irq_state != 0 { - all_virt_devices().send_msg( - PIC_MASTER_BASE_PORT, - UART_COM1_IRQ, - DeviceMsg::UPDATE_IRQ_LOW, - ); - } } else { self.iir = iir; - if self.irq_state == 0 { - all_virt_devices().send_msg( - PIC_MASTER_BASE_PORT, - UART_COM1_IRQ, - DeviceMsg::UPDATE_IRQ_HIGH, - ); - } + all_virt_devices().send_msg( + PIC_MASTER_BASE_PORT, + UART_COM1_IRQ, + DeviceMsg::UPDATE_IRQ_HIGH, + ); } - self.irq_state = iir; } } @@ -271,7 +259,7 @@ impl PortIoDevice for VirtUart16550a { } UartReg::INTR_ENABLE => { if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { - 0 //dlm + 0 // dlm } else { uart.ier } @@ -317,13 +305,16 @@ impl PortIoDevice for VirtUart16550a { } else { uart.lsr |= (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(); - console_putchar(value as u8); + if value != 0xff { + console_putchar(value as u8); + } } } UartReg::INTR_ENABLE => { if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { // dlm } else { + // info!("ier: {:x}", uart.ier); uart.ier = value & 0x0f; } } diff --git a/src/main.rs b/src/main.rs index 20341c23..b9e21895 100644 --- a/src/main.rs +++ b/src/main.rs @@ -132,11 +132,6 @@ fn per_cpu_init(cpu: &mut PerCpu) { } fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { - #[cfg(target_arch = "x86_64")] - arch::apic::init_lapic(); - #[cfg(target_arch = "x86_64")] - arch::apic::init_ioapic(); - for cpu_id in 0..MAX_CPU_NUM { if cpu_id == this_id { continue; @@ -157,9 +152,14 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { memory::heap::test(); #[cfg(target_arch = "x86_64")] 
MultibootInfo::init(host_dtb); + #[cfg(target_arch = "x86_64")] + arch::apic::init_ioapic(); } let cpu = PerCpu::new(cpuid); + #[cfg(target_arch = "x86_64")] + crate::device::irqchip::pic::enable_irq(); + println!( "Booting CPU {}: {:p} arch:{:p}, DTB: {:#x}", cpu.id, cpu as *const _, &cpu.arch_cpu as *const _, host_dtb diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index 180f42c7..ee887666 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -11,7 +11,7 @@ pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; -pub const ROOT_ZONE_CPUS: u64 = (1 << 0); +pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1) | (1 << 2); pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = From f51428cf664feaeb650088f016b801d829e67a73 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 23 Mar 2025 09:54:24 +0800 Subject: [PATCH 09/29] enable MMIO and I/O APIC, simplify Local APIC, passthrough UART --- src/arch/x86_64/apic.rs | 7 +- src/arch/x86_64/boot.rs | 4 - src/arch/x86_64/cpu.rs | 18 +- src/arch/x86_64/device.rs | 65 ------ src/arch/x86_64/mmio.rs | 376 +++++++++++++++++++++++++++++++ src/arch/x86_64/mod.rs | 3 +- src/arch/x86_64/msr.rs | 13 +- src/arch/x86_64/pio.rs | 62 +++++ src/arch/x86_64/s2pt.rs | 2 +- src/arch/x86_64/trap.rs | 64 ++++-- src/arch/x86_64/zone.rs | 25 +- src/device/irqchip/pic/i8254.rs | 155 ------------- src/device/irqchip/pic/i8259a.rs | 325 -------------------------- src/device/irqchip/pic/ioapic.rs | 176 +++++++++++++++ src/device/irqchip/pic/lapic.rs | 289 +++--------------------- src/device/irqchip/pic/mod.rs | 7 +- src/device/uart/mod.rs | 2 +- src/device/uart/uart16550a.rs | 169 +------------- src/main.rs | 2 +- src/platform/qemu_x86_64.rs | 43 +--- 20 files changed, 750 insertions(+), 1057 deletions(-) 
delete mode 100644 src/arch/x86_64/device.rs create mode 100644 src/arch/x86_64/pio.rs delete mode 100644 src/device/irqchip/pic/i8254.rs delete mode 100644 src/device/irqchip/pic/i8259a.rs create mode 100644 src/device/irqchip/pic/ioapic.rs diff --git a/src/arch/x86_64/apic.rs b/src/arch/x86_64/apic.rs index d4f56b6e..a7b354a5 100644 --- a/src/arch/x86_64/apic.rs +++ b/src/arch/x86_64/apic.rs @@ -1,4 +1,4 @@ -use self::irqs::*; +/*use self::irqs::*; use crate::device::irqchip::pic::enable_irq; use crate::device::irqchip::pic::hpet; use core::time::Duration; @@ -19,7 +19,7 @@ pub mod irqs { static mut IO_APIC: Option = None; const IO_APIC_BASE: u64 = 0xfec00000; -/*pub mod vectors { +pub mod vectors { pub const APIC_TIMER_VECTOR: u8 = 0xf0; pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; pub const APIC_ERROR_VECTOR: u8 = 0xf2; @@ -61,7 +61,7 @@ fn busy_wait_until(deadline: TimeValue) { while current_time() < deadline { core::hint::spin_loop(); } -}*/ +} // FIXME: temporary unsafe fn configure_gsi(io_apic: &mut IoApic, gsi: u8, vector: u8) { @@ -152,3 +152,4 @@ pub fn init_ioapic() { enable_irq(); }*/ +*/ diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index 07961167..994e937d 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -141,10 +141,6 @@ impl BootParams { } _ => {} } - // FIXME: very dirty! 
- if index == 3 { - break; - } } self.e820_entries = index as _; } diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 92899480..ef2ae3ca 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -7,14 +7,12 @@ use crate::{ Msr::{self, *}, MsrBitmap, }, + pio::PortIoBitmap, vmcs::*, vmx::*, }, consts::{core_end, MAX_CPU_NUM, PER_CPU_SIZE}, - device::irqchip::pic::{ - check_pending_vectors, hpet, - lapic::{VirtLocalApic, VirtLocalApicTimer}, - }, + device::irqchip::pic::{check_pending_vectors, hpet, lapic::VirtLocalApic}, error::{HvError, HvResult}, memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, percpu::this_cpu_data, @@ -162,6 +160,7 @@ pub struct ArchCpu { vmxon_region: VmxRegion, vmcs_region: VmxRegion, msr_bitmap: MsrBitmap, + pio_bitmap: PortIoBitmap, } impl ArchCpu { @@ -179,6 +178,7 @@ impl ArchCpu { vmxon_region: VmxRegion::uninit(), vmcs_region: VmxRegion::uninit(), msr_bitmap: MsrBitmap::uninit(), + pio_bitmap: PortIoBitmap::uninit(), } } @@ -318,6 +318,7 @@ impl ArchCpu { fn setup_vmcs(&mut self, entry: GuestPhysAddr, set_rip: bool) -> HvResult { self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false)?; self.msr_bitmap = MsrBitmap::intercept_def()?; + self.pio_bitmap = PortIoBitmap::intercept_def()?; let start_paddr = self.vmcs_region.start_paddr() as usize; Vmcs::clear(start_paddr)?; @@ -341,7 +342,7 @@ impl ArchCpu { 0, )?; - // Intercept all I/O instructions, use MSR bitmaps, activate secondary controls, + // Use I/O bitmaps and MSR bitmaps, activate secondary controls, // disable CR3 load/store interception. use PrimaryControls as CpuCtrl; Vmcs::set_control( @@ -351,7 +352,7 @@ impl ArchCpu { ( // CpuCtrl::RDTSC_EXITING | CpuCtrl::HLT_EXITING - | CpuCtrl::UNCOND_IO_EXITING + | CpuCtrl::USE_IO_BITMAPS | CpuCtrl::USE_MSR_BITMAPS | CpuCtrl::SECONDARY_CONTROLS ) @@ -406,8 +407,8 @@ impl ArchCpu { // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps. 
VmcsControl32::EXCEPTION_BITMAP.write(0)?; - VmcsControl64::IO_BITMAP_A_ADDR.write(0)?; - VmcsControl64::IO_BITMAP_B_ADDR.write(0)?; + VmcsControl64::IO_BITMAP_A_ADDR.write(self.pio_bitmap.bitmap_a_addr() as _)?; + VmcsControl64::IO_BITMAP_B_ADDR.write(self.pio_bitmap.bitmap_b_addr() as _)?; VmcsControl64::MSR_BITMAPS_ADDR.write(self.msr_bitmap.phys_addr() as _)?; Ok(()) } @@ -517,7 +518,6 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); - self.virt_lapic.check_timer_interrupt(); check_pending_vectors(this_cpu_id()); } diff --git a/src/arch/x86_64/device.rs b/src/arch/x86_64/device.rs deleted file mode 100644 index 703885e2..00000000 --- a/src/arch/x86_64/device.rs +++ /dev/null @@ -1,65 +0,0 @@ -use crate::{ - device::{ - irqchip::pic::{i8254::VirtI8254, i8259a::VirtDualI8259a}, - uart::VirtUart16550a, - }, - error::HvResult, -}; -use alloc::{sync::Arc, vec, vec::Vec}; - -pub const PIC_MASTER_BASE_PORT: u16 = 0x20; -pub const PIC_SLAVE_BASE_PORT: u16 = 0xa0; -pub const PIT_BASE_PORT: u16 = 0x40; -pub const PIT_SPEAKER_PORT: u16 = 0x61; -pub const UART_COM1_BASE_PORT: u16 = 0x3f8; - -#[allow(non_snake_case)] -pub mod DeviceMsg { - pub const UPDATE_IRQ_LOW: u8 = 0x0; - pub const UPDATE_IRQ_HIGH: u8 = 0x1; -} - -pub trait PortIoDevice: Send + Sync { - fn port_range(&self) -> &Vec>; - fn read(&self, port: u16, msg: u8) -> HvResult; - fn write(&self, port: u16, value: u32, msg: u8) -> HvResult; -} - -pub struct VirtDeviceList { - port_io_devices: Vec>, -} - -impl VirtDeviceList { - pub fn find_port_io_device(&self, port: u16) -> Option<&Arc> { - self.port_io_devices.iter().find(|dev| { - dev.port_range() - .iter() - .find(|range| range.contains(&port)) - .is_some() - }) - } - - pub fn send_msg(&self, port: u16, value: u32, msg: u8) { - if let Some(device) = self.find_port_io_device(port) { - /*info!( - "SEND MSG! 
port: {:x}, value: {:x}, msg: {:x}", - port, value, msg - );*/ - device.write(port, value, msg).unwrap(); - } - } -} - -lazy_static::lazy_static! { - static ref VIRT_DEVICES : VirtDeviceList = VirtDeviceList { - port_io_devices: vec![ - Arc::new(VirtDualI8259a::new(PIC_MASTER_BASE_PORT, PIC_SLAVE_BASE_PORT)), // Dual PIC - Arc::new(VirtI8254::new(PIT_BASE_PORT, PIT_SPEAKER_PORT)), - Arc::new(VirtUart16550a::new(UART_COM1_BASE_PORT)), // COM1 - ], - }; -} - -pub fn all_virt_devices() -> &'static VirtDeviceList { - &VIRT_DEVICES -} diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs index e69de29b..c8a0f7f8 100644 --- a/src/arch/x86_64/mmio.rs +++ b/src/arch/x86_64/mmio.rs @@ -0,0 +1,376 @@ +use crate::{ + arch::{ + s2pt::DescriptorAttr, + vmcs::{VmcsGuest16, VmcsGuestNW}, + }, + error::HvResult, + memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr, MMIOAccess}, + percpu::{this_cpu_data, this_zone}, +}; +use alloc::{sync::Arc, vec::Vec}; +use bit_field::BitField; +use core::{mem::size_of, ops::Range, slice::from_raw_parts}; +use spin::Mutex; +use x86::controlregs::{Cr0, Cr4}; + +pub trait MMIoDevice: Send + Sync { + fn gpa_range(&self) -> &Vec>; + fn read(&self, gpa: GuestPhysAddr) -> HvResult; + fn write(&self, gpa: GuestPhysAddr, value: u64, size: usize) -> HvResult; + fn trigger(&self, signal: usize) -> HvResult; +} + +numeric_enum_macro::numeric_enum! 
{ +#[repr(u32)] +#[derive(Debug)] +pub enum RmReg { + AX = 0, + CX = 1, + DX = 2, + BX = 3, + SP = 4, + BP = 5, + SI = 6, + DI = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + RIP = 16, + CR0 = 17, + CR1 = 18, + CR2 = 19, + CR3 = 20, + CR4 = 21, + GDTR = 22, + LDTR = 23, + TR = 24, + IDTR = 25, +} +} + +impl RmReg { + fn read(&self) -> HvResult { + let gen_regs = this_cpu_data().arch_cpu.regs(); + let res = match self { + RmReg::AX => gen_regs.rax, + RmReg::CX => gen_regs.rcx, + RmReg::DX => gen_regs.rdx, + RmReg::BX => gen_regs.rbx, + RmReg::SP => VmcsGuestNW::RSP.read().unwrap() as _, + RmReg::BP => gen_regs.rbp, + RmReg::SI => gen_regs.rsi, + RmReg::DI => gen_regs.rdi, + RmReg::R8 => gen_regs.r8, + RmReg::R9 => gen_regs.r9, + RmReg::R10 => gen_regs.r10, + RmReg::R11 => gen_regs.r11, + RmReg::R12 => gen_regs.r12, + RmReg::R13 => gen_regs.r13, + RmReg::R14 => gen_regs.r14, + RmReg::R15 => gen_regs.r15, + RmReg::RIP => VmcsGuestNW::RIP.read().unwrap() as _, + RmReg::CR0 => VmcsGuestNW::CR0.read().unwrap() as _, + RmReg::CR3 => VmcsGuestNW::CR3.read().unwrap() as _, + RmReg::CR4 => VmcsGuestNW::CR4.read().unwrap() as _, + RmReg::GDTR => VmcsGuestNW::GDTR_BASE.read().unwrap() as _, + RmReg::LDTR => VmcsGuestNW::LDTR_BASE.read().unwrap() as _, + RmReg::TR => VmcsGuestNW::TR_BASE.read().unwrap() as _, + RmReg::IDTR => VmcsGuestNW::IDTR_BASE.read().unwrap() as _, + _ => 0, + }; + Ok(res) + } + + fn write(&self, new_value: u64, size: usize) -> HvResult { + let mut gen_regs = this_cpu_data().arch_cpu.regs_mut(); + + let mut value = self.read().unwrap(); + value.set_bits(0..(size * 4), new_value.get_bits(0..(size * 4))); + + match self { + RmReg::AX => gen_regs.rax = value, + RmReg::CX => gen_regs.rcx = value, + RmReg::DX => gen_regs.rdx = value, + RmReg::BX => gen_regs.rbx = value, + RmReg::SP => VmcsGuestNW::RSP.write(value as _)?, + RmReg::BP => gen_regs.rbp = value, + RmReg::SI => gen_regs.rsi = value, + RmReg::DI => 
gen_regs.rdi = value, + RmReg::R8 => gen_regs.r8 = value, + RmReg::R9 => gen_regs.r9 = value, + RmReg::R10 => gen_regs.r10 = value, + RmReg::R11 => gen_regs.r11 = value, + RmReg::R12 => gen_regs.r12 = value, + RmReg::R13 => gen_regs.r13 = value, + RmReg::R14 => gen_regs.r14 = value, + RmReg::R15 => gen_regs.r15 = value, + RmReg::RIP => VmcsGuestNW::RIP.write(value as _)?, + RmReg::CR0 => VmcsGuestNW::CR0.write(value as _)?, + RmReg::CR3 => VmcsGuestNW::CR3.write(value as _)?, + RmReg::CR4 => VmcsGuestNW::CR4.write(value as _)?, + RmReg::GDTR => VmcsGuestNW::GDTR_BASE.write(value as _)?, + RmReg::LDTR => VmcsGuestNW::LDTR_BASE.write(value as _)?, + RmReg::TR => VmcsGuestNW::TR_BASE.write(value as _)?, + RmReg::IDTR => VmcsGuestNW::IDTR_BASE.write(value as _)?, + _ => {} + } + Ok(()) + } +} + +/* +G: general registers +E: registers / memory +B: byte +V: word / dword / qword +*/ +numeric_enum_macro::numeric_enum! { +#[repr(u8)] +#[derive(Debug)] +pub enum OpCode { + // move r to r/m + MovEvGv = 0x89, + // move r/m to r + MovGvEv = 0x8b, +} +} + +bitflags::bitflags! 
{ + #[derive(Debug, PartialEq)] + struct RexPrefixLow: u8 { + const BASE = 1 << 0; + const INDEX = 1 << 1; + const REGISTERS = 1 << 2; + const OPERAND_WIDTH = 1 << 3; + } +} +const REX_PREFIX_HIGH: u8 = 0x4; + +// len stands for instruction len +enum OprandType { + Reg { reg: RmReg, len: usize }, + Gpa { gpa: usize, len: usize }, +} + +struct ModRM { + pub _mod: u32, + pub reg_opcode: u32, + pub rm: u32, +} + +impl ModRM { + pub fn new(byte: u8, rex: &RexPrefixLow) -> Self { + let mut reg_opcode = byte.get_bits(3..=5) as u32; + if rex.contains(RexPrefixLow::REGISTERS) { + reg_opcode.set_bit(3, true); + } + Self { + _mod: byte.get_bits(6..=7) as _, + reg_opcode, + rm: byte.get_bits(0..=2) as _, + } + } + + pub fn get_reg(&self) -> RmReg { + self.reg_opcode.try_into().unwrap() + } + + pub fn get_modrm(&self, inst: &Vec, disp_id: usize) -> Option { + let reg: RmReg = self.rm.try_into().unwrap(); + let mut reg_val = reg.read().unwrap(); + // TODO: SIB + match self._mod { + 0 => Some(OprandType::Gpa { + gpa: gva_to_gpa(reg_val as _).unwrap(), + len: 0, + }), + 1 => { + let mut buf = [0u8; 1]; + buf[0..1].copy_from_slice(&inst[disp_id..disp_id + 1]); + let disp_8 = i8::from_ne_bytes(buf); + if disp_8 > 0 { + reg_val += (disp_8 as u64); + } else { + reg_val -= ((-disp_8) as u64); + } + Some(OprandType::Gpa { + gpa: gva_to_gpa(reg_val as _).unwrap(), + len: 1, + }) + } + 2 => { + let mut buf = [0u8; 4]; + buf[0..4].copy_from_slice(&inst[disp_id..disp_id + 4]); + let disp_32 = i32::from_ne_bytes(buf); + if disp_32 > 0 { + reg_val += (disp_32 as u64); + } else { + reg_val -= ((-disp_32) as u64); + } + Some(OprandType::Gpa { + gpa: gva_to_gpa(reg_val as _).unwrap(), + len: 4, + }) + } + 3 => Some(OprandType::Reg { reg, len: 0 }), + _ => None, + } + } +} + +fn gpa_to_hpa(gpa: GuestPhysAddr) -> HvResult { + let (hpa, _, _) = unsafe { this_zone().read().gpm.page_table_query(gpa)? 
}; + Ok(hpa) +} + +fn get_page_entry(pt_hpa: HostPhysAddr, pte_id: usize) -> usize { + unsafe { (*((pt_hpa + (pte_id * size_of::())) as *const usize)) & 0x7ffffffffffffusize } +} + +fn gva_to_gpa(gva: GuestVirtAddr) -> HvResult { + let mut gpa: GuestPhysAddr = 0; + let cr0 = VmcsGuestNW::CR0.read()?; + let cr4 = VmcsGuestNW::CR4.read()?; + + // guest hasn't enabled paging, va = pa + if cr0 & Cr0::CR0_ENABLE_PAGING.bits() == 0 { + gpa = gva; + // still in real mode, apply cs + if cr0 & Cr0::CR0_PROTECTED_MODE.bits() == 0 { + let cs_selector = VmcsGuest16::CS_SELECTOR.read()? as usize; + gpa = (cs_selector << 4) | gva; + } + return Ok(gpa); + } + + if cr4 & Cr4::CR4_ENABLE_PAE.bits() == 0 { + panic!("protected mode gva_to_gpa not implemented yet!"); + } + + // lookup guest page table in long mode + + let p4_gpa = VmcsGuestNW::CR3.read()?; + let p4_hpa = gpa_to_hpa(p4_gpa)?; + let p4_entry_id = (gva >> 39) & 0x1ff; + let p4_entry = get_page_entry(p4_hpa, p4_entry_id); + + let p3_gpa = p4_entry & !(0xfff); + let p3_entry_id = (gva >> 30) & 0x1ff; + let p3_hpa = gpa_to_hpa(p3_gpa)?; + let p3_entry = get_page_entry(p3_hpa, p3_entry_id); + + // info!("p3_entry: {:x}", p3_entry); + + if p3_entry & (DescriptorAttr::HUGE_PAGE.bits() as usize) != 0 { + let page_gpa = p3_entry & !(0xfff); + return Ok(page_gpa | (gva & 0x3fffffff)); + } + + let p2_gpa = p3_entry & !(0xfff); + let p2_entry_id = (gva >> 21) & 0x1ff; + let p2_hpa = gpa_to_hpa(p2_gpa)?; + let p2_entry = get_page_entry(p2_hpa, p2_entry_id); + + // info!("p2_entry: {:x}", p2_entry); + + if p2_entry & (DescriptorAttr::HUGE_PAGE.bits() as usize) != 0 { + let page_gpa = p2_entry & !(0xfff); + return Ok(page_gpa | (gva & 0x1fffff)); + } + + let p1_gpa = p2_entry & !(0xfff); + let p1_entry_id = (gva >> 12) & 0x1ff; + let p1_hpa = gpa_to_hpa(p1_gpa)?; + let p1_entry = get_page_entry(p1_hpa, p1_entry_id); + + // info!("p1_entry: {:x}", p1_entry); + + let page_gpa: usize = p1_entry & !(0xfff); + Ok(page_gpa | (gva & 0xfff)) 
+} + +fn emulate_inst(inst: &Vec, dev: &Arc) -> HvResult { + assert!(inst.len() > 0); + + let mut cur_id = 0; + + let mut rex = RexPrefixLow::from_bits_truncate(0); + if inst[cur_id].get_bits(4..=7) == REX_PREFIX_HIGH { + rex = RexPrefixLow::from_bits_truncate(inst[cur_id].get_bits(0..=3)); + assert!(rex == RexPrefixLow::REGISTERS); + cur_id += 1; + } + + let opcode: OpCode = inst[cur_id].try_into().unwrap(); + cur_id += 1; + + match opcode { + OpCode::MovEvGv => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let src = mod_rm.get_reg(); + let src_val = src.read().unwrap(); + + let dst = mod_rm.get_modrm(inst, cur_id).unwrap(); + match dst { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.write(src_val, size_of::()).unwrap(); + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + dev.write(gpa, src_val, size_of::()).unwrap(); + } + _ => {} + } + + Ok(cur_id) + } + OpCode::MovGvEv => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let dst = mod_rm.get_reg(); + + let src = mod_rm.get_modrm(inst, cur_id).unwrap(); + let src_val = match src { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.read().unwrap() + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + dev.read(gpa).unwrap() + } + }; + // info!("src_val: {:x}", src_val); + + dst.write(src_val, size_of::()).unwrap(); + Ok(cur_id) + } + _ => { + hv_result_err!( + ENOSYS, + format!("Unimplemented opcode: 0x{:x}", opcode as u8) + ) + } + } +} + +pub fn mmio_handler(mmio: &mut MMIOAccess, dev: &Arc) -> HvResult { + let rip_hpa = gpa_to_hpa(gva_to_gpa(VmcsGuestNW::RIP.read()?)?)? 
as *const u8; + let inst = unsafe { from_raw_parts(rip_hpa, 15) }.to_vec(); + + // info!("rip_hpa: {:?}, inst: {:x?}", rip_hpa, inst); + + let len = emulate_inst(&inst, dev).unwrap(); + this_cpu_data().arch_cpu.advance_guest_rip(len as _)?; + + Ok(()) +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 744ff62c..5b33e2d7 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -4,14 +4,15 @@ pub mod apic; pub mod boot; pub mod cpu; pub mod cpuid; -pub mod device; pub mod entry; pub mod gdt; pub mod idt; pub mod ipi; pub mod mm; +pub mod mmio; pub mod msr; pub mod paging; +pub mod pio; pub mod s1pt; pub mod s2pt; pub mod trap; diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index 220dbaeb..abbe31f0 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -163,12 +163,20 @@ impl MsrBitmap { } pub fn intercept_def() -> HvResult { - // Intercept IA32_APIC_BASE MSR accesses let mut bitmap = Self { frame: Frame::new_zero()?, }; bitmap.set_read_intercept(IA32_APIC_BASE, true); + bitmap.set_read_intercept(IA32_X2APIC_APICID, true); + bitmap.set_read_intercept(IA32_X2APIC_LDR, true); + bitmap.set_read_intercept(IA32_X2APIC_LVT_TIMER, true); + + bitmap.set_write_intercept(IA32_APIC_BASE, true); + bitmap.set_write_intercept(IA32_X2APIC_ICR, true); + bitmap.set_write_intercept(IA32_X2APIC_LVT_TIMER, true); + + /*bitmap.set_read_intercept(IA32_APIC_BASE, true); bitmap.set_write_intercept(IA32_APIC_BASE, true); bitmap.set_read_intercept(IA32_TSC_DEADLINE, true); @@ -180,7 +188,8 @@ impl MsrBitmap { bitmap.set_read_intercept(msr, true); bitmap.set_write_intercept(msr, true); } - } + }*/ + Ok(bitmap) } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs new file mode 100644 index 00000000..607e6a29 --- /dev/null +++ b/src/arch/x86_64/pio.rs @@ -0,0 +1,62 @@ +use crate::{ + error::HvResult, + memory::{Frame, HostPhysAddr}, +}; + +pub const UART_COM1_BASE_PORT: u16 = 0x3f8; + +#[derive(Debug)] +pub struct PortIoBitmap { + 
a: Frame, + b: Frame, +} + +impl PortIoBitmap { + pub fn uninit() -> Self { + Self { + a: unsafe { Frame::from_paddr(0) }, + b: unsafe { Frame::from_paddr(0) }, + } + } + + pub fn intercept_def() -> HvResult { + let mut bitmap = Self { + a: Frame::new_zero()?, + b: Frame::new_zero()?, + }; + bitmap.a.fill(0xff); + bitmap.b.fill(0xff); + + for port in UART_COM1_BASE_PORT..UART_COM1_BASE_PORT + 8 { + bitmap.set_intercept(port, false); + } + + Ok(bitmap) + } + + pub fn bitmap_a_addr(&self) -> HostPhysAddr { + self.a.start_paddr() + } + + pub fn bitmap_b_addr(&self) -> HostPhysAddr { + self.b.start_paddr() + } + + fn set_intercept(&mut self, mut port: u16, intercept: bool) { + let bitmap = match port <= 0x7fff { + true => unsafe { core::slice::from_raw_parts_mut(self.a.as_mut_ptr(), 1024) }, + false => { + port -= 0x8000; + unsafe { core::slice::from_raw_parts_mut(self.b.as_mut_ptr(), 1024) } + } + }; + + let byte = (port / 8) as usize; + let bits = port % 8; + if intercept { + bitmap[byte] |= 1 << bits; + } else { + bitmap[byte] &= !(1 << bits); + } + } +} diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index 718b5497..42126b28 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -16,7 +16,7 @@ use core::{arch::asm, fmt}; bitflags! { /// EPT entry flags. (SDM Vol. 3C, Section 28.3.2) - struct DescriptorAttr: u64 { + pub struct DescriptorAttr: u64 { /// Read access. const READ = 1 << 0; /// Write access. 
diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 42257907..c0709a05 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,9 +1,8 @@ use crate::{ arch::{ - cpu::ArchCpu, + cpu::{this_cpu_id, ArchCpu}, cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, - device::all_virt_devices, - idt::IdtStruct, + idt::{IdtStruct, IdtVector}, ipi, msr::Msr::{self, *}, s2pt::Stage2PageFaultInfo, @@ -11,15 +10,22 @@ use crate::{ vmx::{VmxCrAccessInfo, VmxExitInfo, VmxExitReason, VmxInterruptInfo, VmxIoExitInfo}, }, device::{ - irqchip::pic::{hpet, lapic::VirtLocalApic}, + irqchip::{ + inject_vector, + pic::{ + hpet, + ioapic::{ioapic_inject_irq, irqs}, + lapic::VirtLocalApic, + }, + }, uart::UartReg, }, error::HvResult, + memory::{mmio_handle_access, MMIOAccess, MemFlags}, + percpu::this_cpu_data, }; use x86_64::registers::control::Cr4Flags; -use super::{device::UART_COM1_BASE_PORT, idt::IdtVector}; - core::arch::global_asm!( include_str!("trap.S"), sym arch_handle_trap @@ -76,14 +82,17 @@ pub fn arch_handle_trap(tf: &mut TrapFrame) { fn handle_irq(vector: u8) { match vector { IdtVector::VIRT_IPI_VECTOR => ipi::handle_virt_ipi(), - IdtVector::APIC_TIMER_VECTOR => {} + IdtVector::APIC_TIMER_VECTOR => inject_vector( + this_cpu_id(), + this_cpu_data().arch_cpu.virt_lapic.virt_timer_vector, + None, + true, + ), IdtVector::UART_COM1_VECTOR => { - if let Some(device) = all_virt_devices().find_port_io_device(UART_COM1_BASE_PORT) { - device.read(UART_COM1_BASE_PORT + UartReg::LINE_STATUS, 0); - } + ioapic_inject_irq(irqs::UART_COM1_IRQ); } _ => { - println!("Unhandled irq {}", vector); + // println!("Unhandled irq {}", vector); } } unsafe { VirtLocalApic::phys_local_apic().end_of_interrupt() }; @@ -219,7 +228,7 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR return hv_result_err!(ENOSYS); } - if let Some(dev) = all_virt_devices().find_port_io_device(io_info.port) { + /*if let Some(dev) = 
all_virt_devices().find_port_io_device(io_info.port) { if io_info.is_in { let value = dev.read(io_info.port, 0)?; let rax = &mut arch_cpu.regs_mut().rax; @@ -250,7 +259,7 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR "Unsupported I/O port {:#x} access: {:#x?} \n {:#x?}", io_info.port, io_info, arch_cpu ) - } + }*/ arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; Ok(()) @@ -261,7 +270,9 @@ fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult { if let Ok(msr) = Msr::try_from(rcx) { let res = if msr == IA32_APIC_BASE { + // FIXME: non root linux let mut apic_base = unsafe { IA32_APIC_BASE.read() }; + // info!("APIC BASE: {:x}", apic_base); apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC Ok(apic_base) } else if VirtLocalApic::msr_range().contains(&rcx) { @@ -309,12 +320,23 @@ fn handle_msr_write(arch_cpu: &mut ArchCpu) -> HvResult { Ok(()) } -fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, guest_rip: usize) -> HvResult { +fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { let fault_info = Stage2PageFaultInfo::new()?; - panic!( + mmio_handle_access(&mut MMIOAccess { + address: fault_info.fault_guest_paddr, + size: 0, + is_write: fault_info.access_flags.contains(MemFlags::WRITE), + value: 0, + })?; + + // FIXME: do advance_guest_rip in mmio handler, for the inst len is not correct + // arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; + Ok(()) + + /*panic!( "VM exit: S2PT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?}), {:#x?}", - guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags, arch_cpu - ); + exit_info.guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags, arch_cpu + );*/ } fn handle_triple_fault(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { @@ -348,7 +370,7 @@ pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { VmxExitReason::IO_INSTRUCTION => handle_io_instruction(arch_cpu, 
&exit_info), VmxExitReason::MSR_READ => handle_msr_read(arch_cpu), VmxExitReason::MSR_WRITE => handle_msr_write(arch_cpu), - VmxExitReason::EPT_VIOLATION => handle_s2pt_violation(arch_cpu, exit_info.guest_rip), + VmxExitReason::EPT_VIOLATION => handle_s2pt_violation(arch_cpu, &exit_info), _ => panic!( "Unhandled VM-Exit reason {:?}:\n{:#x?}", exit_info.exit_reason, arch_cpu @@ -357,8 +379,10 @@ pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult { if res.is_err() { panic!( - "Failed to handle VM-exit {:?}:\n{:#x?}", - exit_info.exit_reason, arch_cpu + "Failed to handle VM-exit {:?}:\n{:#x?}\n{:?}", + exit_info.exit_reason, + arch_cpu, + res.err() ); } diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index 5f509204..43262cf1 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -1,13 +1,18 @@ use crate::{ + arch::mmio::mmio_handler, config::*, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, + platform::qemu_x86_64::MEM_TYPE_ROM, zone::Zone, }; #[repr(C)] #[derive(Debug, Clone)] -pub struct HvArchZoneConfig {} +pub struct HvArchZoneConfig { + pub ioapic_base: usize, + pub ioapic_size: usize, +} impl Zone { pub fn pt_init(&mut self, mem_regions: &[HvConfigMemoryRegion]) -> HvResult { @@ -17,9 +22,11 @@ impl Zone { let mut flags = MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE; if mem_region.mem_type == MEM_TYPE_IO { flags |= MemFlags::IO; + } else if mem_region.mem_type == MEM_TYPE_ROM { + flags &= !MemFlags::WRITE; } match mem_region.mem_type { - MEM_TYPE_RAM | MEM_TYPE_IO => { + MEM_TYPE_RAM | MEM_TYPE_ROM | MEM_TYPE_IO => { self.gpm.insert(MemoryRegion::new_with_offset_mapper( mem_region.virtual_start as GuestPhysAddr, mem_region.physical_start as HostPhysAddr, @@ -27,7 +34,15 @@ impl Zone { flags, ))? 
} - /*TODO: MEM_TYPE_VIRTIO => { + /*MEM_TYPE_IO => { + self.mmio_region_register( + mem_region.physical_start as _, + mem_region.size as _, + mmio_handler, + mem_region.physical_start as _, + ); + } + MEM_TYPE_VIRTIO => { self.mmio_region_register( mem_region.physical_start as _, mem_region.size as _, @@ -45,7 +60,9 @@ impl Zone { Ok(()) } - pub fn mmio_init(&mut self, hv_config: &HvArchZoneConfig) {} + pub fn mmio_init(&mut self, hv_config: &HvArchZoneConfig) { + self.ioapic_mmio_init(hv_config); + } pub fn isa_init(&mut self, fdt: &fdt::Fdt) {} diff --git a/src/device/irqchip/pic/i8254.rs b/src/device/irqchip/pic/i8254.rs deleted file mode 100644 index 4de21534..00000000 --- a/src/device/irqchip/pic/i8254.rs +++ /dev/null @@ -1,155 +0,0 @@ -use crate::{arch::device::PortIoDevice, device::irqchip::pic::hpet, error::HvResult}; -use alloc::vec::Vec; -use bit_field::BitField; -use core::ops::Range; -use spin::Mutex; - -const VIRT_PIT_FREQ_HZ: u64 = 1193182; - -#[allow(non_snake_case)] -pub mod ReadWriteState { - pub const LSB: u8 = 1; - pub const MSB: u8 = 2; - pub const WORD_0: u8 = 3; - pub const WORD_1: u8 = 4; -} - -#[derive(Debug, Default, Clone)] -struct VirtI8254Channel { - count: i32, - mode: u8, - rw_mode: u8, - read_state: u8, - write_state: u8, - write_latch: u32, - count_set_time: u64, -} - -impl VirtI8254Channel { - fn get_count(&self) -> i32 { - let delta = - (hpet::current_time_nanos() - self.count_set_time) * VIRT_PIT_FREQ_HZ / 1_000_000_000; - let mut count = self.count; - match self.mode { - 0 => count = (self.count - (delta as i32)) & 0xffff, - _ => {} - } - count - } - - fn set_count(&mut self, mut value: u32) { - if value == 0 { - value = 0x1_0000; - } - self.count_set_time = hpet::current_time_nanos(); - self.count = value as _; - } -} - -pub struct VirtI8254 { - base_port: u16, - speaker_port: u16, - port_range: Vec>, - channels: Vec>, -} - -impl VirtI8254 { - pub fn new(base_port: u16, speaker_port: u16) -> Self { - Self { - base_port, - 
speaker_port, - port_range: vec![base_port..base_port + 4, speaker_port..speaker_port + 1], - channels: vec![ - Mutex::new(VirtI8254Channel::default()), - Mutex::new(VirtI8254Channel::default()), - Mutex::new(VirtI8254Channel::default()), - ], - } - } -} - -impl PortIoDevice for VirtI8254 { - fn port_range(&self) -> &Vec> { - &self.port_range - } - - fn read(&self, port: u16, msg: u8) -> HvResult { - // info!("i8254 read: {:x}", port); - - /*if port == self.speaker_port { - if let Some(channel) = self.channels.get(2) { - let mut channel = channel.lock(); - let cnt = channel.get_count(); - return Ok(0); - } - }*/ - - let chan_id = ((port - self.base_port) & 3) as usize; - if let Some(channel) = self.channels.get(chan_id) { - let mut channel = channel.lock(); - - let ret = match channel.read_state { - ReadWriteState::LSB => 0, - ReadWriteState::MSB => 0, - ReadWriteState::WORD_0 => { - channel.read_state = ReadWriteState::WORD_1; - channel.get_count() & 0xff - } - ReadWriteState::WORD_1 => { - channel.read_state = ReadWriteState::WORD_0; - (channel.get_count() >> 8) & 0xff - } - _ => 0, - }; - return Ok(ret as u32); - } - - Ok(0) - } - - fn write(&self, port: u16, value: u32, msg: u8) -> HvResult { - // info!("i8254 write: {:x}, {:x}", port, value); - - let offset: usize = (port - self.base_port) as _; - match offset { - 3 => { - let chan_id: usize = value.get_bits(6..=7) as _; - if chan_id == 3 { - } else if let Some(channel) = self.channels.get(chan_id) { - let mut channel = channel.lock(); - let access: u8 = value.get_bits(4..=5) as _; - - if access != 0 { - channel.rw_mode = access; - channel.read_state = access; - channel.write_state = access; - - channel.mode = value.get_bits(1..=3) as _; - } - } - } - 0 | 1 | 2 => { - if let Some(channel) = self.channels.get(offset) { - let mut channel = channel.lock(); - match channel.write_state { - ReadWriteState::LSB => {} - ReadWriteState::MSB => {} - ReadWriteState::WORD_0 => { - channel.write_latch = value; - 
channel.write_state = ReadWriteState::WORD_1; - } - ReadWriteState::WORD_1 => { - let low = channel.write_latch; - channel.set_count(low | (value << 8)); - channel.write_state = ReadWriteState::WORD_0; - } - _ => {} - } - } - } - _ => {} - }; - - Ok(()) - } -} diff --git a/src/device/irqchip/pic/i8259a.rs b/src/device/irqchip/pic/i8259a.rs deleted file mode 100644 index 7a015e78..00000000 --- a/src/device/irqchip/pic/i8259a.rs +++ /dev/null @@ -1,325 +0,0 @@ -use crate::{ - arch::device::{ - all_virt_devices, DeviceMsg, PortIoDevice, PIC_MASTER_BASE_PORT, PIC_SLAVE_BASE_PORT, - }, - device::irqchip::inject_vector, - error::HvResult, - percpu::{get_cpu_data, this_cpu_data}, -}; -use alloc::vec::Vec; -use core::ops::Range; -use spin::{Mutex, MutexGuard}; - -pub const SEND_IRQ_KEY: u8 = 0x82; -pub const SEND_VECTOR_KEY: u8 = 0x59; - -struct VirtI8259aUnlocked { - base_port: u16, - isr: u8, - irr: u8, - imr: u8, - init_state: u8, - vector_base: u8, - auto_eoi: bool, - icw4_needed: bool, - is_master: bool, - poll_cmd: bool, - read_isr: bool, - special_fully_nested_mode: bool, - special_mask: bool, -} - -impl VirtI8259aUnlocked { - fn new(base_port: u16, is_master: bool) -> Self { - Self { - base_port, - isr: 0, - irr: 0, - imr: 0, - init_state: 0, - vector_base: 0, - auto_eoi: false, - icw4_needed: false, - is_master, - poll_cmd: false, - read_isr: false, - special_fully_nested_mode: false, - special_mask: false, - } - } - - fn ack_irq(&mut self, irq: i32) { - // TODO: auto eoi - self.isr |= (1 << irq); - // TODO: elcr - self.irr &= !(1 << irq); - // VirtDualI8259aUnlocked::update_irq(self, pic2); - } - - fn get_priority(&mut self, mask: u8) -> u8 { - if mask == 0 { - return 8; - } - - let mut priority: u8 = 0; - // TODO: priority add - while mask & (1 << (priority & 7)) == 0 { - priority += 1; - } - - priority - } - - fn get_irq(&mut self) -> i32 { - let mut mask = self.irr & !self.imr; - let priority = self.get_priority(mask); - if priority == 8 { - return -1; - } - - 
mask = self.isr; - if self.special_mask { - mask &= !self.imr; - } - if self.special_fully_nested_mode && self.is_master { - mask &= !(1u8 << 2); // ignore in service slave irq - } - let cur_priority = self.get_priority(mask); - if priority < cur_priority { - return (priority as i32) & 7; - } - - -1 - } - - fn set_irq(&mut self, irq: i32) { - let mask: u8 = 1 << irq; - self.irr |= mask; - } -} - -pub struct VirtDualI8259aUnlocked { - pics: Vec, -} - -impl VirtDualI8259aUnlocked { - fn new(master_base_port: u16, slave_base_port: u16) -> Self { - Self { - pics: vec![ - VirtI8259aUnlocked::new(master_base_port, true), - VirtI8259aUnlocked::new(slave_base_port, false), - ], - } - } - - fn ack_irq(&mut self, irq: i32, id: usize) { - self.pics[id].ack_irq(irq); - self.update_irq(id); - } - - fn set_irq(&mut self, irq: i32, id: usize) { - self.pics[id].set_irq(irq); - // isr bit empty, can serve - if self.pics[id].isr & (1 << irq) == 0 { - self.update_irq(id); - } - } - - fn update_irq(&mut self, id: usize) { - let mut pic = &mut self.pics[id]; - let irq = pic.get_irq(); - if irq < 0 { - return; - } - - if pic.is_master { - let mut vector = pic.vector_base + (irq as u8); - if irq == 2 { - let mut irq2 = self.pics[1].get_irq(); - if irq2 >= 0 { - self.ack_irq(irq2, 1); - } else { - irq2 = 7; // spurious irq - } - vector = self.pics[1].vector_base + (irq2 as u8); - } - self.ack_irq(irq, 0); - // TODO: single core? smp? 
- inject_vector(0, vector, None, true); - } else { - self.set_irq(2, 0); - } - } - - fn read(&mut self, id: usize, port: u16) -> HvResult { - let mut pic = &mut self.pics[id]; - if pic.poll_cmd { - pic.poll_cmd = false; - let mut irq = pic.get_irq(); - if irq >= 0 { - self.ack_irq(irq, id); - irq |= 0x80; - } else { - irq = 0; - } - return Ok(irq as u32); - } - - let mut ret = 0; - let offset = port - pic.base_port; - if offset == 0 { - if pic.read_isr { - ret = pic.isr; - } else { - ret = pic.irr; - } - } else { - ret = pic.imr; - } - - Ok(ret as u32) - } - - fn write(&mut self, id: usize, port: u16, value: u32) -> HvResult { - let mut pic = &mut self.pics[id]; - let offset = port - pic.base_port; - let value: u8 = value as u8; - if offset == 0 { - if value & 0x10 != 0 { - // ICW1 - if value & 0x08 != 0 { - error!("I8259A: level-triggered not supported!"); - } - if value & 0x02 != 0 { - error!("I8259A: single pic not supported!"); - } - pic.init_state = 1; - pic.icw4_needed = (value & 0x01) != 0; - } else if value & 0x08 != 0 { - // OCW3 - if value & 0x02 != 0 { - pic.read_isr = value & 1 != 0; - } - if value & 0x04 != 0 { - pic.poll_cmd = true; - } - if value & 0x40 != 0 { - pic.special_mask = (value >> 5) & 1 != 0; - } - } else { - // OCW2 - let cmd = value >> 5; - match cmd { - 0 | 4 => { - // TODO: rotate auto eoi - } - 1 | 5 => { - // non specific eoi - let isr = pic.isr; - let priority = pic.get_priority(isr); - if priority != 8 { - let irq = priority & 7; - pic.isr &= !(1 << irq); - self.update_irq(id); - } - } - 3 | 7 => { - // specific eoi - let irq = value & 7; - pic.isr &= !(1 << irq); - self.update_irq(id); - } - _ => {} - } - } - } else { - match pic.init_state { - 0 => { - pic.imr = value; - self.update_irq(id); - } - 1 => { - // ICW2 - pic.vector_base = value & 0xf8; - // info!("I8259A: vector base: {:x}", pic.vector_base); - pic.init_state = 2; - } - 2 => { - // ICW3 - // master: 0x4, slave: 0x2 - pic.init_state = match pic.icw4_needed { - true => 
3, - false => 0, - } - } - 3 => { - // ICW4 - pic.special_fully_nested_mode = (value >> 4) & 1 != 0; - pic.auto_eoi = (value >> 1) & 1 != 0; - pic.init_state = 0; - } - _ => {} - } - } - Ok(()) - } -} - -pub struct VirtDualI8259a { - port_range: Vec>, - dual_pic: Mutex, -} - -impl VirtDualI8259a { - pub fn new(master_base_port: u16, slave_base_port: u16) -> Self { - Self { - port_range: vec![ - master_base_port..master_base_port + 2, - slave_base_port..slave_base_port + 2, - ], - dual_pic: Mutex::new(VirtDualI8259aUnlocked::new( - master_base_port, - slave_base_port, - )), - } - } -} - -impl PortIoDevice for VirtDualI8259a { - fn port_range(&self) -> &Vec> { - &self.port_range - } - - fn read(&self, port: u16, msg: u8) -> HvResult { - // info!("I8259A read, port: {:x}", port); - let mut dual_pic = self.dual_pic.lock(); - - if self.port_range[0].contains(&port) { - dual_pic.read(0, port) - } else { - dual_pic.read(1, port) - } - } - - fn write(&self, port: u16, value: u32, msg: u8) -> HvResult { - // info!("I8259A write, port: {:x} value: {:x}", port, value); - let mut dual_pic = self.dual_pic.lock(); - let is_master = self.port_range[0].contains(&port); - let id = match is_master { - true => 0, - false => 1, - }; - - if msg != 0 { - match msg { - DeviceMsg::UPDATE_IRQ_HIGH => { - dual_pic.set_irq(value as i32, id); - } - _ => {} - } - return Ok(()); - } - - dual_pic.write(id, port, value) - } -} diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs new file mode 100644 index 00000000..852b28df --- /dev/null +++ b/src/device/irqchip/pic/ioapic.rs @@ -0,0 +1,176 @@ +use self::{irqs::*, IoApicReg::*}; +use crate::{ + arch::{idt::IdtVector, mmio::mmio_handler, mmio::MMIoDevice, zone::HvArchZoneConfig}, + device::irqchip::pic::{enable_irq, hpet, inject_vector}, + error::HvResult, + memory::{GuestPhysAddr, MMIOAccess}, + platform::qemu_x86_64::ROOT_IOAPIC_BASE, + zone::Zone, +}; +use alloc::{sync::Arc, vec::Vec}; +use bit_field::BitField; +use 
core::{ops::Range, time::Duration, u32}; +use raw_cpuid::CpuId; +use spin::Mutex; +use x2apic::{ + ioapic::{IoApic, IrqFlags, IrqMode}, + lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}, +}; +use x86_64::instructions::port::Port; + +pub mod irqs { + pub const UART_COM1_IRQ: u8 = 0x4; +} + +#[allow(non_snake_case)] +pub mod IoApicReg { + pub const ID: u32 = 0x00; + pub const VERSION: u32 = 0x01; + pub const ARBITRATION: u32 = 0x02; + pub const TABLE_BASE: u32 = 0x10; +} + +const IOAPIC_MAX_REDIRECT_ENTRIES: u64 = 0x17; + +static mut IO_APIC: Option = None; + +lazy_static::lazy_static! { + static ref VIRT_IOAPIC: (Arc,) = (Arc::new(VirtIoApic::new(ROOT_IOAPIC_BASE)),); +} + +#[derive(Default)] +struct VirtIoApicUnlocked { + cur_reg: u32, + rte: [u64; (IOAPIC_MAX_REDIRECT_ENTRIES + 1) as usize], +} + +pub struct VirtIoApic { + base_gpa: usize, + gpa_range: Vec>, + inner: Mutex, +} + +impl VirtIoApic { + pub fn new(base_gpa: GuestPhysAddr) -> Self { + Self { + base_gpa, + gpa_range: vec![base_gpa..base_gpa + 0x1000], + inner: Mutex::new(VirtIoApicUnlocked::default()), + } + } +} + +impl MMIoDevice for VirtIoApic { + fn gpa_range(&self) -> &Vec> { + &self.gpa_range + } + + fn read(&self, gpa: GuestPhysAddr) -> HvResult { + // info!("ioapic read! 
gpa: {:x}", gpa,); + + if gpa == self.base_gpa { + return Ok(self.inner.lock().cur_reg as _); + } + assert!(gpa - self.base_gpa == 0x10); + + let inner = self.inner.lock(); + match inner.cur_reg { + IoApicReg::ID => Ok(0), + IoApicReg::VERSION => Ok(IOAPIC_MAX_REDIRECT_ENTRIES << 16 | 0x11), // max redirect entries: 0x17, version: 0x11 + IoApicReg::ARBITRATION => Ok(0), + mut reg => { + reg -= IoApicReg::TABLE_BASE; + let index = (reg >> 1) as usize; + if let Some(entry) = inner.rte.get(index) { + if reg % 2 == 0 { + Ok((*entry).get_bits(0..=31)) + } else { + Ok((*entry).get_bits(32..=63)) + } + } else { + Ok(0) + } + } + } + } + + fn write(&self, gpa: GuestPhysAddr, value: u64, size: usize) -> HvResult { + /*info!( + "ioapic write! gpa: {:x}, value: {:x}, size: {:x}", + gpa, value, size, + );*/ + assert!(size == 4); + + if gpa == self.base_gpa { + self.inner.lock().cur_reg = value as _; + return Ok(()); + } + assert!(gpa - self.base_gpa == 0x10); + + let mut inner = self.inner.lock(); + match inner.cur_reg { + IoApicReg::ID | IoApicReg::VERSION | IoApicReg::ARBITRATION => {} + mut reg => { + reg -= IoApicReg::TABLE_BASE; + let index = (reg >> 1) as usize; + if let Some(entry) = inner.rte.get_mut(index) { + if reg % 2 == 0 { + entry.set_bits(0..=31, value.get_bits(0..=31)); + } else { + entry.set_bits(32..=63, value.get_bits(0..=31)); + } + } + } + } + Ok(()) + } + + fn trigger(&self, signal: usize) -> HvResult { + if let Some(entry) = self.inner.lock().rte.get(signal) { + // TODO: physical & logical mode + let dest = entry.get_bits(56..=63) as usize; + let masked = entry.get_bit(16); + let vector = entry.get_bits(0..=7) as u8; + if !masked { + inject_vector(dest, vector, None, true); + } + } + Ok(()) + } +} + +impl Zone { + pub fn ioapic_mmio_init(&mut self, arch: &HvArchZoneConfig) { + self.mmio_region_register(arch.ioapic_base, arch.ioapic_size, ioapic_mmio_handler, 0); + } +} + +fn ioapic_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { + 
mmio_handler(mmio, &VIRT_IOAPIC.0) +} + +unsafe fn configure_gsi(io_apic: &mut IoApic, dest: u8, gsi: u8, vector: u8) { + let mut entry = io_apic.table_entry(gsi); + entry.set_dest(dest); + entry.set_vector(vector); + entry.set_mode(IrqMode::Fixed); + io_apic.set_table_entry(gsi, entry); + io_apic.enable_irq(gsi); +} + +pub fn init_ioapic() { + println!("Initializing I/O APIC..."); + unsafe { + Port::::new(0x20).write(0xff); + Port::::new(0xA0).write(0xff); + + let mut io_apic = IoApic::new(ROOT_IOAPIC_BASE as _); + // println!("ioapic id: {:x}", io_apic.id()); + configure_gsi(&mut io_apic, 0, UART_COM1_IRQ, IdtVector::UART_COM1_VECTOR); + IO_APIC = Some(io_apic); + } +} + +pub fn ioapic_inject_irq(irq: u8) { + VIRT_IOAPIC.0.trigger(irq as _); +} diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 5ae514dc..dd2489c5 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -12,178 +12,13 @@ use crate::{ }; use alloc::collections::vec_deque::VecDeque; use bit_field::BitField; -use core::{arch::x86_64::_rdtsc, u32}; +use core::{arch::x86_64::_rdtsc, ops::Range, u32}; use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}; -const PHYS_LAPIC_TIMER_INTR_FREQ: u64 = 100; -const VIRT_LAPIC_TIMER_FREQ_MHZ: u64 = 1000; // 1000 MHz -const VIRT_LAPIC_TIMER_NANOS_PER_TICK: u64 = 1000 / VIRT_LAPIC_TIMER_FREQ_MHZ; - -/// A virtual local APIC timer. (SDM Vol. 
3C, Section 10.5.4) -pub struct VirtLocalApicTimer { - is_enabled: bool, - lvt_timer_bits: u32, - divide_shift: u8, - initial_count: u32, - last_start_ns: u64, - deadline_ns: u64, - deadline_tsc: u64, - timer_mode: TimerMode, -} - -impl VirtLocalApicTimer { - pub const fn new() -> Self { - Self { - is_enabled: true, - lvt_timer_bits: 0x1_0000, // masked - divide_shift: 0, - initial_count: 0, - last_start_ns: 0, - deadline_ns: 0, - deadline_tsc: 0, - timer_mode: TimerMode::TscDeadline, - } - } - - pub fn set_enable(&mut self, is_enabled: u8) { - self.is_enabled = is_enabled != 0; - } - - /// Check if an interrupt generated. if yes, update it's states. - pub fn check_interrupt(&mut self) -> bool { - if !self.is_enabled || self.is_masked() { - return false; - } - - match self.timer_mode { - TimerMode::OneShot => { - if self.deadline_ns != 0 && hpet::current_time_nanos() >= self.deadline_ns { - self.deadline_ns = 0; - return true; - } - } - TimerMode::Periodic => { - let hpet_ns = hpet::current_time_nanos(); - if self.deadline_ns != 0 && hpet_ns >= self.deadline_ns { - self.deadline_ns += self.interval_ns(); - return true; - } - } - TimerMode::TscDeadline => { - if (self.deadline_tsc != 0) && unsafe { _rdtsc() } >= self.deadline_tsc { - self.deadline_tsc = 0; - return true; - } - } - _ => {} - } - return false; - } - - /// Whether the timer interrupt is masked. - pub const fn is_masked(&self) -> bool { - self.lvt_timer_bits & (1 << 16) != 0 - } - - /// Whether the timer mode is periodic. - pub const fn is_periodic(&self) -> bool { - let timer_mode = (self.lvt_timer_bits >> 17) & 0b11; - timer_mode == TimerMode::Periodic as _ - } - - /// The timer interrupt vector number. - pub const fn vector(&self) -> u8 { - (self.lvt_timer_bits & 0xff) as u8 - } - - /// LVT Timer Register. (SDM Vol. 3A, Section 10.5.1, Figure 10-8) - pub const fn lvt_timer(&self) -> u32 { - self.lvt_timer_bits - } - - /// Divide Configuration Register. (SDM Vol. 
3A, Section 10.5.4, Figure 10-10) - pub const fn divide(&self) -> u32 { - let dcr = self.divide_shift.wrapping_sub(1) as u32 & 0b111; - (dcr & 0b11) | ((dcr & 0b100) << 1) - } - - /// Initial Count Register. - pub const fn initial_count(&self) -> u32 { - self.initial_count - } - - /// Current Count Register. - pub fn current_counter(&self) -> u32 { - let elapsed_ns = hpet::current_time_nanos() - self.last_start_ns; - let elapsed_cycles = (elapsed_ns / VIRT_LAPIC_TIMER_NANOS_PER_TICK) >> self.divide_shift; - - match self.timer_mode { - TimerMode::OneShot => { - if elapsed_cycles < self.initial_count as u64 { - return self.initial_count - elapsed_cycles as u32; - } - } - TimerMode::Periodic => { - if self.initial_count != 0 { - return self.initial_count - - (elapsed_cycles % self.initial_count as u64) as u32; - } - } - _ => {} - } - return 0; - } - - /// Set LVT Timer Register. - pub fn set_lvt_timer(&mut self, bits: u32) -> HvResult { - let timer_mode = bits.get_bits(17..19); - self.timer_mode = match timer_mode { - 0 => TimerMode::OneShot, - 1 => TimerMode::Periodic, - _ => TimerMode::TscDeadline, - }; - self.lvt_timer_bits = bits; - self.set_deadline(); - Ok(()) - } - - /// Set Initial Count Register. - pub fn set_initial_count(&mut self, initial: u32) -> HvResult { - self.initial_count = initial; - self.set_deadline(); - Ok(()) - } - - /// Set Divide Configuration Register. 
- pub fn set_divide(&mut self, dcr: u32) -> HvResult { - let shift = (dcr & 0b11) | ((dcr & 0b1000) >> 1); - self.divide_shift = (shift + 1) as u8 & 0b111; - self.set_deadline(); - Ok(()) - } - - pub fn set_tsc_deadline(&mut self, ddl: u64) -> HvResult { - self.deadline_tsc = ddl; - Ok(()) - } - - const fn interval_ns(&self) -> u64 { - (self.initial_count as u64 * VIRT_LAPIC_TIMER_NANOS_PER_TICK) << self.divide_shift - } - - fn set_deadline(&mut self) { - if self.initial_count != 0 { - self.last_start_ns = hpet::current_time_nanos(); - self.deadline_ns = self.last_start_ns + self.interval_ns(); - } else { - self.deadline_ns = 0; - } - } -} - pub struct VirtLocalApic { pub phys_lapic: LocalApic, - pub virt_lapic_timer: VirtLocalApicTimer, + pub virt_timer_vector: u8, + virt_lvt_timer_bits: u32, } impl VirtLocalApic { @@ -195,43 +30,19 @@ impl VirtLocalApic { .build() .unwrap(); - unsafe { lapic.enable() }; - - // calibrate phys lapic timer - let mut best_freq_hz = 0; - for _ in 0..5 { - unsafe { lapic.set_timer_initial(u32::MAX) }; - let hpet_start = hpet::current_ticks(); - hpet::wait_millis(10); - let ticks = u32::MAX - unsafe { lapic.timer_current() }; - let hpet_end = hpet::current_ticks(); - - let nanos = hpet::ticks_to_nanos(hpet_end.wrapping_sub(hpet_start)); - let ticks_per_sec = (ticks as u64 * 1_000_000_000 / nanos) as u32; - - if ticks_per_sec > best_freq_hz { - best_freq_hz = ticks_per_sec; - } - } - println!( - "Calibrated LAPIC frequency: {}.{:03} MHz", - best_freq_hz / 1_000_000, - best_freq_hz % 1_000_000 / 1_000, - ); - unsafe { - lapic.set_timer_mode(TimerMode::Periodic); - lapic.set_timer_divide(TimerDivide::Div256); - lapic.set_timer_initial((best_freq_hz as u64 / PHYS_LAPIC_TIMER_INTR_FREQ) as u32); + lapic.enable(); + lapic.disable_timer(); } Self { phys_lapic: lapic, - virt_lapic_timer: VirtLocalApicTimer::new(), + virt_timer_vector: 0, + virt_lvt_timer_bits: (1 << 16) as _, // masked } } - pub const fn msr_range() -> core::ops::Range { + pub 
const fn msr_range() -> Range { 0x800..0x840 } @@ -240,88 +51,40 @@ impl VirtLocalApic { } pub fn rdmsr(&mut self, msr: Msr) -> HvResult { - if msr != IA32_X2APIC_CUR_COUNT { - // info!("lapic rdmsr: {:?}", msr,); - } - match msr { IA32_X2APIC_APICID => Ok(this_cpu_id() as u64), - IA32_X2APIC_VERSION => Ok(0x50014), // Max LVT Entry: 0x5, Version: 0x14 IA32_X2APIC_LDR => Ok(this_cpu_id() as u64), // logical apic id - IA32_X2APIC_SIVR => Ok(((self.virt_lapic_timer.is_enabled as u64 & 0x1) << 8) | 0xff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit) - IA32_X2APIC_ISR0 | IA32_X2APIC_ISR1 | IA32_X2APIC_ISR2 | IA32_X2APIC_ISR3 - | IA32_X2APIC_ISR4 | IA32_X2APIC_ISR5 | IA32_X2APIC_ISR6 | IA32_X2APIC_ISR7 => { - // info!("read ISR"); - Ok(0x0) - } - IA32_X2APIC_IRR0 | IA32_X2APIC_IRR1 | IA32_X2APIC_IRR2 | IA32_X2APIC_IRR3 - | IA32_X2APIC_IRR4 | IA32_X2APIC_IRR5 | IA32_X2APIC_IRR6 | IA32_X2APIC_IRR7 => { - // info!("read IRR"); - Ok(0x0) - } - IA32_X2APIC_ESR => Ok(0x0), - IA32_X2APIC_LVT_TIMER => Ok(self.virt_lapic_timer.lvt_timer() as u64), - IA32_X2APIC_LVT_THERMAL - | IA32_X2APIC_LVT_PMI - | IA32_X2APIC_LVT_LINT0 - | IA32_X2APIC_LVT_LINT1 - | IA32_X2APIC_LVT_ERROR => { - Ok(0x1_0000) // SDM Vol. 
3A, Section 10.5.1, Figure 10-8 (with Mask bit) - } - IA32_X2APIC_INIT_COUNT => Ok(self.virt_lapic_timer.initial_count() as u64), - IA32_X2APIC_CUR_COUNT => Ok(self.virt_lapic_timer.current_counter() as u64), - IA32_X2APIC_DIV_CONF => Ok(self.virt_lapic_timer.divide() as u64), + IA32_X2APIC_LVT_TIMER => Ok(self.virt_lvt_timer_bits as _), _ => hv_result_err!(ENOSYS), } } pub fn wrmsr(&mut self, msr: Msr, value: u64) -> HvResult { - if (msr != IA32_X2APIC_ICR && msr != IA32_TSC_DEADLINE) && (value >> 32) != 0 { - return hv_result_err!(EINVAL); // all registers except ICR are 32-bits - } - if msr == IA32_TSC_DEADLINE { - self.virt_lapic_timer.set_tsc_deadline(value); - return Ok(()); - } - - if msr == IA32_X2APIC_INIT_COUNT { - //info!("{:?}, value: {:x}", msr, value); - } match msr { - IA32_X2APIC_EOI => { - if value != 0 { - hv_result_err!(EINVAL) // write a non-zero value causes #GP - } else { - Ok(()) - } - } - IA32_X2APIC_SIVR => { - self.virt_lapic_timer.set_enable(((value >> 8) & 1) as _); - Ok(()) - } IA32_X2APIC_ICR => { // info!("ICR value: {:x}", value); ipi::send_ipi(value); Ok(()) } - IA32_X2APIC_ESR - | IA32_X2APIC_LVT_THERMAL - | IA32_X2APIC_LVT_PMI - | IA32_X2APIC_LVT_LINT0 - | IA32_X2APIC_LVT_LINT1 - | IA32_X2APIC_LVT_ERROR => { - Ok(()) // ignore these register writes + IA32_X2APIC_LVT_TIMER => { + self.virt_lvt_timer_bits = value as u32; + self.virt_timer_vector = value.get_bits(0..=7) as _; + unsafe { + self.phys_lapic + .set_timer_mode(match value.get_bits(17..19) { + 0 => TimerMode::OneShot, + 1 => TimerMode::Periodic, + _ => TimerMode::TscDeadline, + }); + if value.get_bit(16) { + self.phys_lapic.disable_timer(); + } else { + self.phys_lapic.enable_timer(); + } + } + Ok(()) } - IA32_X2APIC_LVT_TIMER => self.virt_lapic_timer.set_lvt_timer(value as u32), - IA32_X2APIC_INIT_COUNT => self.virt_lapic_timer.set_initial_count(value as u32), - IA32_X2APIC_DIV_CONF => self.virt_lapic_timer.set_divide(value as u32), _ => hv_result_err!(ENOSYS), } } - - 
pub fn check_timer_interrupt(&mut self) { - if self.virt_lapic_timer.check_interrupt() { - inject_vector(this_cpu_id(), self.virt_lapic_timer.vector(), None, false); - } - } } diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 156feaf5..13131f28 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,6 +1,5 @@ pub mod hpet; -pub mod i8254; -pub mod i8259a; +pub mod ioapic; pub mod lapic; use crate::{ @@ -32,9 +31,9 @@ impl PendingVectors { match self.inner.get(cpu_id) { Some(pending_vectors) => { let mut vectors = pending_vectors.lock(); - if vectors.len() > 2 { + /*if vectors.len() > 2 { info!("len: {:x}", vectors.len()); - } + }*/ if allow_repeat || !vectors.contains(&(vector, err_code)) { vectors.push_back((vector, err_code)); } diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index 3da2f253..755a8fb2 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -23,4 +23,4 @@ pub use ns16440a::{console_getchar, console_putchar}; mod uart16550a; #[cfg(target_arch = "x86_64")] -pub use uart16550a::{console_getchar, console_putchar, UartReg, VirtUart16550a}; +pub use uart16550a::{console_getchar, console_putchar, UartReg}; diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs index e37c4002..9b51cfa6 100644 --- a/src/device/uart/uart16550a.rs +++ b/src/device/uart/uart16550a.rs @@ -1,9 +1,4 @@ -use crate::{ - arch::device::{ - all_virt_devices, DeviceMsg, PortIoDevice, PIC_MASTER_BASE_PORT, UART_COM1_BASE_PORT, - }, - error::HvResult, -}; +use crate::{arch::pio::UART_COM1_BASE_PORT, error::HvResult}; use alloc::vec::Vec; use core::ops::Range; use spin::Mutex; @@ -23,9 +18,7 @@ pub mod UartReg { pub const SCRATCH: u16 = 7; } -const UART_COM1_IRQ: u32 = 4; const UART_CLOCK_FACTOR: usize = 16; -const UART_FIFO_CAPACITY: usize = 64; const OSC_FREQ: usize = 1_843_200; lazy_static::lazy_static! 
{ @@ -173,166 +166,6 @@ impl Uart16550a { } } -pub struct VirtUart16550aUnlocked { - iir: u8, - ier: u8, - lcr: u8, - lsr: u8, - fifo: Fifo, -} - -impl VirtUart16550aUnlocked { - fn new() -> Self { - Self { - iir: 0, - ier: 0, - lcr: 0, - lsr: (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(), - fifo: Fifo::new(), - } - } - - fn update_irq(&mut self) { - let mut iir: u8 = 0; - - if self.ier & InterruptEnableFlags::ENABLE_RCVR_DATA_AVAIL_INTR.bits() != 0 - && self.lsr & LineStatusFlags::RCVR_DATA_READY.bits() != 0 - { - iir |= InterruptIdentFlags::RCVR_DATA_AVAIL.bits(); - } - - if self.ier & InterruptEnableFlags::ENABLE_XMIT_HOLD_REG_EMPTY_INTR.bits() != 0 - && self.lsr & LineStatusFlags::XMIT_HOLD_REG_EMPTY.bits() != 0 - { - iir |= InterruptIdentFlags::XMIT_HOLD_REG_EMPTY.bits(); - } - - if iir == 0 { - self.iir = InterruptIdentFlags::NO_INTR_IS_PENDING.bits(); - } else { - self.iir = iir; - all_virt_devices().send_msg( - PIC_MASTER_BASE_PORT, - UART_COM1_IRQ, - DeviceMsg::UPDATE_IRQ_HIGH, - ); - } - } -} - -pub struct VirtUart16550a { - base_port: u16, - port_range: Vec>, - uart: Mutex, -} - -impl VirtUart16550a { - pub fn new(base_port: u16) -> Self { - Self { - base_port, - port_range: vec![base_port..base_port + 8], - uart: Mutex::new(VirtUart16550aUnlocked::new()), - } - } -} - -impl PortIoDevice for VirtUart16550a { - fn port_range(&self) -> &Vec> { - &self.port_range - } - - fn read(&self, port: u16, msg: u8) -> HvResult { - let mut uart = self.uart.lock(); - - let ret = match port - self.base_port { - UartReg::RCVR_BUFFER => { - if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { - 1 // dll - } else { - // read a byte from FIFO - if uart.fifo.is_empty() { - 0 - } else { - uart.fifo.pop() - } - } - } - UartReg::INTR_ENABLE => { - if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { - 0 // dlm - } else { - uart.ier - } - } - UartReg::INTR_IDENT => { - // info!("IIR read, {:x}", uart.iir); - 
uart.iir | InterruptIdentFlags::FIFO_ENABLED_16550_MODE.bits() - } - UartReg::LINE_CTRL => uart.lcr, - UartReg::LINE_STATUS => { - // check if the physical serial port has an available byte, and push it to FIFO. - if !uart.fifo.is_full() { - if let Some(c) = console_getchar() { - uart.fifo.push(c); - } - } - if !uart.fifo.is_empty() { - uart.lsr |= LineStatusFlags::RCVR_DATA_READY.bits(); - } else { - uart.lsr &= (!LineStatusFlags::RCVR_DATA_READY).bits(); - } - uart.lsr - } - UartReg::MODEM_CTRL | UartReg::MODEM_STATUS | UartReg::SCRATCH => { - debug!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented - 0 - } - _ => unreachable!(), - }; - - uart.update_irq(); - Ok(ret as u32) - } - - fn write(&self, port: u16, value: u32, msg: u8) -> HvResult { - let mut uart = self.uart.lock(); - let value: u8 = value as u8; - - match port - self.base_port { - UartReg::XMIT_BUFFER => { - if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { - // dll - } else { - uart.lsr |= - (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(); - if value != 0xff { - console_putchar(value as u8); - } - } - } - UartReg::INTR_ENABLE => { - if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { - // dlm - } else { - // info!("ier: {:x}", uart.ier); - uart.ier = value & 0x0f; - } - } - UartReg::LINE_CTRL => { - uart.lcr = value; - } - UartReg::FIFO_CTRL | UartReg::MODEM_CTRL | UartReg::SCRATCH => { - debug!("Unimplemented serial port I/O write: {:#x}", port); - } - UartReg::LINE_STATUS => {} // ignore - _ => unreachable!(), - } - - uart.update_irq(); - Ok(()) - } -} - pub fn console_putchar(c: u8) { COM1.lock().putchar(c); } diff --git a/src/main.rs b/src/main.rs index b9e21895..756a05b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -153,7 +153,7 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { #[cfg(target_arch = "x86_64")] MultibootInfo::init(host_dtb); #[cfg(target_arch = "x86_64")] - arch::apic::init_ioapic(); + 
device::irqchip::pic::ioapic::init_ioapic(); } let cpu = PerCpu::new(cpuid); diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index ee887666..f1e9314e 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -14,21 +14,16 @@ pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1) | (1 << 2); pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_CMDLINE: &str = - "console=ttyS0 earlyprintk=serial rdinit=/init nokaslr noapic\0"; +pub const ROOT_ZONE_CMDLINE: &str = "console=ttyS0 earlyprintk=serial rdinit=/init nokaslr\0"; // noapic -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ +pub const MEM_TYPE_ROM: u32 = 3; + +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 3] = [ HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, physical_start: 0x500_0000, virtual_start: 0x0, - size: 0x1_0000, - }, // ram - HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM, - physical_start: 0x501_0000, - virtual_start: 0x1_0000, - size: 0x14ff_0000, + size: 0x1500_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, @@ -37,33 +32,19 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ size: 0x4000_0000, }, // ram HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM, + mem_type: MEM_TYPE_ROM, physical_start: 0x2000_0000, virtual_start: 0x1500_0000, size: 0x20_0000, - }, // ram - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfec0_0000, - virtual_start: 0xfec0_0000, - size: 0x1000, - }, // io apic - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfed0_0000, - virtual_start: 0xfed0_0000, - size: 0x1000, - }, // hpet - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfee0_0000, - virtual_start: 0xfee0_0000, - size: 0x1000, - }, // local apic + }, // initrd ]; pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; -pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig {}; +pub 
const ROOT_IOAPIC_BASE: usize = 0xfec0_0000; +pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { + ioapic_base: ROOT_IOAPIC_BASE, + ioapic_size: 0x1000, +}; // TODO: temp pub const GUEST_PT1: GuestPhysAddr = 0x1000; From 2b9625aafc81b2321a260917bed39610c7147e8f Mon Sep 17 00:00:00 2001 From: Solicey Date: Thu, 3 Apr 2025 15:53:04 +0800 Subject: [PATCH 10/29] enable PCI devices, activate vt-d DMA remapping --- Cargo.lock | 12 + Cargo.toml | 1 + scripts/qemu-x86_64.mk | 24 +- src/arch/x86_64/acpi.rs | 474 ++++++++++++++++++ src/arch/x86_64/boot.rs | 17 +- src/arch/x86_64/cpu.rs | 9 +- src/arch/x86_64/gdt.rs | 7 +- .../irqchip/pic => arch/x86_64}/hpet.rs | 0 src/arch/x86_64/idt.rs | 70 ++- src/arch/x86_64/ipi.rs | 17 +- src/arch/x86_64/mm.rs | 21 - src/arch/x86_64/mmio.rs | 2 +- src/arch/x86_64/mod.rs | 2 + src/arch/x86_64/pci.rs | 72 +++ src/arch/x86_64/pio.rs | 22 +- src/arch/x86_64/s2pt.rs | 4 + src/arch/x86_64/trap.rs | 24 +- src/arch/x86_64/zone.rs | 18 +- src/device/irqchip/pic/ioapic.rs | 46 +- src/device/irqchip/pic/lapic.rs | 8 +- src/device/irqchip/pic/mod.rs | 10 +- src/device/irqchip/pic/vtd.rs | 460 +++++++++++++++++ src/main.rs | 18 +- src/platform/qemu_x86_64.rs | 118 +++-- 24 files changed, 1279 insertions(+), 177 deletions(-) rename src/{device/irqchip/pic => arch/x86_64}/hpet.rs (100%) create mode 100644 src/arch/x86_64/pci.rs create mode 100644 src/device/irqchip/pic/vtd.rs diff --git a/Cargo.lock b/Cargo.lock index 8f3f0f3a..748c61e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,17 @@ dependencies = [ "tock-registers", ] +[[package]] +name = "acpi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94476c7ef97af4c4d998b3f422c1b01d5211aad57c80ed200baf148d1f1efab6" +dependencies = [ + "bit_field 0.10.2", + "bitflags 2.6.0", + "log", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -91,6 +102,7 @@ name = "hvisor" version = "0.1.0" dependencies = [ "aarch64-cpu", + 
"acpi", "bit_field 0.10.2", "bitflags 2.6.0", "bitmap-allocator", diff --git a/Cargo.toml b/Cargo.toml index 11e03933..a19ccd6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ x86 = "0.52.0" x86_64 = "=0.14.10" x2apic = "0.4.3" raw-cpuid = "10.7.0" +acpi = "5.2.0" [features] platform_qemu = [] diff --git a/scripts/qemu-x86_64.mk b/scripts/qemu-x86_64.mk index d94efbff..18cbaeb7 100644 --- a/scripts/qemu-x86_64.mk +++ b/scripts/qemu-x86_64.mk @@ -1,4 +1,5 @@ -QEMU := qemu-system-x86_64 +QEMU := /home/sora/qemu/build/qemu-system-x86_64 +# /home/sora/qemu/build/qemu-system-x86_64 acpi_asl_dir := scripts/x86_64/acpi acpi_aml_dir := $(image_dir)/acpi @@ -11,6 +12,7 @@ zone0_setup := $(image_dir)/setup.bin zone0_vmlinux := $(image_dir)/vmlinux.bin zone0_initrd := $(image_dir)/initramfs.cpio.gz zone0_boot16 := $(image_dir)/boot16.bin +zone0_rootfs := $(image_dir)/rootfs1.img aml_hpet := $(acpi_aml_dir)/hpet.aml aml_madt := $(acpi_aml_dir)/madt.aml @@ -18,12 +20,18 @@ aml_rsdp := $(acpi_aml_dir)/rsdp.aml aml_rsdt := $(acpi_aml_dir)/rsdt.aml aml_xsdt := $(acpi_aml_dir)/xsdt.aml -QEMU_ARGS := -machine q35 +QEMU_ARGS := -machine q35,kernel-irqchip=split QEMU_ARGS += -cpu host,+x2apic,+invtsc -accel kvm QEMU_ARGS += -smp 4 QEMU_ARGS += -serial mon:stdio -QEMU_ARGS += -m 2G +QEMU_ARGS += -m 4G QEMU_ARGS += -nographic +QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb=on + +QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 +QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw +QEMU_ARGS += -device virtio-blk-pci,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += --trace "virtio_*" --trace "virtqueue_*" --trace "vtd_dma*" --trace "iommu_*" QEMU_ARGS += -kernel $(hvisor_elf) # QEMU_ARGS += -device loader,file="$(zone0_bios)",addr=0x5008000,force-raw=on @@ -35,11 +43,11 @@ QEMU_ARGS += -device loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on QEMU_ARGS += -device 
loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" -QEMU_ARGS += -device loader,file="$(aml_rsdp)",addr=0x50f2400,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_rsdt)",addr=0x50f2440,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_xsdt)",addr=0x50f2480,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_madt)",addr=0x50f2500,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_hpet)",addr=0x50f2740,force-raw=on +# QEMU_ARGS += -device loader,file="$(aml_rsdp)",addr=0x50f2400,force-raw=on +# QEMU_ARGS += -device loader,file="$(aml_rsdt)",addr=0x50f2440,force-raw=on +# QEMU_ARGS += -device loader,file="$(aml_xsdt)",addr=0x50f2480,force-raw=on +# QEMU_ARGS += -device loader,file="$(aml_madt)",addr=0x50f2500,force-raw=on +# QEMU_ARGS += -device loader,file="$(aml_hpet)",addr=0x50f2740,force-raw=on $(hvisor_bin): elf aml $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index e69de29b..a8b8e5a2 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -0,0 +1,474 @@ +use crate::{ + config::HvConfigMemoryRegion, + error::HvResult, + percpu::this_zone, + platform::qemu_x86_64::{ROOT_ZONE_ACPI_REGION, ROOT_ZONE_RSDP_REGION}, +}; +use acpi::{ + fadt::Fadt, + madt::{LocalApicEntry, Madt, MadtEntry}, + mcfg::Mcfg, + rsdp::Rsdp, + sdt::{SdtHeader, Signature}, + AcpiHandler, AcpiTables, AmlTable, PciConfigRegions, +}; +use alloc::{ + collections::{btree_map::BTreeMap, btree_set::BTreeSet}, + vec::Vec, +}; +use core::{pin::Pin, ptr::NonNull}; +use spin::Mutex; + +const RSDP_V1_SIZE: usize = 20; +const RSDP_V2_SIZE: usize = 36; + +const RSDP_RSDT_OFFSET: usize = 16; +const RSDP_RSDT_PTR_SIZE: usize = 4; +const RSDT_PTR_SIZE: usize = 4; + +const FADT_DSDT_OFFSET_32: usize = 0x28; +const FADT_DSDT_OFFSET_64: usize = 0x8c; + +const FADT_FACS_OFFSET_32: usize = 0x24; +const FADT_FACS_OFFSET_64: usize = 0x84; + +const 
SDT_HEADER_SIZE: usize = 36; + +macro_rules! acpi_table { + ($a: ident, $b: ident) => { + #[repr(transparent)] + struct $a { + header: SdtHeader, + } + + unsafe impl acpi::AcpiTable for $a { + const SIGNATURE: Signature = Signature::$b; + fn header(&self) -> &SdtHeader { + &self.header + } + } + }; +} + +#[derive(Clone, Debug)] +struct HvAcpiHandler {} + +impl AcpiHandler for HvAcpiHandler { + unsafe fn map_physical_region( + &self, + physical_address: usize, + size: usize, + ) -> acpi::PhysicalMapping { + acpi::PhysicalMapping::new( + physical_address, + NonNull::new(physical_address as *mut T).unwrap(), + size, + size, + self.clone(), + ) + } + + fn unmap_physical_region(region: &acpi::PhysicalMapping) {} +} + +lazy_static::lazy_static! { + static ref ROOT_ACPI: Mutex = { + Mutex::new(RootAcpi::default()) + }; +} + +#[derive(Clone, Debug, Default)] +pub struct AcpiTable { + bytes: Vec, + gpa: usize, + hpa: usize, + is_addr_set: bool, + is_dirty: bool, +} + +impl AcpiTable { + pub fn set_u8(&mut self, value: u8, offset: usize) { + self.bytes[offset] = value; + self.is_dirty = true; + } + + pub fn set_u32(&mut self, value: u32, offset: usize) { + let bytes = value.to_ne_bytes(); + self.bytes[offset..offset + 4].copy_from_slice(&bytes); + self.is_dirty = true; + } + + pub fn set_u64(&mut self, value: u64, offset: usize) { + let bytes = value.to_ne_bytes(); + self.bytes[offset..offset + 8].copy_from_slice(&bytes); + self.is_dirty = true; + } + + // not for rsdp + pub fn set_len(&mut self, len: usize) { + self.bytes.resize(len, 0); + self.set_u32(len as u32, 4); + self.is_dirty = true; + } + + pub fn get_len(&self) -> usize { + self.bytes.len() + } + + pub fn get_bytes(&self) -> &Vec { + &self.bytes + } + + pub fn get_u8(&self, offset: usize) -> u8 { + self.bytes[offset] + } + + pub fn get_u16(&self, offset: usize) -> u16 { + let bytes: [u8; 2] = self.bytes[offset..offset + 2].try_into().unwrap(); + u16::from_ne_bytes(bytes) + } + + pub fn get_u32(&self, offset: 
usize) -> u32 { + let bytes: [u8; 4] = self.bytes[offset..offset + 4].try_into().unwrap(); + u32::from_ne_bytes(bytes) + } + + pub fn get_u64(&self, offset: usize) -> u64 { + let bytes: [u8; 8] = self.bytes[offset..offset + 8].try_into().unwrap(); + u64::from_ne_bytes(bytes) + } + + pub fn fill(&mut self, ptr: *const u8, len: usize) { + self.bytes.clear(); + if self.bytes.capacity() < len { + self.bytes.reserve(len); + } + + unsafe { + core::ptr::copy_nonoverlapping(ptr, self.bytes.as_mut_ptr(), len); + self.bytes.set_len(len); + } + } + + pub fn copy_to_mem(&self) { + unsafe { + core::ptr::copy_nonoverlapping( + self.bytes.as_ptr(), + self.hpa as *mut u8, + self.bytes.len(), + ) + }; + } + + pub fn remove(&mut self, start: usize, len: usize) { + let tot_len = self.bytes.len(); + let end = start + len; + assert!(end <= tot_len); + + if len == 0 { + return; + } + + unsafe { + let ptr = self.bytes.as_mut_ptr(); + core::ptr::copy(ptr.add(end), ptr.add(start), tot_len - end); + } + self.set_len(tot_len - len); + } + + pub fn set_addr(&mut self, hpa: usize, gpa: usize) { + self.hpa = hpa; + self.gpa = gpa; + self.is_addr_set = true; + } + + /// for rsdp, offset = 8; for the others, offset = 9. 
+ pub fn update_checksum(&mut self, offset: usize) { + self.bytes[offset] = 0; + let sum = self + .bytes + .iter() + .fold(0u8, |sum, &byte| sum.wrapping_add(byte)); + self.bytes[offset] = 0u8.wrapping_sub(sum); + } +} + +#[derive(Copy, Clone, Debug)] +struct AcpiPointer { + pub from_sig: Signature, + pub from_offset: usize, + pub to_sig: Signature, + pub pointer_size: usize, +} + +#[derive(Clone, Debug, Default)] +pub struct RootAcpi { + rsdp: AcpiTable, + tables: BTreeMap, + pointers: Vec, +} + +impl RootAcpi { + fn add_pointer( + &mut self, + from_sig: Signature, + from_offset: usize, + to_sig: Signature, + pointer_size: usize, + ) { + self.pointers.push(AcpiPointer { + from_sig, + from_offset, + to_sig, + pointer_size, + }); + } + + fn add_new_table(&mut self, sig: Signature, ptr: *const u8, len: usize) { + let mut table = AcpiTable::default(); + table.fill(ptr, len); + self.tables.insert(sig, table); + } + + fn get_mut_table(&mut self, sig: Signature) -> Option<&mut AcpiTable> { + self.tables.get_mut(&sig) + } + + fn get_table(&self, sig: &Signature) -> Option { + if self.tables.contains_key(sig) { + Some(self.tables.get(sig).unwrap().clone()) + } else { + None + } + } + + pub fn copy_to_zone_region( + &self, + rsdp_zone_region: &HvConfigMemoryRegion, + acpi_zone_region: &HvConfigMemoryRegion, + ) { + let mut rsdp = self.rsdp.clone(); + let mut tables = self.tables.clone(); + + // set rsdp addr + rsdp.set_addr( + rsdp_zone_region.physical_start as _, + rsdp_zone_region.virtual_start as _, + ); + + let cpu_set = this_zone().read().cpu_set; + let mut madt_cur: usize = SDT_HEADER_SIZE + 8; + let mut madt = tables.get_mut(&Signature::MADT).unwrap(); + + // fix madt cpu info + for entry in + unsafe { Pin::new_unchecked(&*(madt.get_bytes().clone().as_ptr() as *const Madt)) } + .entries() + { + let mut entry_len = madt.get_u8(madt_cur + 1) as usize; + match entry { + MadtEntry::LocalApic(entry) => { + if !cpu_set.contains_cpu(entry.processor_id as _) { + 
madt.remove(madt_cur, entry_len); + entry_len = 0; + } + } + MadtEntry::LocalX2Apic(entry) => { + if !cpu_set.contains_cpu(entry.processor_uid as _) {} + } + _ => {} + } + madt_cur += entry_len; + } + + // set pointers + let hpa_start = acpi_zone_region.physical_start as usize; + let gpa_start = acpi_zone_region.virtual_start as usize; + let mut cur: usize = 0; + + let mut tables_involved = BTreeSet::::new(); + + for pointer in self.pointers.iter() { + let to = tables.get_mut(&pointer.to_sig).unwrap(); + tables_involved.insert(pointer.to_sig); + + if !to.is_addr_set { + info!( + "sig: {:x?}, hpa: {:x?}, gpa: {:x?}, size: {:x?}", + pointer.to_sig, + hpa_start + cur, + gpa_start + cur, + to.get_len() + ); + to.set_addr(hpa_start + cur, gpa_start + cur); + cur += to.get_len(); + } + let to_gpa = to.gpa; + + let from = match pointer.from_sig == pointer.to_sig { + true => &mut rsdp, + false => tables.get_mut(&pointer.from_sig).unwrap(), + }; + match pointer.pointer_size { + 4 => { + from.set_u32(to_gpa as _, pointer.from_offset); + } + 8 => { + from.set_u64(to_gpa as _, pointer.from_offset); + } + _ => { + warn!("Unused pointer size!"); + } + } + } + + // update checksums + rsdp.update_checksum(8); + for (sig, table) in tables.iter_mut() { + if table.is_dirty { + table.update_checksum(9); + } + } + + // copy to memory + rsdp.copy_to_mem(); + for (sig, table) in tables.iter() { + // don't copy tables that are not inside ACPI tree + if tables_involved.contains(sig) { + table.copy_to_mem(); + } + } + } + + // let zone 0 bsp cpu does the work + pub fn init(&mut self) { + let rsdp_mapping = unsafe { Rsdp::search_for_on_bios(HvAcpiHandler {}).unwrap() }; + // FIXME: temporarily suppose we use ACPI 1.0 + assert!(rsdp_mapping.revision() == 0); + + self.rsdp.fill( + rsdp_mapping.virtual_start().as_ptr() as *const u8, + RSDP_V1_SIZE, + ); + self.add_pointer( + Signature::RSDT, + RSDP_RSDT_OFFSET, + Signature::RSDT, + RSDP_RSDT_PTR_SIZE, + ); + + // get rsdt + + 
self.add_new_table( + Signature::RSDT, + rsdp_mapping.rsdt_address() as usize as *const u8, + SDT_HEADER_SIZE, + ); + let mut rsdt_offset = self.get_mut_table(Signature::RSDT).unwrap().get_len(); + + let tables = + unsafe { AcpiTables::from_validated_rsdp(HvAcpiHandler {}, rsdp_mapping) }.unwrap(); + + if let Ok(madt) = tables.find_table::() { + self.add_new_table( + Signature::MADT, + madt.physical_start() as *const u8, + madt.region_length(), + ); + + info!("-------------------------------- MADT --------------------------------"); + for entry in madt.get().entries() { + info!("{:x?}", entry); + } + + self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MADT, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + } + + if let Ok(mcfg) = tables.find_table::() { + self.add_new_table( + Signature::MCFG, + mcfg.physical_start() as *const u8, + mcfg.region_length(), + ); + + info!("-------------------------------- MCFG --------------------------------"); + for entry in mcfg.entries() { + info!("{:x?}", entry); + } + + self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + } + + if let Ok(fadt) = tables.find_table::() { + self.add_new_table( + Signature::FADT, + fadt.physical_start() as *const u8, + fadt.region_length(), + ); + + self.add_pointer(Signature::RSDT, rsdt_offset, Signature::FADT, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + + // dsdt + + if let Ok(dsdt) = tables.dsdt() { + self.add_new_table( + Signature::DSDT, + (dsdt.address - SDT_HEADER_SIZE) as *const u8, + (dsdt.length as usize + SDT_HEADER_SIZE), + ); + + self.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_32, Signature::DSDT, 4); + self.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_64, Signature::DSDT, 8); + } + + // facs + + if let Ok(facs_addr) = fadt.facs_address() { + self.add_new_table(Signature::FACS, facs_addr as *const u8, unsafe { + *((facs_addr + 4) as *const u32) as usize + }); + + self.add_pointer(Signature::FADT, 
FADT_FACS_OFFSET_32, Signature::FACS, 4); + self.add_pointer(Signature::FADT, FADT_FACS_OFFSET_64, Signature::FACS, 8); + } + } + + acpi_table!(Dmar, DMAR); + if let Ok(dmar) = tables.find_table::() { + self.add_new_table( + Signature::DMAR, + dmar.physical_start() as *const u8, + dmar.region_length(), + ); + + info!("dmar: {:x?}", unsafe { + *((dmar.physical_start() + 56) as *const [u8; 8]) + }); + + // self.add_pointer(Signature::RSDT, rsdt_offset, Signature::DMAR, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + } + + if let Some(rsdt) = self.get_mut_table(Signature::RSDT) { + rsdt.set_len(rsdt_offset); + } + } +} + +// let zone 0 bsp cpu does the work +pub fn root_init() { + ROOT_ACPI.lock().init(); +} + +pub fn copy_to_root_zone_region() { + ROOT_ACPI + .lock() + .copy_to_zone_region(&ROOT_ZONE_RSDP_REGION, &ROOT_ZONE_ACPI_REGION); +} + +pub fn root_get_table(sig: &Signature) -> Option { + ROOT_ACPI.lock().get_table(sig) +} diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index 994e937d..8331dd89 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -2,7 +2,7 @@ use crate::{ config::{root_zone_config, HvZoneConfig, MEM_TYPE_RAM}, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr}, - platform::qemu_x86_64::gpa_as_mut_ptr, + platform::qemu_x86_64::root_zone_gpa_as_mut_ptr, }; use alloc::string::{String, ToString}; use core::{ @@ -83,7 +83,7 @@ impl BootParams { root_cmdline_addr: GuestPhysAddr, root_cmdline: &str, ) -> HvResult { - let boot_params_hpa = gpa_as_mut_ptr(setup_addr) as HostPhysAddr; + let boot_params_hpa = root_zone_gpa_as_mut_ptr(setup_addr) as HostPhysAddr; let boot_params = unsafe { &mut *(boot_params_hpa as *mut BootParams) }; // TODO: get kernel version @@ -101,7 +101,7 @@ impl BootParams { unsafe { copy_nonoverlapping( root_cmdline.as_ptr(), - gpa_as_mut_ptr(root_cmdline_addr), + root_zone_gpa_as_mut_ptr(root_cmdline_addr), root_cmdline.len(), ) }; @@ -139,6 +139,15 @@ impl BootParams { }; index += 1; } + /* 
FIXME: reserved? + _ => { + self.e820_table[index] = BootE820Entry { + addr: mem_region.virtual_start, + size: mem_region.size, + _type: E820Type::E820_RESERVED, + }; + index += 1; + }*/ _ => {} } } @@ -146,6 +155,8 @@ impl BootParams { } fn set_initrd(&mut self, ramdisk_image: u32, ramdisk_size: u32) { + // FIXME: + return; self.ramdisk_image = ramdisk_image; self.ramdisk_size = ramdisk_size; info!("initrd size: {}", self.ramdisk_size); diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index ef2ae3ca..6a151f21 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -2,7 +2,7 @@ use crate::{ arch::{ boot::BootParams, gdt::{get_tr_base, GdtStruct}, - ipi, + hpet, ipi, msr::{ Msr::{self, *}, MsrBitmap, @@ -12,7 +12,7 @@ use crate::{ vmx::*, }, consts::{core_end, MAX_CPU_NUM, PER_CPU_SIZE}, - device::irqchip::pic::{check_pending_vectors, hpet, lapic::VirtLocalApic}, + device::irqchip::pic::{check_pending_vectors, lapic::VirtLocalApic, vtd}, error::{HvError, HvResult}, memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, percpu::this_cpu_data, @@ -39,6 +39,8 @@ use x86_64::{ structures::tss::TaskStateSegment, }; +use super::acpi::RootAcpi; + const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; @@ -243,6 +245,9 @@ impl ArchCpu { .unwrap(); per_cpu.activate_gpm(); + // must be called after activate_gpm() + vtd::activate(); + while VM_LAUNCH_READY.load(Ordering::Acquire) < MAX_CPU_NUM as u32 - 1 { core::hint::spin_loop(); } diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index 72331c22..effc2baa 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -1,5 +1,5 @@ use crate::arch::cpu::{self, this_cpu_id}; -use alloc::{boxed::Box, collections::btree_map::BTreeMap}; +use alloc::boxed::Box; use bit_field::BitField; use spin::Mutex; use x86_64::{ @@ -21,11 +21,6 @@ pub struct GdtStruct { tss: &'static TaskStateSegment, } -/*lazy_static! 
{ - static ref TSS: Mutex> = Mutex::new(BTreeMap::new()); - static ref GDT: Mutex> = Mutex::new(BTreeMap::new()); -}*/ - impl GdtStruct { pub const KCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0); pub const KCODE64_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0); diff --git a/src/device/irqchip/pic/hpet.rs b/src/arch/x86_64/hpet.rs similarity index 100% rename from src/device/irqchip/pic/hpet.rs rename to src/arch/x86_64/hpet.rs diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 964b7c6c..d7dcd0fc 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -1,14 +1,41 @@ +use core::u32; + +use crate::error::HvResult; +use alloc::collections::btree_map::BTreeMap; +use spin::{Mutex, Once}; use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; -const NUM_INT: usize = 256; +const VECTOR_CNT: usize = 256; #[allow(non_snake_case)] pub mod IdtVector { + pub const ALLOC_START: u8 = 0x20; + pub const ALLOC_END: u8 = 0xdf; + pub const VIRT_IPI_VECTOR: u8 = 0xe0; pub const APIC_TIMER_VECTOR: u8 = 0xf0; pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; pub const APIC_ERROR_VECTOR: u8 = 0xf2; - pub const UART_COM1_VECTOR: u8 = 0xf3; +} + +lazy_static::lazy_static! 
{ + static ref ALLOC_VECTORS: Mutex = { + Mutex::new(AllocVectors::new()) + }; +} + +struct AllocVectors { + hv_to_gv: [u32; VECTOR_CNT], + gv_to_hv: BTreeMap, +} + +impl AllocVectors { + fn new() -> Self { + Self { + hv_to_gv: [u32::MAX; VECTOR_CNT], + gv_to_hv: BTreeMap::new(), + } + } } pub struct IdtStruct { @@ -19,7 +46,7 @@ impl IdtStruct { pub fn new() -> Self { extern "C" { #[link_name = "_hyp_trap_vector"] - static ENTRIES: [extern "C" fn(); NUM_INT]; + static ENTRIES: [extern "C" fn(); VECTOR_CNT]; } let mut idt = Self { table: InterruptDescriptorTable::new(), @@ -27,10 +54,10 @@ impl IdtStruct { let entries = unsafe { core::slice::from_raw_parts_mut( &mut idt.table as *mut _ as *mut Entry, - NUM_INT, + VECTOR_CNT, ) }; - for i in 0..NUM_INT { + for i in 0..VECTOR_CNT { entries[i].set_handler_fn(unsafe { core::mem::transmute(ENTRIES[i]) }); } idt @@ -40,3 +67,36 @@ impl IdtStruct { self.table.load(); } } + +pub fn get_host_vector(gv: u32) -> HvResult { + let mut alloc_vectors = ALLOC_VECTORS.lock(); + + if alloc_vectors.gv_to_hv.contains_key(&gv) { + return Ok(*alloc_vectors.gv_to_hv.get(&gv).unwrap()); + } + + for hv in IdtVector::ALLOC_START..=IdtVector::ALLOC_END { + if alloc_vectors.hv_to_gv[hv as usize] == u32::MAX { + alloc_vectors.hv_to_gv[hv as usize] = gv; + alloc_vectors.gv_to_hv.insert(gv, hv); + + info!("gv: {:x}, hv: {:x}", gv, hv); + + return Ok(hv); + } + } + + hv_result_err!(EPERM) +} + +pub fn get_guest_vector(hv: u8) -> HvResult { + let alloc_vectors = ALLOC_VECTORS.lock(); + + if let Some(&gv) = alloc_vectors.hv_to_gv.get(hv as usize) { + if gv != u32::MAX { + return Ok(gv); + } + } + + hv_result_err!(EPERM) +} diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index 8afec9e4..a833c69c 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -99,6 +99,7 @@ pub fn send_ipi(value: u64) -> HvResult { dest_set.iter().for_each(|dest| { match delivery_mode { IpiDeliveryMode::FIXED => { + // info!("dest: {:x}, vector: 
{:x}", dest, vector); inject_vector(dest, vector, None, true); arch_send_event(dest as _, IdtVector::VIRT_IPI_VECTOR as _); } @@ -143,20 +144,4 @@ pub fn handle_virt_ipi() { if event::check_events() { return; } - - // inject ipi - /*let mut vectors = &mut get_ipi_info(this_cpu_id()).unwrap().lock().fixed_vectors; - if vectors.len() > 1 { - // info!("handle_virt_ipi vectors len: {:x}", vectors.len()); - } - - while vectors.len() != 0 { - if let Some(vector) = vectors.pop_front() { - // info!("handle_virt_ipi vector: {:x}", vector); - this_cpu_data() - .arch_cpu - .virt_lapic - .inject_event((vector & 0xff) as u8, None); - } - }*/ } diff --git a/src/arch/x86_64/mm.rs b/src/arch/x86_64/mm.rs index a76ae01b..0b91d578 100644 --- a/src/arch/x86_64/mm.rs +++ b/src/arch/x86_64/mm.rs @@ -8,26 +8,5 @@ pub fn new_s2_memory_set() -> MemorySet { } pub fn init_hv_page_table() -> HvResult { - use x86_64::structures::paging::{PageTable, PageTableFlags as PTF}; - let pt1 = unsafe { &mut *(gpa_as_mut_ptr(GUEST_PT1) as *mut PageTable) }; - let pt2 = unsafe { &mut *(gpa_as_mut_ptr(GUEST_PT2) as *mut PageTable) }; - // identity mapping - pt1[0].set_addr( - x86_64::PhysAddr::new(GUEST_PT2 as _), - PTF::PRESENT | PTF::WRITABLE, - ); - pt2[0].set_addr( - x86_64::PhysAddr::new(0), - PTF::PRESENT | PTF::WRITABLE | PTF::HUGE_PAGE, - ); - - unsafe { - core::ptr::copy_nonoverlapping( - crate::platform::qemu_x86_64::test_guest_2 as usize as *const u8, - gpa_as_mut_ptr(GUEST_ENTRY), - 0x1000, - ); - } - Ok(()) } diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs index c8a0f7f8..f6d2b4ed 100644 --- a/src/arch/x86_64/mmio.rs +++ b/src/arch/x86_64/mmio.rs @@ -90,7 +90,7 @@ impl RmReg { let mut gen_regs = this_cpu_data().arch_cpu.regs_mut(); let mut value = self.read().unwrap(); - value.set_bits(0..(size * 4), new_value.get_bits(0..(size * 4))); + value.set_bits(0..(size * 8), new_value.get_bits(0..(size * 8))); match self { RmReg::AX => gen_regs.rax = value, diff --git 
a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 5b33e2d7..2b3ea2f7 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -6,12 +6,14 @@ pub mod cpu; pub mod cpuid; pub mod entry; pub mod gdt; +pub mod hpet; pub mod idt; pub mod ipi; pub mod mm; pub mod mmio; pub mod msr; pub mod paging; +pub mod pci; pub mod pio; pub mod s1pt; pub mod s2pt; diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs new file mode 100644 index 00000000..994c765b --- /dev/null +++ b/src/arch/x86_64/pci.rs @@ -0,0 +1,72 @@ +use crate::{ + arch::{ + acpi, + mmio::{mmio_handler, MMIoDevice}, + zone::HvArchZoneConfig, + }, + error::HvResult, + memory::{GuestPhysAddr, MMIOAccess}, + zone::Zone, +}; +use ::acpi::{mcfg::Mcfg, sdt::Signature}; +use alloc::{sync::Arc, vec::Vec}; +use core::ops::Range; + +lazy_static::lazy_static! { + static ref VIRT_PCI_CONFIG_SPACE: (Arc,) = (Arc::new(VirtPciConfigSpace::new()),); +} + +pub struct VirtPciConfigSpace {} + +impl VirtPciConfigSpace { + fn new() -> Self { + Self {} + } +} + +impl MMIoDevice for VirtPciConfigSpace { + fn gpa_range(&self) -> &Vec> { + todo!() + } + + fn read(&self, gpa: GuestPhysAddr) -> HvResult { + let value = unsafe { core::ptr::read_unaligned(gpa as *const u64) }; + // info!("pci config read! gpa: {:x}, value: {:x}", gpa, value); + Ok(value) + } + + fn write(&self, gpa: GuestPhysAddr, value: u64, size: usize) -> HvResult { + info!( + "pci config write! 
gpa: {:x}, value: {:x}, size: {:x}", + gpa, value, size, + ); + + todo!() + } + + fn trigger(&self, signal: usize) -> HvResult { + todo!() + } +} + +impl Zone { + pub fn pci_config_space_mmio_init(&mut self, arch: &HvArchZoneConfig) { + let bytes = acpi::root_get_table(&Signature::MCFG) + .unwrap() + .get_bytes() + .clone(); + let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; + + for entry in mcfg.entries() { + let start = entry.base_address as usize; + let size = + ((entry.bus_number_end as usize - entry.bus_number_start as usize) + 1) << 20; + // info!("entry start: {:x} size: {:x}", start, size); + self.mmio_region_register(start, size, pci_config_space_mmio_handler, 0); + } + } +} + +fn pci_config_space_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { + mmio_handler(mmio, &VIRT_PCI_CONFIG_SPACE.0) +} diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 607e6a29..f6c67f28 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -1,4 +1,5 @@ use crate::{ + device::irqchip::pic::vtd::{PCI_CONFIG_ADDR, PCI_CONFIG_DATA}, error::HvResult, memory::{Frame, HostPhysAddr}, }; @@ -24,12 +25,19 @@ impl PortIoBitmap { a: Frame::new_zero()?, b: Frame::new_zero()?, }; - bitmap.a.fill(0xff); - bitmap.b.fill(0xff); - for port in UART_COM1_BASE_PORT..UART_COM1_BASE_PORT + 8 { - bitmap.set_intercept(port, false); - } + bitmap.a.fill(0); + bitmap.b.fill(0); + + // ban i8259a ports + bitmap.set_intercept(0x20, true); + bitmap.set_intercept(0x21, true); + bitmap.set_intercept(0xa0, true); + bitmap.set_intercept(0xa1, true); + + // ban pci config ports + // bitmap.set_intercept(PCI_CONFIG_ADDR, true); + // bitmap.set_intercept(PCI_CONFIG_DATA, true); Ok(bitmap) } @@ -44,10 +52,10 @@ impl PortIoBitmap { fn set_intercept(&mut self, mut port: u16, intercept: bool) { let bitmap = match port <= 0x7fff { - true => unsafe { core::slice::from_raw_parts_mut(self.a.as_mut_ptr(), 1024) }, + true => unsafe { 
core::slice::from_raw_parts_mut(self.a.as_mut_ptr(), 0x1000) }, false => { port -= 0x8000; - unsafe { core::slice::from_raw_parts_mut(self.b.as_mut_ptr(), 1024) } + unsafe { core::slice::from_raw_parts_mut(self.b.as_mut_ptr(), 0x1000) } } }; diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index 42126b28..54fc445d 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -4,6 +4,7 @@ use crate::{ vmcs::*, }, consts::PAGE_SIZE, + device::irqchip::pic::vtd, error::HvResult, memory::{ addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}, @@ -144,6 +145,7 @@ impl From for DescriptorAttr { if !flags.contains(MemFlags::IO) { attr.set_mem_type(MemType::WriteBack); } else { + attr.set_mem_type(MemType::WriteThrough); // attr &= !Self::READ; } attr @@ -240,6 +242,8 @@ impl PagingInstr for S2PTInstr { let s2ptp = S2PTPointer::from_table_phys(root_paddr).bits(); crate::arch::vmcs::VmcsControl64::EPTP.write(s2ptp).unwrap(); unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) }; + + vtd::update_dma_translation_tables(root_paddr); } fn flush(_vaddr: Option) {} diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index c0709a05..46e9a1b3 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -2,7 +2,8 @@ use crate::{ arch::{ cpu::{this_cpu_id, ArchCpu}, cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, - idt::{IdtStruct, IdtVector}, + hpet, + idt::{get_guest_vector, get_host_vector, IdtStruct, IdtVector}, ipi, msr::Msr::{self, *}, s2pt::Stage2PageFaultInfo, @@ -13,7 +14,6 @@ use crate::{ irqchip::{ inject_vector, pic::{ - hpet, ioapic::{ioapic_inject_irq, irqs}, lapic::VirtLocalApic, }, @@ -88,11 +88,14 @@ fn handle_irq(vector: u8) { None, true, ), - IdtVector::UART_COM1_VECTOR => { - ioapic_inject_irq(irqs::UART_COM1_IRQ); - } _ => { - // println!("Unhandled irq {}", vector); + inject_vector( + this_cpu_id(), + vector as _, + //get_guest_vector(vector).unwrap() as _, + None, + false, + ); } } unsafe { 
VirtLocalApic::phys_local_apic().end_of_interrupt() }; @@ -214,11 +217,12 @@ fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult { fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { let io_info = VmxIoExitInfo::new()?; - trace!( + + /*info!( "VM exit: I/O instruction @ {:#x}: {:#x?}", - exit_info.guest_rip, - io_info, - ); + exit_info.guest_rip, io_info, + );*/ + if io_info.is_string { error!("INS/OUTS instructions are not supported!"); return hv_result_err!(ENOSYS); diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index 43262cf1..8e2e4fda 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -3,7 +3,7 @@ use crate::{ config::*, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, - platform::qemu_x86_64::MEM_TYPE_ROM, + platform::qemu_x86_64::{MEM_TYPE_RAM_NOT_ALLOC, MEM_TYPE_ROM}, zone::Zone, }; @@ -16,8 +16,6 @@ pub struct HvArchZoneConfig { impl Zone { pub fn pt_init(&mut self, mem_regions: &[HvConfigMemoryRegion]) -> HvResult { - // The first memory region is used to map the guest physical memory. - for mem_region in mem_regions.iter() { let mut flags = MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE; if mem_region.mem_type == MEM_TYPE_IO { @@ -26,7 +24,7 @@ impl Zone { flags &= !MemFlags::WRITE; } match mem_region.mem_type { - MEM_TYPE_RAM | MEM_TYPE_ROM | MEM_TYPE_IO => { + MEM_TYPE_RAM | MEM_TYPE_ROM | MEM_TYPE_RAM_NOT_ALLOC | MEM_TYPE_IO => { self.gpm.insert(MemoryRegion::new_with_offset_mapper( mem_region.virtual_start as GuestPhysAddr, mem_region.physical_start as HostPhysAddr, @@ -34,14 +32,7 @@ impl Zone { flags, ))? 
} - /*MEM_TYPE_IO => { - self.mmio_region_register( - mem_region.physical_start as _, - mem_region.size as _, - mmio_handler, - mem_region.physical_start as _, - ); - } + /* MEM_TYPE_VIRTIO => { self.mmio_region_register( mem_region.physical_start as _, @@ -56,12 +47,13 @@ impl Zone { } } - info!("VM stage 2 memory set: {:#x?}", self.gpm); + // info!("VM stage 2 memory set: {:#x?}", self.gpm); Ok(()) } pub fn mmio_init(&mut self, hv_config: &HvArchZoneConfig) { self.ioapic_mmio_init(hv_config); + self.pci_config_space_mmio_init(hv_config); } pub fn isa_init(&mut self, fdt: &fdt::Fdt) {} diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 852b28df..22072aa2 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -1,10 +1,14 @@ use self::{irqs::*, IoApicReg::*}; use crate::{ - arch::{idt::IdtVector, mmio::mmio_handler, mmio::MMIoDevice, zone::HvArchZoneConfig}, - device::irqchip::pic::{enable_irq, hpet, inject_vector}, + arch::{ + idt::{get_host_vector, IdtVector}, + mmio::{mmio_handler, MMIoDevice}, + zone::HvArchZoneConfig, + }, + device::irqchip::pic::{enable_irq, inject_vector}, error::HvResult, memory::{GuestPhysAddr, MMIOAccess}, - platform::qemu_x86_64::ROOT_IOAPIC_BASE, + platform::qemu_x86_64::ROOT_ZONE_IOAPIC_BASE, zone::Zone, }; use alloc::{sync::Arc, vec::Vec}; @@ -13,7 +17,7 @@ use core::{ops::Range, time::Duration, u32}; use raw_cpuid::CpuId; use spin::Mutex; use x2apic::{ - ioapic::{IoApic, IrqFlags, IrqMode}, + ioapic::{IoApic, IrqFlags, IrqMode, RedirectionTableEntry}, lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}, }; use x86_64::instructions::port::Port; @@ -32,10 +36,14 @@ pub mod IoApicReg { const IOAPIC_MAX_REDIRECT_ENTRIES: u64 = 0x17; -static mut IO_APIC: Option = None; +lazy_static::lazy_static! { + static ref IO_APIC: Mutex = { + unsafe { Mutex::new(IoApic::new(ROOT_ZONE_IOAPIC_BASE as _)) } + }; +} lazy_static::lazy_static! 
{ - static ref VIRT_IOAPIC: (Arc,) = (Arc::new(VirtIoApic::new(ROOT_IOAPIC_BASE)),); + static ref VIRT_IOAPIC: (Arc,) = (Arc::new(VirtIoApic::new(ROOT_ZONE_IOAPIC_BASE)),); } #[derive(Default)] @@ -118,6 +126,15 @@ impl MMIoDevice for VirtIoApic { entry.set_bits(0..=31, value.get_bits(0..=31)); } else { entry.set_bits(32..=63, value.get_bits(0..=31)); + + // use host vector instead of guest vector + /* entry.set_bits( + 0..=7, + get_host_vector(entry.get_bits(0..=7) as u32).unwrap() as _, + ); */ + unsafe { + configure_gsi_from_raw(index as _, *entry); + }; } } } @@ -149,25 +166,16 @@ fn ioapic_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { mmio_handler(mmio, &VIRT_IOAPIC.0) } -unsafe fn configure_gsi(io_apic: &mut IoApic, dest: u8, gsi: u8, vector: u8) { - let mut entry = io_apic.table_entry(gsi); - entry.set_dest(dest); - entry.set_vector(vector); - entry.set_mode(IrqMode::Fixed); - io_apic.set_table_entry(gsi, entry); - io_apic.enable_irq(gsi); +unsafe fn configure_gsi_from_raw(irq: u8, raw: u64) { + let mut io_apic = IO_APIC.lock(); + io_apic.set_table_entry(irq, core::mem::transmute(raw)); } pub fn init_ioapic() { println!("Initializing I/O APIC..."); unsafe { Port::::new(0x20).write(0xff); - Port::::new(0xA0).write(0xff); - - let mut io_apic = IoApic::new(ROOT_IOAPIC_BASE as _); - // println!("ioapic id: {:x}", io_apic.id()); - configure_gsi(&mut io_apic, 0, UART_COM1_IRQ, IdtVector::UART_COM1_VECTOR); - IO_APIC = Some(io_apic); + Port::::new(0xa0).write(0xff); } } diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index dd2489c5..5bb6c15a 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -2,18 +2,16 @@ use crate::{ arch::{ cpu::{this_cpu_id, ArchCpu}, idt::IdtVector, - ipi::{self, IpiDeliveryMode}, + ipi, msr::Msr::{self, *}, vmcs::Vmcs, }, - device::irqchip::{inject_vector, pic::hpet}, error::HvResult, percpu::{this_cpu_data, this_zone, CpuSet}, }; -use 
alloc::collections::vec_deque::VecDeque; use bit_field::BitField; -use core::{arch::x86_64::_rdtsc, ops::Range, u32}; -use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerDivide, TimerMode}; +use core::{ops::Range, u32}; +use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerMode}; pub struct VirtLocalApic { pub phys_lapic: LocalApic, diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 13131f28..090f80a6 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,9 +1,9 @@ -pub mod hpet; pub mod ioapic; pub mod lapic; +pub mod vtd; use crate::{ - arch::{cpu::this_cpu_id, ipi, vmcs::Vmcs}, + arch::{acpi, cpu::this_cpu_id, ipi, vmcs::Vmcs}, consts::MAX_CPU_NUM, zone::Zone, }; @@ -89,9 +89,13 @@ pub fn percpu_init() {} pub fn primary_init_early() { ipi::init(MAX_CPU_NUM); PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); + acpi::root_init(); } -pub fn primary_init_late() {} +pub fn primary_init_late() { + acpi::copy_to_root_zone_region(); + vtd::init(); +} impl Zone { pub fn arch_irqchip_reset(&self) {} diff --git a/src/device/irqchip/pic/vtd.rs b/src/device/irqchip/pic/vtd.rs new file mode 100644 index 00000000..70c0bc0b --- /dev/null +++ b/src/device/irqchip/pic/vtd.rs @@ -0,0 +1,460 @@ +use crate::{ + arch::acpi, + memory::{Frame, HostPhysAddr}, + percpu::this_zone, + zone::this_zone_id, +}; +use ::acpi::{mcfg::Mcfg, sdt::Signature}; +use alloc::{collections::btree_map::BTreeMap, vec::Vec}; +use bit_field::BitField; +use core::{arch::asm, default, hint::spin_loop, mem::size_of, usize}; +use dma_remap_reg::*; +use spin::{Mutex, Once}; +use x86_64::instructions::port::Port; + +const IR_ENTRY_CNT: usize = 256; + +// I/O ports +pub const PCI_CONFIG_ADDR: u16 = 0xcf8; +pub const PCI_CONFIG_DATA: u16 = 0xcfc; +const PCI_CONFIG_ENABLE: u32 = 0x80000000; + +const ROOT_TABLE_ENTRY_SIZE: usize = 16; +const CONTEXT_TABLE_ENTRY_SIZE: usize = 16; + +// DMA-remapping registers + +mod dma_remap_reg { + /// 
Extended Capability Register + pub const DMAR_ECAP_REG: usize = 0x10; + /// Global Command Register + pub const DMAR_GCMD_REG: usize = 0x18; + /// Global Status Register + pub const DMAR_GSTS_REG: usize = 0x1c; + /// Root Table Address Register + pub const DMAR_RTADDR_REG: usize = 0x20; + /// Fault Event Control Register + pub const DMAR_FECTL_REG: usize = 0x38; + /// Invalidation Queue Tail Register + pub const DMAR_IQT_REG: usize = 0x88; + /// Invalidation Queue Address Register + pub const DMAR_IQA_REG: usize = 0x90; + /// Interrupt Remapping Table Address Register + pub const DMAR_IRTA_REG: usize = 0xb8; +} + +static DRHD_UNITS: Once>> = Once::new(); + +bitflags::bitflags! { + #[derive(Clone, Copy, Debug)] + pub struct EcapFlags: u64 { + /// Extended Interrupt Mode + const EIM = 1 << 4; + /// Interrupt Remapping Support + const IR = 1 << 3; + /// Queued Invalidation Support + const QI = 1 << 1; + } + + #[derive(Clone, Copy, Debug)] + pub struct GstsFlags: u32 { + /// Translation Enable Status + const TES = 1 << 31; + /// Root Table Pointer Status + const RTPS = 1 << 30; + /// Queue Invalidation Enable Status + const QIES = 1 << 26; + /// Interrupt Remapping Enable Status + const IRES = 1 << 25; + /// Interrupt Remap Table Pointer Status + const IRTPS = 1 << 24; + } + + #[derive(Clone, Copy, Debug)] + pub struct GcmdFlags: u32 { + /// Translation Enable + const TE = 1 << 31; + /// Set Root Table Pointer + const SRTP = 1 << 30; + /// Queue Invalidation Enable + const QIE = 1 << 26; + /// Interrupt Remapping Enable + const IRE = 1 << 25; + /// Set Interrupt Remap Table Pointer + const SIRTP = 1 << 24; + } +} + +numeric_enum_macro::numeric_enum! 
{ +#[repr(u8)] +#[derive(Clone, Debug, PartialEq)] +pub enum DeviceScopeType { + NotUsed = 0x00, + PciEndpointDevice = 0x01, + PciSubHierarchy = 0x02, + IoApic = 0x03, + MsiCapableHpet = 0x04, + AcpiNamespaceDevice = 0x05 +} +} + +#[derive(Clone, Debug)] +struct DeviceScope { + scope_type: DeviceScopeType, + id: u8, + bus: u8, + dev_func: u8, +} + +#[derive(Debug)] +struct Drhd { + flags: u8, + segment: u16, + reg_hpa: usize, + scopes: Vec, + + root_table: Frame, + context_tables: BTreeMap, + qi_queue: Frame, + ir_table: Frame, + /// cache value of DMAR_GCMD_REG + gcmd: GcmdFlags, +} + +impl Drhd { + fn activate(&mut self) { + self.activate_dma_translation(); + } + + fn activate_dma_translation(&mut self) { + if !self.gcmd.contains(GcmdFlags::TE) { + self.gcmd |= GcmdFlags::TE; + self.mmio_write_u32(DMAR_GCMD_REG, self.gcmd.bits()); + + self.wait(GstsFlags::TES, false); + } + } + + fn activate_interrupt_remapping(&mut self) { + if !self.gcmd.contains(GcmdFlags::IRE) { + self.gcmd |= GcmdFlags::IRE; + self.mmio_write_u32(DMAR_GCMD_REG, self.gcmd.bits()); + + self.wait(GstsFlags::IRES, false); + } + } + + fn activate_qi(&mut self) { + let qi_queue_hpa = self.qi_queue.start_paddr(); + self.mmio_write_u64(DMAR_IQA_REG, qi_queue_hpa as u64); + self.mmio_write_u32(DMAR_IQT_REG, 0); + + if !self.gcmd.contains(GcmdFlags::QIE) { + self.gcmd |= GcmdFlags::QIE; + + self.mmio_write_u32(DMAR_GCMD_REG, self.gcmd.bits()); + + self.wait(GstsFlags::QIES, false); + } + } + + fn add_context_entry(&mut self, bus: u8, dev_func: u8, zone_s2pt_hpa: HostPhysAddr) { + let root_entry_hpa = self.root_table.start_paddr() + (bus as usize) * ROOT_TABLE_ENTRY_SIZE; + let root_entry_low = unsafe { &mut *(root_entry_hpa as *mut u64) }; + + // context table not present + if !root_entry_low.get_bit(0) { + let context_table = Frame::new_zero().unwrap(); + let context_table_hpa = context_table.start_paddr(); + + // set context-table pointer + root_entry_low.set_bits(12..=63, 
context_table_hpa.get_bits(12..=63) as _); + // set present + root_entry_low.set_bit(0, true); + + flush_cache_range(root_entry_hpa, ROOT_TABLE_ENTRY_SIZE); + self.context_tables.insert(bus, context_table); + } + + let context_table_hpa = self.context_tables.get(&bus).unwrap().start_paddr(); + let context_entry_hpa = context_table_hpa + (dev_func as usize) * CONTEXT_TABLE_ENTRY_SIZE; + let context_entry = unsafe { &mut *(context_entry_hpa as *mut u128) }; + + // s2pt not present + if !context_entry.get_bit(0) { + // address width: 010b (48bit 4-level page table) + context_entry.set_bits(64..=66, 0b010); + // domain identifier: zone id + context_entry.set_bits(72..=87, this_zone_id() as _); + // second stage page translation pointer + context_entry.set_bits(12..=63, zone_s2pt_hpa.get_bits(12..=63) as _); + // present + context_entry.set_bit(0, true); + + flush_cache_range(context_entry_hpa, CONTEXT_TABLE_ENTRY_SIZE); + } + } + + fn add_interrupt_table_entry(&mut self, irq: u32) { + assert!(irq < (IR_ENTRY_CNT as u32)); + + let ir_table_hpa = self.ir_table.start_paddr(); + let irte_hpa = ir_table_hpa + (irq as usize) * size_of::(); + let irte_ptr = irte_hpa as *mut u128; + let mut irte: u128 = 0; + + // present + irte.set_bit(0, true); + // irte mode: remap + irte.set_bit(15, false); + // vector + irte.set_bits(16..=23, irq as _); + // dest id + irte.set_bits(32..=63, 0); + + unsafe { *irte_ptr = irte }; + flush_cache_range(irte_hpa, size_of::()); + + // TODO: iec + } + + fn check_capability(&mut self) { + let ecap = EcapFlags::from_bits_truncate(self.mmio_read_u64(DMAR_ECAP_REG)); + info!("ecap: {:x?}", ecap); + assert!(ecap.contains(EcapFlags::EIM | EcapFlags::IR | EcapFlags::QI)); + } + + fn init(&mut self) { + self.check_capability(); + self.set_interrupt(); + self.set_root_table(); + self.activate_qi(); + + // self.set_interrupt_remap_table(); + /* for irq in 0..IR_ENTRY_CNT { + self.add_interrupt_table_entry(irq as _); + } */ + // 
self.activate_interrupt_remapping(); + } + + fn set_interrupt(&mut self) { + self.mmio_write_u32(DMAR_FECTL_REG, 0); + } + + fn set_interrupt_remap_table(&mut self) { + // bit 12-63: ir table address + // bit 11: x2apic mode active + // bit 0-3: X, where 2 ^ (X + 1) == number of entries + let address: u64 = + (self.ir_table.start_paddr() as u64) | (1 << 11) | ((IR_ENTRY_CNT.ilog2() - 1) as u64); + + self.mmio_write_u64(DMAR_IRTA_REG, address); + self.mmio_write_u32(DMAR_GCMD_REG, (self.gcmd | GcmdFlags::SIRTP).bits()); + + self.wait(GstsFlags::IRTPS, false); + } + + fn set_root_table(&mut self) { + self.mmio_write_u64(DMAR_RTADDR_REG, self.root_table.start_paddr() as _); + self.mmio_write_u32(DMAR_GCMD_REG, (self.gcmd | GcmdFlags::SRTP).bits()); + + self.wait(GstsFlags::RTPS, false); + } + + fn update_dma_translation_tables(&mut self, zone_s2pt_hpa: HostPhysAddr) { + let bdfs: Vec<(u8, u8)> = self + .scopes + .iter() + .filter(|scope| scope.scope_type == DeviceScopeType::PciEndpointDevice) + .map(|scope| (scope.bus, scope.dev_func)) + .collect(); + + for (bus, dev_func) in bdfs { + self.add_context_entry(bus, dev_func, zone_s2pt_hpa); + } + } + + fn wait(&mut self, mask: GstsFlags, cond: bool) { + let mut status = GstsFlags::empty(); + loop { + spin_loop(); + status = GstsFlags::from_bits_truncate(self.mmio_read_u32(DMAR_GSTS_REG)); + + if status.contains(mask) != cond { + break; + } + } + } + + fn mmio_read_u32(&self, reg: usize) -> u32 { + unsafe { *((self.reg_hpa + reg) as *const u32) } + } + + fn mmio_read_u64(&self, reg: usize) -> u64 { + unsafe { *((self.reg_hpa + reg) as *const u64) } + } + + fn mmio_write_u32(&self, reg: usize, value: u32) { + unsafe { *((self.reg_hpa + reg) as *mut u32) = value }; + } + + fn mmio_write_u64(&self, reg: usize, value: u64) { + unsafe { *((self.reg_hpa + reg) as *mut u64) = value }; + } +} + +fn get_secondary_bus(bus: u8, dev: u8, func: u8) -> u8 { + unsafe { + Port::::new(PCI_CONFIG_ADDR).write( + PCI_CONFIG_ENABLE + | ((bus 
as u32) << 16) + | ((dev as u32) << 11) + | ((func as u32) << 8) + | 0x18, + ); + let data = Port::::new(PCI_CONFIG_DATA).read(); + ((data >> 8) & 0xff) as u8 + } +} + +pub fn parse_root_drhds() -> Vec> { + let mut drhds: Vec> = Vec::new(); + + let dmar = acpi::root_get_table(&Signature::DMAR).unwrap(); + let mut cur: usize = 48; // start offset of remapping structures + let len = dmar.get_len(); + + while cur < len { + let struct_type = dmar.get_u16(cur); + let struct_len = dmar.get_u16(cur + 2) as usize; + + // drhd + if struct_type == 0 { + let mut drhd = Drhd { + flags: dmar.get_u8(cur + 4), + segment: dmar.get_u16(cur + 6), + reg_hpa: dmar.get_u64(cur + 8) as usize, + scopes: Vec::new(), + + root_table: Frame::new_zero().unwrap(), + context_tables: BTreeMap::new(), + qi_queue: Frame::new_zero().unwrap(), + ir_table: Frame::new_zero().unwrap(), + gcmd: GcmdFlags::empty(), + }; + + let mut scope_cur = cur + 16; // start offset of device scopes + // device scopes + while scope_cur < cur + struct_len { + let scope_len = dmar.get_u8(scope_cur + 1) as usize; + + let mut bus = dmar.get_u8(scope_cur + 5); + let mut path_cur = scope_cur + 6; + let mut dev = dmar.get_u8(path_cur); + let mut func = dmar.get_u8(path_cur + 1); + + // info!("bdf: {:x} {:x} {:x}", bus, dev, func); + + path_cur += 2; + while path_cur < scope_cur + scope_len { + bus = get_secondary_bus(bus, dev, func); + dev = dmar.get_u8(path_cur); + func = dmar.get_u8(path_cur + 1); + // info!("bdf: {:x} {:x} {:x}", bus, dev, func); + path_cur += 2; + } + + let mut scope = DeviceScope { + scope_type: DeviceScopeType::try_from(dmar.get_u8(scope_cur + 0)).unwrap(), + id: dmar.get_u8(scope_cur + 4), + bus, + dev_func: (dev << 3) | func, + }; + info!("{:x?}", scope); + drhd.scopes.push(scope); + + scope_cur += scope_len; + } + + drhds.push(Mutex::new(drhd)); + } else { + } + + cur += struct_len; + } + + drhds +} + +pub fn init() { + DRHD_UNITS.call_once(|| parse_root_drhds()); + for unit in 
DRHD_UNITS.get().unwrap().iter() { + unit.lock().init(); + } + + init_msi_cap_hpa_space(); +} + +pub fn update_dma_translation_tables(zone_s2pt_hpa: HostPhysAddr) { + for unit in DRHD_UNITS.get().unwrap().iter() { + unit.lock().update_dma_translation_tables(zone_s2pt_hpa); + } +} + +/// should be called after gpm is activated +pub fn activate() { + for unit in DRHD_UNITS.get().unwrap().iter() { + unit.lock().activate(); + } +} + +fn flush_cache_range(hpa: usize, size: usize) { + let mut i = 0usize; + while (i < size) { + unsafe { asm!("clflushopt [{addr}]", addr = in(reg) hpa + i) }; + i += 64; + } +} + +fn init_msi_cap_hpa_space() { + let bytes = acpi::root_get_table(&Signature::MCFG) + .unwrap() + .get_bytes() + .clone(); + let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; + + for unit in DRHD_UNITS.get().unwrap().iter() { + let drhd = unit.lock(); + + for entry in mcfg.entries() { + if entry.pci_segment_group != drhd.segment { + break; + } + let bus_range = entry.bus_number_start..=entry.bus_number_end; + + for scope in drhd.scopes.iter() { + if scope.scope_type != DeviceScopeType::PciEndpointDevice { + continue; + } + + if !bus_range.contains(&scope.bus) { + continue; + } + + let config_space_hpa = (entry.base_address as usize) + + ((scope.bus as usize) << 20) + + ((scope.dev_func as usize) << 12); + let mut cap_pointer = unsafe { *((config_space_hpa + 0x34) as *const u8) } as usize; + + info!("dev_fun: {:x}", scope.dev_func); + while cap_pointer != 0 { + let cap_hpa = config_space_hpa + cap_pointer; + let cap_id = unsafe { *(cap_hpa as *const u8) }; + + info!("cap id: {:x}, hpa: {:x}", cap_id, cap_hpa); + cap_pointer = unsafe { *((cap_hpa + 1) as *const u8) } as usize; + } + } + } + } +} diff --git a/src/main.rs b/src/main.rs index 756a05b7..a60d0a02 100644 --- a/src/main.rs +++ b/src/main.rs @@ -151,9 +151,10 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { memory::heap::init(); memory::heap::test(); #[cfg(target_arch = "x86_64")] - 
MultibootInfo::init(host_dtb); - #[cfg(target_arch = "x86_64")] - device::irqchip::pic::ioapic::init_ioapic(); + { + MultibootInfo::init(host_dtb); + device::irqchip::pic::ioapic::init_ioapic(); + } } let cpu = PerCpu::new(cpuid); @@ -189,9 +190,16 @@ fn x86_rust_main_tmp(cpuid: usize, host_dtb: usize) { wait_for_counter(&INIT_EARLY_OK, 1); } - // x86_64::instructions::interrupts::int3(); - // info!("END OF MAIN"); + device::irqchip::percpu_init(); + + INITED_CPUS.fetch_add(1, Ordering::SeqCst); + wait_for_counter(&INITED_CPUS, MAX_CPU_NUM as _); + if is_primary { + primary_init_late(); + } else { + wait_for_counter(&INIT_LATE_OK, 1); + } cpu.run_vm(); loop {} } diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs index f1e9314e..c0e4fd3b 100644 --- a/src/platform/qemu_x86_64.rs +++ b/src/platform/qemu_x86_64.rs @@ -1,35 +1,53 @@ use crate::{ arch::zone::HvArchZoneConfig, config::*, - memory::{GuestPhysAddr, HostPhysAddr}, + memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr}, }; +pub const MEM_TYPE_ROM: u32 = 3; +pub const MEM_TYPE_RAM_NOT_ALLOC: u32 = 4; + pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; -pub const ROOT_ZONE_ENTRY: u64 = 0x8000; // 0x10_0000; -pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // 0x500_0000; -pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; -pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; +pub const ROOT_ZONE_ENTRY: u64 = 0x8000; pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; +pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; +pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1) | (1 << 2); -pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_CMDLINE: &str = "console=ttyS0 earlyprintk=serial rdinit=/init nokaslr\0"; // noapic +pub const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = 
HvConfigMemoryRegion { + mem_type: MEM_TYPE_ROM, + physical_start: 0x50e_0000, + virtual_start: 0xe_0000, + size: 0x2_0000, +}; -pub const MEM_TYPE_ROM: u32 = 3; +pub const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM_NOT_ALLOC, + physical_start: 0x6020_0000, // hpa + virtual_start: 0x5520_0000, // gpa + size: 0xf000, // modify size accordingly +}; + +pub const ROOT_ZONE_NAME: &str = "root-linux"; +pub const ROOT_ZONE_CMDLINE: &str = + "console=ttyS0 earlyprintk=serial nointremap root=/dev/vda rw init=/bin/sh\0"; +//"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 3] = [ +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 9] = [ HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, physical_start: 0x500_0000, virtual_start: 0x0, - size: 0x1500_0000, + size: 0xe_0000, }, // ram + ROOT_ZONE_RSDP_REGION, // rsdp HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x2020_0000, - virtual_start: 0x1520_0000, - size: 0x4000_0000, + physical_start: 0x510_0000, + virtual_start: 0x10_0000, + size: 0x14f0_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_ROM, @@ -37,54 +55,48 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 3] = [ virtual_start: 0x1500_0000, size: 0x20_0000, }, // initrd + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x2020_0000, + virtual_start: 0x1520_0000, + size: 0x4000_0000, + }, // ram + ROOT_ZONE_ACPI_REGION, // acpi + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfed0_0000, + virtual_start: 0xfed0_0000, + size: 0x1000, + }, // hpet + /*HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xb000_0000, + virtual_start: 0xb000_0000, + size: 0x1000_0000, + }, // TODO: pci config*/ + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfea0_0000, + virtual_start: 0xfea0_0000, + size: 0x20_0000, + }, // TODO: pci + 
HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0x70_0000_0000, + virtual_start: 0x70_0000_0000, + size: 0x1000_4000, + }, // FIXME: pci 0000:00:03.0 ]; pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; -pub const ROOT_IOAPIC_BASE: usize = 0xfec0_0000; +pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { - ioapic_base: ROOT_IOAPIC_BASE, + ioapic_base: ROOT_ZONE_IOAPIC_BASE, ioapic_size: 0x1000, }; -// TODO: temp -pub const GUEST_PT1: GuestPhysAddr = 0x1000; -pub const GUEST_PT2: GuestPhysAddr = 0x2000; -pub const GUEST_ENTRY: GuestPhysAddr = 0x10_0000; -pub const GUEST_STACK_TOP: GuestPhysAddr = 0x7000; -pub const GUEST_PHYS_MEMORY_START: HostPhysAddr = 0x100_0000; - -pub fn gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { +pub fn root_zone_gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { let offset = ROOT_ZONE_KERNEL_ADDR as usize; let host_vaddr = guest_paddr + offset; host_vaddr as *mut u8 } - -#[naked] -pub unsafe extern "C" fn test_guest() -> ! { - core::arch::asm!( - " - mov rax, 0 - mov rdi, 2 - mov rsi, 3 - mov rdx, 3 - mov rcx, 3 - 2: - vmcall - add rax, 1 - jmp 2b", - options(noreturn), - ); -} - -pub unsafe extern "C" fn test_guest_2() -> ! 
{ - core::arch::asm!( - "vmcall", - inout("rax") 0 => _, - in("rdi") 2, - in("rsi") 3, - in("rdx") 3, - in("rcx") 3, - ); - core::arch::asm!("mov qword ptr [$0xffff233], $2333"); // panic - loop {} -} From 4e4ee639b9312886b3c3d178057bd25e0ea7d6b4 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 6 Apr 2025 19:03:31 +0800 Subject: [PATCH 11/29] implement pci device probing, activate pci virtualization --- platform/x86_64/qemu/board.rs | 22 +-- platform/x86_64/qemu/cargo/features | 1 + platform/x86_64/qemu/platform.mk | 2 +- src/arch/x86_64/acpi.rs | 13 +- src/arch/x86_64/cpu.rs | 25 ++- src/arch/x86_64/mmio.rs | 91 +++++++++-- src/arch/x86_64/mod.rs | 1 + src/arch/x86_64/msr.rs | 24 +-- src/arch/x86_64/pci.rs | 82 ++++++++-- src/arch/x86_64/pio.rs | 45 ++++-- src/arch/x86_64/s2pt.rs | 5 +- src/arch/x86_64/trap.rs | 7 - src/arch/x86_64/vmx.rs | 8 +- .../irqchip/pic => arch/x86_64}/vtd.rs | 149 +++++++----------- src/arch/x86_64/zone.rs | 1 - src/device/irqchip/mod.rs | 4 +- src/device/irqchip/pic/ioapic.rs | 32 ++-- src/device/irqchip/pic/mod.rs | 5 +- src/event.rs | 2 +- src/main.rs | 1 - src/memory/mmio.rs | 8 +- src/pci/pci.rs | 59 +++++-- src/pci/pcibar.rs | 2 +- src/platform/mod.rs | 7 + src/platform/qemu_x86_64.rs | 102 ------------ src/zone.rs | 22 +++ 26 files changed, 383 insertions(+), 337 deletions(-) rename src/{device/irqchip/pic => arch/x86_64}/vtd.rs (76%) delete mode 100644 src/platform/qemu_x86_64.rs diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index eb695373..dff6b9f8 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -50,7 +50,7 @@ pub const ROOT_ZONE_CMDLINE: &str = "console=ttyS0 earlyprintk=serial nointremap root=/dev/vda rw init=/bin/sh\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 9] = [ +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ HvConfigMemoryRegion { mem_type: 
MEM_TYPE_RAM, physical_start: 0x500_0000, @@ -83,24 +83,6 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 9] = [ virtual_start: 0xfed0_0000, size: 0x1000, }, // hpet - /*HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xb000_0000, - virtual_start: 0xb000_0000, - size: 0x1000_0000, - }, // TODO: pci config*/ - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfea0_0000, - virtual_start: 0xfea0_0000, - size: 0x20_0000, - }, // TODO: pci - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0x70_0000_0000, - virtual_start: 0x70_0000_0000, - size: 0x1000_4000, - }, // FIXME: pci 0000:00:03.0 ]; pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; @@ -110,6 +92,8 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { ioapic_size: 0x1000, }; +pub const ROOT_PCI_DEVS: [u64; 7] = [0x0, 0x8, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; // 0x10, + pub fn root_zone_gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { let offset = ROOT_ZONE_KERNEL_ADDR as usize; let host_vaddr = guest_paddr + offset; diff --git a/platform/x86_64/qemu/cargo/features b/platform/x86_64/qemu/cargo/features index e69de29b..71878594 100644 --- a/platform/x86_64/qemu/cargo/features +++ b/platform/x86_64/qemu/cargo/features @@ -0,0 +1 @@ +pci \ No newline at end of file diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index 87ccf19e..fe6337be 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -17,7 +17,7 @@ QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw -QEMU_ARGS += -device virtio-blk-pci,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on # bus=pcie.1, # 
QEMU_ARGS += --trace "virtio_*" --trace "virtqueue_*" --trace "vtd_dma*" --trace "iommu_*" QEMU_ARGS += -kernel $(hvisor_elf) diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 7e8d5023..9a887378 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -1,4 +1,5 @@ use crate::{ + arch::pci::probe_root_pci_devices, config::HvConfigMemoryRegion, error::HvResult, percpu::this_zone, @@ -7,7 +8,7 @@ use crate::{ use acpi::{ fadt::Fadt, madt::{LocalApicEntry, Madt, MadtEntry}, - mcfg::Mcfg, + mcfg::{Mcfg, McfgEntry}, rsdp::Rsdp, sdt::{SdtHeader, Signature}, AcpiHandler, AcpiTables, AmlTable, PciConfigRegions, @@ -16,7 +17,7 @@ use alloc::{ collections::{btree_map::BTreeMap, btree_set::BTreeSet}, vec::Vec, }; -use core::{pin::Pin, ptr::NonNull}; +use core::{mem::size_of, pin::Pin, ptr::NonNull}; use spin::Mutex; const RSDP_V1_SIZE: usize = 20; @@ -391,10 +392,18 @@ impl RootAcpi { mcfg.physical_start() as *const u8, mcfg.region_length(), ); + let new_mcfg = self.get_mut_table(Signature::MCFG).unwrap(); info!("-------------------------------- MCFG --------------------------------"); + let mut offset = size_of::() + 0xb; for entry in mcfg.entries() { info!("{:x?}", entry); + // we don't have such many buses, probe devices to get the max_bus we have + let (_, _, max_bus) = probe_root_pci_devices(entry.base_address as _); + + // update bus_number_end + new_mcfg.set_u8(max_bus, offset); + offset += size_of::(); } self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 7c36720f..f02c3779 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -10,12 +10,13 @@ use crate::{ pio::PortIoBitmap, vmcs::*, vmx::*, + vtd, }, consts::{core_end, MAX_CPU_NUM, PER_CPU_SIZE}, - device::irqchip::pic::{check_pending_vectors, lapic::VirtLocalApic, vtd}, + device::irqchip::pic::{check_pending_vectors, lapic::VirtLocalApic}, error::{HvError, HvResult}, 
memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, - percpu::this_cpu_data, + percpu::{this_cpu_data, this_zone}, platform::{ ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE, ROOT_ZONE_CMDLINE_ADDR, ROOT_ZONE_INITRD_ADDR, ROOT_ZONE_SETUP_ADDR, @@ -164,8 +165,6 @@ pub struct ArchCpu { vmcs_revision_id: u32, vmxon_region: VmxRegion, vmcs_region: VmxRegion, - msr_bitmap: MsrBitmap, - pio_bitmap: PortIoBitmap, } impl ArchCpu { @@ -180,10 +179,8 @@ impl ArchCpu { gdt: GdtStruct::new(tss), virt_lapic: VirtLocalApic::new(), vmcs_revision_id: 0, - vmxon_region: VmxRegion::uninit(), - vmcs_region: VmxRegion::uninit(), - msr_bitmap: MsrBitmap::uninit(), - pio_bitmap: PortIoBitmap::uninit(), + vmxon_region: VmxRegion::fake_init(), + vmcs_region: VmxRegion::fake_init(), } } @@ -270,7 +267,7 @@ impl ArchCpu { // get VMCS revision identifier in IA32_VMX_BASIC MSR self.vmcs_revision_id = get_vmcs_revision_id(); - self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false).unwrap(); + self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false); unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap() }; @@ -324,9 +321,7 @@ impl ArchCpu { } fn setup_vmcs(&mut self, entry: GuestPhysAddr, set_rip: bool) -> HvResult { - self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false)?; - self.msr_bitmap = MsrBitmap::intercept_def()?; - self.pio_bitmap = PortIoBitmap::intercept_def()?; + self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false); let start_paddr = self.vmcs_region.start_paddr() as usize; Vmcs::clear(start_paddr)?; @@ -415,9 +410,9 @@ impl ArchCpu { // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps. 
VmcsControl32::EXCEPTION_BITMAP.write(0)?; - VmcsControl64::IO_BITMAP_A_ADDR.write(self.pio_bitmap.bitmap_a_addr() as _)?; - VmcsControl64::IO_BITMAP_B_ADDR.write(self.pio_bitmap.bitmap_b_addr() as _)?; - VmcsControl64::MSR_BITMAPS_ADDR.write(self.msr_bitmap.phys_addr() as _)?; + VmcsControl64::IO_BITMAP_A_ADDR.write(this_zone().read().pio_bitmap.bitmap_a_addr() as _)?; + VmcsControl64::IO_BITMAP_B_ADDR.write(this_zone().read().pio_bitmap.bitmap_b_addr() as _)?; + VmcsControl64::MSR_BITMAPS_ADDR.write(this_zone().read().msr_bitmap.phys_addr() as _)?; Ok(()) } diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs index f6d2b4ed..7f372c97 100644 --- a/src/arch/x86_64/mmio.rs +++ b/src/arch/x86_64/mmio.rs @@ -4,7 +4,7 @@ use crate::{ vmcs::{VmcsGuest16, VmcsGuestNW}, }, error::HvResult, - memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr, MMIOAccess}, + memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr, MMIOAccess, MMIOHandler}, percpu::{this_cpu_data, this_zone}, }; use alloc::{sync::Arc, vec::Vec}; @@ -134,8 +134,10 @@ numeric_enum_macro::numeric_enum! { #[derive(Debug)] pub enum OpCode { // move r to r/m + MovEbGb = 0x88, MovEvGv = 0x89, // move r/m to r + MovGbEb = 0x8a, MovGvEv = 0x8b, } } @@ -151,6 +153,8 @@ bitflags::bitflags! { } const REX_PREFIX_HIGH: u8 = 0x4; +const OPERAND_SIZE_OVERRIDE_PREFIX: u8 = 0x66; + // len stands for instruction len enum OprandType { Reg { reg: RmReg, len: usize }, @@ -294,11 +298,54 @@ fn gva_to_gpa(gva: GuestVirtAddr) -> HvResult { Ok(page_gpa | (gva & 0xfff)) } -fn emulate_inst(inst: &Vec, dev: &Arc) -> HvResult { +fn get_default_operand_size() -> HvResult { + let cr0 = VmcsGuestNW::CR0.read()?; + let mut size = size_of::(); + + // in protection mode + if cr0 & Cr0::CR0_PROTECTED_MODE.bits() != 0 { + let gdtr_hpa = gpa_to_hpa(gva_to_gpa(VmcsGuestNW::GDTR_BASE.read()?)?)?; + let cs_sel = VmcsGuest16::CS_SELECTOR.read()? 
as usize; + // info!("gdtr: {:x}", gdtr_hpa); + let cs_desc = unsafe { *((gdtr_hpa + (cs_sel & !(0x7))) as *const u64) }; + // info!("cs_desc: {:x}", cs_desc); + + // default operation size + let cs_d = cs_desc.get_bit(54); + // long mode + let cs_l = cs_desc.get_bit(53); + + // in 64-bit long mode or set CS.D to 1 + if (!cs_d && cs_l) || cs_d { + size = size_of::(); + } + } + + Ok(size) +} + +fn emulate_inst( + inst: &Vec, + handler: &MMIOHandler, + mmio: &mut MMIOAccess, + base: usize, +) -> HvResult { assert!(inst.len() > 0); + let mut size = get_default_operand_size()?; + let mut size_override = false; let mut cur_id = 0; + if inst[cur_id] == OPERAND_SIZE_OVERRIDE_PREFIX { + if size == size_of::() { + size = size_of::(); + } else { + size = size_of::(); + } + cur_id += 1; + size_override = true; + } + let mut rex = RexPrefixLow::from_bits_truncate(0); if inst[cur_id].get_bits(4..=7) == REX_PREFIX_HIGH { rex = RexPrefixLow::from_bits_truncate(inst[cur_id].get_bits(0..=3)); @@ -309,8 +356,15 @@ fn emulate_inst(inst: &Vec, dev: &Arc) -> HvResult { let opcode: OpCode = inst[cur_id].try_into().unwrap(); cur_id += 1; + if !size_override { + size = match opcode { + OpCode::MovEbGb | OpCode::MovGbEb => size_of::(), + _ => size, + }; + } + match opcode { - OpCode::MovEvGv => { + OpCode::MovEbGb | OpCode::MovEvGv => { let mod_rm = ModRM::new(inst[cur_id], &rex); cur_id += 1; @@ -321,18 +375,24 @@ fn emulate_inst(inst: &Vec, dev: &Arc) -> HvResult { match dst { OprandType::Reg { reg, len } => { cur_id += len; - reg.write(src_val, size_of::()).unwrap(); + reg.write(src_val, size).unwrap(); } OprandType::Gpa { gpa, len } => { cur_id += len; - dev.write(gpa, src_val, size_of::()).unwrap(); + + mmio.address = gpa - base; + mmio.is_write = true; + mmio.size = size; + mmio.value = src_val as _; + + handler(mmio, base); } _ => {} } Ok(cur_id) } - OpCode::MovGvEv => { + OpCode::MovGbEb | OpCode::MovGvEv => { let mod_rm = ModRM::new(inst[cur_id], &rex); cur_id += 1; @@ -346,12 
+406,19 @@ fn emulate_inst(inst: &Vec, dev: &Arc) -> HvResult { } OprandType::Gpa { gpa, len } => { cur_id += len; - dev.read(gpa).unwrap() + + mmio.address = gpa - base; + mmio.is_write = false; + mmio.size = size; + mmio.value = 0; + // info!("src_val: {:x}", gpa); + + handler(mmio, base); + mmio.value as u64 } }; - // info!("src_val: {:x}", src_val); - dst.write(src_val, size_of::()).unwrap(); + dst.write(src_val, size).unwrap(); Ok(cur_id) } _ => { @@ -363,13 +430,13 @@ fn emulate_inst(inst: &Vec, dev: &Arc) -> HvResult { } } -pub fn mmio_handler(mmio: &mut MMIOAccess, dev: &Arc) -> HvResult { +pub fn instruction_emulator(handler: &MMIOHandler, mmio: &mut MMIOAccess, base: usize) -> HvResult { let rip_hpa = gpa_to_hpa(gva_to_gpa(VmcsGuestNW::RIP.read()?)?)? as *const u8; let inst = unsafe { from_raw_parts(rip_hpa, 15) }.to_vec(); - // info!("rip_hpa: {:?}, inst: {:x?}", rip_hpa, inst); + let len = emulate_inst(&inst, handler, mmio, base).unwrap(); + // info!("rip_hpa: {:?}, inst: {:x?}, len: {:x}", rip_hpa, inst, len); - let len = emulate_inst(&inst, dev).unwrap(); this_cpu_data().arch_cpu.advance_guest_rip(len as _)?; Ok(()) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 2b3ea2f7..3c9d4a40 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -20,6 +20,7 @@ pub mod s2pt; pub mod trap; pub mod vmcs; pub mod vmx; +pub mod vtd; pub mod zone; pub use s1pt::Stage1PageTable; diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index abbe31f0..1a9edaec 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -144,27 +144,9 @@ pub struct MsrBitmap { } impl MsrBitmap { - pub fn uninit() -> Self { - Self { - frame: unsafe { Frame::from_paddr(0) }, - } - } - - pub fn passthrough_all() -> HvResult { - Ok(Self { - frame: Frame::new_zero()?, - }) - } - - pub fn intercept_all() -> HvResult { - let mut frame = Frame::new()?; - frame.fill(u8::MAX); - Ok(Self { frame }) - } - - pub fn intercept_def() -> HvResult { + pub fn 
new() -> Self { let mut bitmap = Self { - frame: Frame::new_zero()?, + frame: Frame::new_zero().unwrap(), }; bitmap.set_read_intercept(IA32_APIC_BASE, true); @@ -190,7 +172,7 @@ impl MsrBitmap { } }*/ - Ok(bitmap) + bitmap } pub fn phys_addr(&self) -> HostPhysAddr { diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index 994c765b..cca93495 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -1,15 +1,11 @@ use crate::{ - arch::{ - acpi, - mmio::{mmio_handler, MMIoDevice}, - zone::HvArchZoneConfig, - }, + arch::{acpi, mmio::MMIoDevice, zone::HvArchZoneConfig}, error::HvResult, - memory::{GuestPhysAddr, MMIOAccess}, + memory::{mmio_generic_handler, GuestPhysAddr, MMIOAccess}, zone::Zone, }; use ::acpi::{mcfg::Mcfg, sdt::Signature}; -use alloc::{sync::Arc, vec::Vec}; +use alloc::{collections::vec_deque::VecDeque, sync::Arc, vec::Vec}; use core::ops::Range; lazy_static::lazy_static! { @@ -62,11 +58,79 @@ impl Zone { let size = ((entry.bus_number_end as usize - entry.bus_number_start as usize) + 1) << 20; // info!("entry start: {:x} size: {:x}", start, size); - self.mmio_region_register(start, size, pci_config_space_mmio_handler, 0); + self.mmio_region_register(start, size, mmio_generic_handler, 0); } } } -fn pci_config_space_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { +/*fn pci_config_space_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { mmio_handler(mmio, &VIRT_PCI_CONFIG_SPACE.0) +}*/ + +pub fn get_config_space_info() -> HvResult<(u64, u64)> { + let bytes = acpi::root_get_table(&Signature::MCFG) + .unwrap() + .get_bytes() + .clone(); + let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; + + for entry in mcfg.entries() { + assert!(entry.pci_segment_group == 0); + let size = ((entry.bus_number_end as u64 - entry.bus_number_start as u64) + 1) << 20; + return Ok((entry.base_address, size)); + } + + hv_result_err!(ENODEV) +} + +pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) { + let 
mut bdfs: Vec = Vec::new(); + let mut config_space_size = 0usize; + + // info!("entry start: {:x} size: {:x}", start, size); + let mut buses: VecDeque = VecDeque::new(); + let mut max_bus: u8 = 0; + buses.push_back(max_bus); + + while !buses.is_empty() { + let bus = buses.pop_front().unwrap(); + let bus_config_hpa = (config_base_hpa as usize) + ((bus as usize) << 20); + let mut bus_empty: bool = true; + + for dev_func in 0u8..=255 { + let bdf = ((bus as u16) << 8) + (dev_func as u16); + let bdf_config_hpa = bus_config_hpa + ((dev_func as usize) << 12); + + let vendor_id = unsafe { *(bdf_config_hpa as *const u16) }; + if vendor_id == 0xffff { + continue; + } + + let device_id = unsafe { *((bdf_config_hpa + 0x2) as *const u16) }; + let header_type = unsafe { *((bdf_config_hpa + 0xe) as *const u8) }; + + info!( + "bdf: {:x}, bus: {:x}, dev_func: {:x}, vendor id: {:x}, device id: {:x}, header type: {:x}", + bdf, bus, dev_func, vendor_id, device_id, header_type + ); + + bdfs.push(bdf); + bus_empty = false; + + // pci bridge + if header_type == 0x1 { + let secondary_bus = unsafe { *((bdf_config_hpa + 0x19) as *const u8) }; + buses.push_back(secondary_bus); + } + } + + if !bus_empty && bus > max_bus { + max_bus = bus; + } + } + + config_space_size = ((max_bus as usize - 0usize) + 1) << 20; + // info!("config space size: {:x}", config_space_size); + + (bdfs, config_space_size, max_bus) } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index f6c67f28..53e31cc6 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -1,5 +1,7 @@ +use core::ops::Range; + use crate::{ - device::irqchip::pic::vtd::{PCI_CONFIG_ADDR, PCI_CONFIG_DATA}, + arch::vtd::{PCI_CONFIG_ADDR, PCI_CONFIG_DATA}, error::HvResult, memory::{Frame, HostPhysAddr}, }; @@ -13,21 +15,14 @@ pub struct PortIoBitmap { } impl PortIoBitmap { - pub fn uninit() -> Self { - Self { - a: unsafe { Frame::from_paddr(0) }, - b: unsafe { Frame::from_paddr(0) }, - } - } - - pub fn intercept_def() -> 
HvResult { + pub fn new(zoneid: usize) -> Self { let mut bitmap = Self { - a: Frame::new_zero()?, - b: Frame::new_zero()?, + a: Frame::new_zero().unwrap(), + b: Frame::new_zero().unwrap(), }; - bitmap.a.fill(0); - bitmap.b.fill(0); + bitmap.a.fill(0xff); + bitmap.b.fill(0xff); // ban i8259a ports bitmap.set_intercept(0x20, true); @@ -36,10 +31,20 @@ impl PortIoBitmap { bitmap.set_intercept(0xa1, true); // ban pci config ports - // bitmap.set_intercept(PCI_CONFIG_ADDR, true); - // bitmap.set_intercept(PCI_CONFIG_DATA, true); + // TODO: handle config space operations from io ports + bitmap.set_intercept(PCI_CONFIG_ADDR, true); + bitmap.set_intercept(PCI_CONFIG_DATA, true); + // bitmap.set_range_intercept(0xcf8..0xd00, true); - Ok(bitmap) + if zoneid == 0 { + // passthrough uart com1 + bitmap.set_range_intercept(0x3f8..0x400, false); + // FIXME: get port info from ACPI FACP table + bitmap.set_intercept(0xb2, false); + bitmap.set_range_intercept(0x600..0x630, false); + } + + bitmap } pub fn bitmap_a_addr(&self) -> HostPhysAddr { @@ -50,7 +55,13 @@ impl PortIoBitmap { self.b.start_paddr() } - fn set_intercept(&mut self, mut port: u16, intercept: bool) { + pub fn set_range_intercept(&mut self, mut ports: Range, intercept: bool) { + for port in ports { + self.set_intercept(port, intercept); + } + } + + pub fn set_intercept(&mut self, mut port: u16, intercept: bool) { let bitmap = match port <= 0x7fff { true => unsafe { core::slice::from_raw_parts_mut(self.a.as_mut_ptr(), 0x1000) }, false => { diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index 54fc445d..78f05f86 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -2,14 +2,15 @@ use crate::{ arch::{ paging::{GenericPTE, Level4PageTable, PagingInstr}, vmcs::*, + vtd, }, consts::PAGE_SIZE, - device::irqchip::pic::vtd, error::HvResult, memory::{ addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}, MemFlags, }, + zone::this_zone_id, }; use bit_field::BitField; use bitflags::bitflags; @@ -243,7 
+244,7 @@ impl PagingInstr for S2PTInstr { crate::arch::vmcs::VmcsControl64::EPTP.write(s2ptp).unwrap(); unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) }; - vtd::update_dma_translation_tables(root_paddr); + vtd::update_dma_translation_tables(this_zone_id(), root_paddr); } fn flush(_vaddr: Option) {} diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 46e9a1b3..53811e26 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -333,14 +333,7 @@ fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR value: 0, })?; - // FIXME: do advance_guest_rip in mmio handler, for the inst len is not correct - // arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; Ok(()) - - /*panic!( - "VM exit: S2PT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?}), {:#x?}", - exit_info.guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags, arch_cpu - );*/ } fn handle_triple_fault(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs index 14ff7975..9bd1ccc3 100644 --- a/src/arch/x86_64/vmx.rs +++ b/src/arch/x86_64/vmx.rs @@ -369,20 +369,20 @@ pub struct VmxRegion { } impl VmxRegion { - pub fn uninit() -> Self { + pub fn fake_init() -> Self { Self { frame: unsafe { Frame::from_paddr(0) }, } } - pub fn new(revision_id: u32, shadow_indicator: bool) -> HvResult { - let frame = Frame::new_zero()?; + pub fn new(revision_id: u32, shadow_indicator: bool) -> Self { + let frame = Frame::new_zero().unwrap(); unsafe { (*(frame.start_paddr() as *mut u32)) .set_bits(0..=30, revision_id) .set_bit(31, shadow_indicator); } - Ok(Self { frame }) + Self { frame } } pub fn start_paddr(&self) -> PhysAddr { diff --git a/src/device/irqchip/pic/vtd.rs b/src/arch/x86_64/vtd.rs similarity index 76% rename from src/device/irqchip/pic/vtd.rs rename to src/arch/x86_64/vtd.rs index 3547aa00..c7e803b4 100644 --- a/src/device/irqchip/pic/vtd.rs +++ 
b/src/arch/x86_64/vtd.rs @@ -42,7 +42,7 @@ mod dma_remap_reg { pub const DMAR_IRTA_REG: usize = 0xb8; } -static DRHD_UNITS: Once>> = Once::new(); +static VTD: Once> = Once::new(); bitflags::bitflags! { #[derive(Clone, Copy, Debug)] @@ -84,7 +84,7 @@ bitflags::bitflags! { } } -numeric_enum_macro::numeric_enum! { +/*numeric_enum_macro::numeric_enum! { #[repr(u8)] #[derive(Clone, Debug, PartialEq)] pub enum DeviceScopeType { @@ -95,22 +95,19 @@ pub enum DeviceScopeType { MsiCapableHpet = 0x04, AcpiNamespaceDevice = 0x05 } -} +}*/ #[derive(Clone, Debug)] -struct DeviceScope { - scope_type: DeviceScopeType, - id: u8, +struct VtdDevice { + zone_id: usize, bus: u8, dev_func: u8, } #[derive(Debug)] -struct Drhd { - flags: u8, - segment: u16, - reg_hpa: usize, - scopes: Vec, +struct Vtd { + reg_base_hpa: usize, + devices: BTreeMap, root_table: Frame, context_tables: BTreeMap, @@ -120,7 +117,7 @@ struct Drhd { gcmd: GcmdFlags, } -impl Drhd { +impl Vtd { fn activate(&mut self) { self.activate_dma_translation(); } @@ -194,6 +191,10 @@ impl Drhd { } } + fn add_device(&mut self, zone_id: usize, bdf: u64) { + self.devices.insert(bdf, zone_id); + } + fn add_interrupt_table_entry(&mut self, irq: u32) { assert!(irq < (IR_ENTRY_CNT as u32)); @@ -229,11 +230,11 @@ impl Drhd { self.set_root_table(); self.activate_qi(); - // self.set_interrupt_remap_table(); - /* for irq in 0..IR_ENTRY_CNT { + /* self.set_interrupt_remap_table(); + for irq in 0..IR_ENTRY_CNT { self.add_interrupt_table_entry(irq as _); - } */ - // self.activate_interrupt_remapping(); + } + self.activate_interrupt_remapping(); */ } fn set_interrupt(&mut self) { @@ -260,12 +261,12 @@ impl Drhd { self.wait(GstsFlags::RTPS, false); } - fn update_dma_translation_tables(&mut self, zone_s2pt_hpa: HostPhysAddr) { + fn update_dma_translation_tables(&mut self, zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { let bdfs: Vec<(u8, u8)> = self - .scopes + .devices .iter() - .filter(|scope| scope.scope_type == 
DeviceScopeType::PciEndpointDevice) - .map(|scope| (scope.bus, scope.dev_func)) + .filter(|&(_, &dev_zone_id)| dev_zone_id == zone_id) + .map(|(&bdf, _)| (bdf.get_bits(8..=15) as u8, bdf.get_bits(0..=7) as u8)) .collect(); for (bus, dev_func) in bdfs { @@ -285,19 +286,19 @@ impl Drhd { } fn mmio_read_u32(&self, reg: usize) -> u32 { - unsafe { *((self.reg_hpa + reg) as *const u32) } + unsafe { *((self.reg_base_hpa + reg) as *const u32) } } fn mmio_read_u64(&self, reg: usize) -> u64 { - unsafe { *((self.reg_hpa + reg) as *const u64) } + unsafe { *((self.reg_base_hpa + reg) as *const u64) } } fn mmio_write_u32(&self, reg: usize, value: u32) { - unsafe { *((self.reg_hpa + reg) as *mut u32) = value }; + unsafe { *((self.reg_base_hpa + reg) as *mut u32) = value }; } fn mmio_write_u64(&self, reg: usize, value: u64) { - unsafe { *((self.reg_hpa + reg) as *mut u64) = value }; + unsafe { *((self.reg_base_hpa + reg) as *mut u64) = value }; } } @@ -315,95 +316,62 @@ fn get_secondary_bus(bus: u8, dev: u8, func: u8) -> u8 { } } -pub fn parse_root_drhds() -> Vec> { - let mut drhds: Vec> = Vec::new(); - +pub fn parse_root_dmar() -> Mutex { let dmar = acpi::root_get_table(&Signature::DMAR).unwrap(); let mut cur: usize = 48; // start offset of remapping structures let len = dmar.get_len(); + let mut reg_base_hpa: usize = 0; + while cur < len { let struct_type = dmar.get_u16(cur); let struct_len = dmar.get_u16(cur + 2) as usize; - // drhd if struct_type == 0 { - let mut drhd = Drhd { - flags: dmar.get_u8(cur + 4), - segment: dmar.get_u16(cur + 6), - reg_hpa: dmar.get_u64(cur + 8) as usize, - scopes: Vec::new(), - - root_table: Frame::new_zero().unwrap(), - context_tables: BTreeMap::new(), - qi_queue: Frame::new_zero().unwrap(), - ir_table: Frame::new_zero().unwrap(), - gcmd: GcmdFlags::empty(), - }; - - let mut scope_cur = cur + 16; // start offset of device scopes - // device scopes - while scope_cur < cur + struct_len { - let scope_len = dmar.get_u8(scope_cur + 1) as usize; - - let 
mut bus = dmar.get_u8(scope_cur + 5); - let mut path_cur = scope_cur + 6; - let mut dev = dmar.get_u8(path_cur); - let mut func = dmar.get_u8(path_cur + 1); - - // info!("bdf: {:x} {:x} {:x}", bus, dev, func); - - path_cur += 2; - while path_cur < scope_cur + scope_len { - bus = get_secondary_bus(bus, dev, func); - dev = dmar.get_u8(path_cur); - func = dmar.get_u8(path_cur + 1); - // info!("bdf: {:x} {:x} {:x}", bus, dev, func); - path_cur += 2; - } - - let mut scope = DeviceScope { - scope_type: DeviceScopeType::try_from(dmar.get_u8(scope_cur + 0)).unwrap(), - id: dmar.get_u8(scope_cur + 4), - bus, - dev_func: (dev << 3) | func, - }; - info!("{:x?}", scope); - drhd.scopes.push(scope); + let segment = dmar.get_u16(cur + 6); - scope_cur += scope_len; + // we only support segment 0 + if segment == 0 { + reg_base_hpa = dmar.get_u64(cur + 8) as usize; } - - drhds.push(Mutex::new(drhd)); - } else { } - cur += struct_len; } - drhds + assert!(reg_base_hpa != 0); + + Mutex::new(Vtd { + reg_base_hpa, + devices: BTreeMap::new(), + root_table: Frame::new_zero().unwrap(), + context_tables: BTreeMap::new(), + qi_queue: Frame::new().unwrap(), + ir_table: Frame::new().unwrap(), + gcmd: GcmdFlags::empty(), + }) } +// called after acpi init pub fn init() { - DRHD_UNITS.call_once(|| parse_root_drhds()); - for unit in DRHD_UNITS.get().unwrap().iter() { - unit.lock().init(); - } + VTD.call_once(|| parse_root_dmar()); + VTD.get().unwrap().lock().init(); + // init_msi_cap_hpa_space(); +} - init_msi_cap_hpa_space(); +pub fn add_device(zone_id: usize, bdf: u64) { + VTD.get().unwrap().lock().add_device(zone_id, bdf); } -pub fn update_dma_translation_tables(zone_s2pt_hpa: HostPhysAddr) { - for unit in DRHD_UNITS.get().unwrap().iter() { - unit.lock().update_dma_translation_tables(zone_s2pt_hpa); - } +pub fn update_dma_translation_tables(zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { + VTD.get() + .unwrap() + .lock() + .update_dma_translation_tables(zone_id, zone_s2pt_hpa); } /// should be 
called after gpm is activated pub fn activate() { - for unit in DRHD_UNITS.get().unwrap().iter() { - unit.lock().activate(); - } + VTD.get().unwrap().lock().activate(); } fn flush_cache_range(hpa: usize, size: usize) { @@ -414,6 +382,7 @@ fn flush_cache_range(hpa: usize, size: usize) { } } +/* fn init_msi_cap_hpa_space() { let bytes = acpi::root_get_table(&Signature::MCFG) .unwrap() @@ -421,7 +390,7 @@ fn init_msi_cap_hpa_space() { .clone(); let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; - for unit in DRHD_UNITS.get().unwrap().iter() { + for unit in VTD.get().unwrap().iter() { let drhd = unit.lock(); for entry in mcfg.entries() { @@ -455,4 +424,4 @@ fn init_msi_cap_hpa_space() { } } } -} +}*/ diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index 38f228b0..de5497f0 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -1,5 +1,4 @@ use crate::{ - arch::mmio::mmio_handler, config::*, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, diff --git a/src/device/irqchip/mod.rs b/src/device/irqchip/mod.rs index 19dbae8b..cc817cfd 100644 --- a/src/device/irqchip/mod.rs +++ b/src/device/irqchip/mod.rs @@ -70,10 +70,10 @@ impl Zone { { self.vgicv3_mmio_init(hv_config); } - #[cfg(all(target_arch = "x86_64"))] + #[cfg(target_arch = "x86_64")] { self.ioapic_mmio_init(hv_config); - self.pci_config_space_mmio_init(hv_config); + // self.pci_config_space_mmio_init(hv_config); } } } diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index a7a02c49..fe340d3a 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -1,8 +1,5 @@ use crate::{ - arch::{ - mmio::{mmio_handler, MMIoDevice}, - zone::HvArchZoneConfig, - }, + arch::{mmio::MMIoDevice, zone::HvArchZoneConfig}, device::irqchip::pic::inject_vector, error::HvResult, memory::{GuestPhysAddr, MMIOAccess}, @@ -70,10 +67,10 @@ impl MMIoDevice for VirtIoApic { fn read(&self, gpa: GuestPhysAddr) -> HvResult { 
// info!("ioapic read! gpa: {:x}", gpa,); - if gpa == self.base_gpa { + if gpa == 0 { return Ok(self.inner.lock().cur_reg as _); } - assert!(gpa - self.base_gpa == 0x10); + assert!(gpa == 0x10); let inner = self.inner.lock(); match inner.cur_reg { @@ -101,13 +98,12 @@ impl MMIoDevice for VirtIoApic { "ioapic write! gpa: {:x}, value: {:x}, size: {:x}", gpa, value, size, );*/ - assert!(size == 4); - if gpa == self.base_gpa { + if gpa == 0 { self.inner.lock().cur_reg = value as _; return Ok(()); } - assert!(gpa - self.base_gpa == 0x10); + assert!(gpa == 0x10); let mut inner = self.inner.lock(); match inner.cur_reg { @@ -152,12 +148,24 @@ impl MMIoDevice for VirtIoApic { impl Zone { pub fn ioapic_mmio_init(&mut self, arch: &HvArchZoneConfig) { - self.mmio_region_register(arch.ioapic_base, arch.ioapic_size, ioapic_mmio_handler, 0); + self.mmio_region_register( + arch.ioapic_base, + arch.ioapic_size, + ioapic_mmio_handler, + arch.ioapic_base, + ); } } -fn ioapic_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { - mmio_handler(mmio, &VIRT_IOAPIC.0) +fn ioapic_mmio_handler(mmio: &mut MMIOAccess, _: usize) -> HvResult { + if mmio.is_write { + VIRT_IOAPIC + .0 + .write(mmio.address, mmio.value as _, mmio.size) + } else { + mmio.value = VIRT_IOAPIC.0.read(mmio.address).unwrap() as _; + Ok(()) + } } unsafe fn configure_gsi_from_raw(irq: u8, raw: u64) { diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 6eb7b27e..6320a5bc 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,9 +1,8 @@ pub mod ioapic; pub mod lapic; -pub mod vtd; use crate::{ - arch::{acpi, ipi, vmcs::Vmcs}, + arch::{acpi, ipi, vmcs::Vmcs, vtd}, consts::MAX_CPU_NUM, zone::Zone, }; @@ -90,11 +89,11 @@ pub fn primary_init_early() { ipi::init(MAX_CPU_NUM); PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); acpi::root_init(); + vtd::init(); } pub fn primary_init_late() { acpi::copy_to_root_zone_region(); - vtd::init(); } impl Zone 
{ diff --git a/src/event.rs b/src/event.rs index 0dd25468..3ac11532 100644 --- a/src/event.rs +++ b/src/event.rs @@ -109,7 +109,7 @@ pub fn check_events() -> bool { let cpu_data = this_cpu_data(); match fetch_event(cpu_data.id) { Some(IPI_EVENT_WAKEUP) => { - info!("cpu {} wakeup", cpu_data.id); + // info!("cpu {} wakeup", cpu_data.id); cpu_data.arch_cpu.run(); } Some(IPI_EVENT_SHUTDOWN) => { diff --git a/src/main.rs b/src/main.rs index b0e8e94f..6ee27f42 100644 --- a/src/main.rs +++ b/src/main.rs @@ -134,7 +134,6 @@ fn primary_init_early() { event::init(MAX_CPU_NUM); device::irqchip::primary_init_early(); - // TODO: tmp // crate::arch::mm::init_hv_page_table().unwrap(); #[cfg(all(feature = "iommu", target_arch = "aarch64"))] diff --git a/src/memory/mmio.rs b/src/memory/mmio.rs index f8dfe703..6bb4e261 100644 --- a/src/memory/mmio.rs +++ b/src/memory/mmio.rs @@ -83,7 +83,13 @@ pub fn mmio_handle_access(mmio: &mut MMIOAccess) -> HvResult { match res { Some((region, handler, arg)) => { mmio.address -= region.start; - handler(mmio, arg) + + if cfg!(target_arch = "x86_64") { + #[cfg(target_arch = "x86_64")] + crate::arch::mmio::instruction_emulator(&handler, mmio, arg) + } else { + handler(mmio, arg) + } } None => { warn!("Zone {} unhandled mmio fault {:#x?}", zone.read().id, mmio); diff --git a/src/pci/pci.rs b/src/pci/pci.rs index 821130b0..40469ef2 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -40,6 +40,8 @@ use super::{ #[cfg(all(feature = "iommu", target_arch = "aarch64"))] use crate::arch::iommu::iommu_add_device; +#[cfg(target_arch = "x86_64")] +use crate::arch::vtd; #[derive(Debug)] pub struct PciRoot { @@ -79,8 +81,11 @@ impl PciRoot { for ep in self.endpoints.iter() { let regions = ep.get_regions(); for mut region in regions { - if region.size < 0x1000 { - region.size = 0x1000; + // in x86_64, we allow io port region size not aligned to 4K + if cfg!(not(target_arch = "x86_64")) || region.bar_type != BarType::IO { + if region.size < 0x1000 { + 
region.size = 0x1000; + } } self.bar_regions.push(region); } @@ -88,8 +93,11 @@ impl PciRoot { for bridge in self.bridges.iter() { let regions = bridge.get_regions(); for mut region in regions { - if region.size < 0x1000 { - region.size = 0x1000; + // in x86_64, we allow io port region size not aligned to 4K + if cfg!(not(target_arch = "x86_64")) || region.bar_type != BarType::IO { + if region.size < 0x1000 { + region.size = 0x1000; + } } self.bar_regions.push(region); } @@ -144,6 +152,7 @@ impl Zone { } info!("PCIe init!"); + // info!("{:#x?}", pci_config); init_ecam_base(pci_config.ecam_base as _); @@ -157,10 +166,21 @@ impl Zone { if alloc_pci_devs[idx] != 0 { iommu_add_device(self.id, alloc_pci_devs[idx] as _); } + #[cfg(target_arch = "x86_64")] + vtd::add_device(self.id, alloc_pci_devs[idx]); } if self.id == 0 { - self.root_pci_init(pci_config); + #[cfg(target_arch = "x86_64")] + { + // crate::arch::pci::probe_root_pci_devices(pci_config.ecam_base as usize); + self.virtual_pci_mmio_init(pci_config); + self.virtual_pci_device_init(pci_config); + } + #[cfg(not(target_arch = "x86_64"))] + { + self.root_pci_init(pci_config); + } } else { self.virtual_pci_mmio_init(pci_config); self.virtual_pci_device_init(pci_config); @@ -284,21 +304,32 @@ impl Zone { }; region.start = cpu_base + region.start - pci_base; - region.start = align_down(region.start); + // in x86_64, we allow io port region size not aligned to 4K + if cfg!(not(target_arch = "x86_64")) || region.bar_type != BarType::IO { + region.start = align_down(region.start); + } info!( "pci bar region: type: {:?}, base: {:#x}, size:{:#x}", region.bar_type, region.start, region.size ); - self.gpm - .insert(MemoryRegion::new_with_offset_mapper( - region.start as GuestPhysAddr, - region.start, - region.size, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); + if cfg!(not(target_arch = "x86_64")) || region.bar_type != BarType::IO { + self.gpm + .insert(MemoryRegion::new_with_offset_mapper( + region.start as 
GuestPhysAddr, + region.start, + region.size, + MemFlags::READ | MemFlags::WRITE, + )) + .ok(); + } else { + #[cfg(target_arch = "x86_64")] + self.pio_bitmap.set_range_intercept( + (region.start as u16)..((region.start + region.size) as u16), + false, + ); + } } } } diff --git a/src/pci/pcibar.rs b/src/pci/pcibar.rs index 3a468c0c..b6ecb87c 100644 --- a/src/pci/pcibar.rs +++ b/src/pci/pcibar.rs @@ -27,7 +27,7 @@ pub struct BarRegion { pub bar_type: BarType, } -#[derive(Default, Debug, Copy, Clone)] +#[derive(Default, Debug, Copy, Clone, PartialEq)] pub enum BarType { Mem32, Mem64, diff --git a/src/platform/mod.rs b/src/platform/mod.rs index 7051885f..97c56c44 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -59,6 +59,13 @@ pub fn platform_root_zone_config() -> HvZoneConfig { root_pci_cfg = ROOT_PCI_CONFIG; num_pci_devs = ROOT_PCI_DEVS.len() as _; } + #[cfg(target_arch = "x86_64")] + { + pci_devs[..ROOT_PCI_DEVS.len()].copy_from_slice(&ROOT_PCI_DEVS); + (root_pci_cfg.ecam_base, root_pci_cfg.ecam_size) = + crate::arch::pci::get_config_space_info().unwrap(); + num_pci_devs = ROOT_PCI_DEVS.len() as _; + } HvZoneConfig::new( 0, diff --git a/src/platform/qemu_x86_64.rs b/src/platform/qemu_x86_64.rs deleted file mode 100644 index c0e4fd3b..00000000 --- a/src/platform/qemu_x86_64.rs +++ /dev/null @@ -1,102 +0,0 @@ -use crate::{ - arch::zone::HvArchZoneConfig, - config::*, - memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr}, -}; - -pub const MEM_TYPE_ROM: u32 = 3; -pub const MEM_TYPE_RAM_NOT_ALLOC: u32 = 4; - -pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; -pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; -pub const ROOT_ZONE_ENTRY: u64 = 0x8000; -pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; -pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; -pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; -pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; -pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1) | (1 << 2); - -pub const 
ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { - mem_type: MEM_TYPE_ROM, - physical_start: 0x50e_0000, - virtual_start: 0xe_0000, - size: 0x2_0000, -}; - -pub const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM_NOT_ALLOC, - physical_start: 0x6020_0000, // hpa - virtual_start: 0x5520_0000, // gpa - size: 0xf000, // modify size accordingly -}; - -pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_CMDLINE: &str = - "console=ttyS0 earlyprintk=serial nointremap root=/dev/vda rw init=/bin/sh\0"; -//"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic - -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 9] = [ - HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM, - physical_start: 0x500_0000, - virtual_start: 0x0, - size: 0xe_0000, - }, // ram - ROOT_ZONE_RSDP_REGION, // rsdp - HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM, - physical_start: 0x510_0000, - virtual_start: 0x10_0000, - size: 0x14f0_0000, - }, // ram - HvConfigMemoryRegion { - mem_type: MEM_TYPE_ROM, - physical_start: 0x2000_0000, - virtual_start: 0x1500_0000, - size: 0x20_0000, - }, // initrd - HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM, - physical_start: 0x2020_0000, - virtual_start: 0x1520_0000, - size: 0x4000_0000, - }, // ram - ROOT_ZONE_ACPI_REGION, // acpi - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfed0_0000, - virtual_start: 0xfed0_0000, - size: 0x1000, - }, // hpet - /*HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xb000_0000, - virtual_start: 0xb000_0000, - size: 0x1000_0000, - }, // TODO: pci config*/ - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfea0_0000, - virtual_start: 0xfea0_0000, - size: 0x20_0000, - }, // TODO: pci - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0x70_0000_0000, - virtual_start: 0x70_0000_0000, - size: 0x1000_4000, - }, // FIXME: pci 0000:00:03.0 -]; - -pub 
const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; -pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; -pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { - ioapic_base: ROOT_ZONE_IOAPIC_BASE, - ioapic_size: 0x1000, -}; - -pub fn root_zone_gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { - let offset = ROOT_ZONE_KERNEL_ADDR as usize; - let host_vaddr = guest_paddr + offset; - host_vaddr as *mut u8 -} diff --git a/src/zone.rs b/src/zone.rs index f1df5f3c..945fd28d 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -31,6 +31,11 @@ use crate::memory::{MMIOConfig, MMIOHandler, MMIORegion, MemorySet}; use crate::percpu::{get_cpu_data, this_zone, CpuSet}; use core::panic; +#[cfg(target_arch = "x86_64")] +use crate::arch::msr::MsrBitmap; +#[cfg(target_arch = "x86_64")] +use crate::arch::pio::PortIoBitmap; + pub struct Zone { pub name: [u8; CONFIG_NAME_MAXLEN], pub id: usize, @@ -39,6 +44,12 @@ pub struct Zone { pub irq_bitmap: [u32; 1024 / 32], pub gpm: MemorySet, pub pciroot: PciRoot, + + #[cfg(target_arch = "x86_64")] + pub msr_bitmap: MsrBitmap, + // x86_64 io port is quite different, and has to be seperate from gpm + #[cfg(target_arch = "x86_64")] + pub pio_bitmap: PortIoBitmap, } impl Zone { @@ -51,6 +62,11 @@ impl Zone { mmio: Vec::new(), irq_bitmap: [0; 1024 / 32], pciroot: PciRoot::new(), + + #[cfg(target_arch = "x86_64")] + msr_bitmap: MsrBitmap::new(), + #[cfg(target_arch = "x86_64")] + pio_bitmap: PortIoBitmap::new(zoneid), } } @@ -205,6 +221,12 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { config.num_pci_devs as _, &config.alloc_pci_devs, ); + #[cfg(target_arch = "x86_64")] + zone.pci_init( + &config.pci_config, + config.num_pci_devs as _, + &config.alloc_pci_devs, + ); config.cpus().iter().for_each(|cpu_id| { zone.cpu_set.set_bit(*cpu_id as _); From fd05a71d93a05fa945b3d2bdf175eb16a1359304 Mon Sep 17 00:00:00 2001 From: Solicey Date: Wed, 9 Apr 2025 13:40:36 +0800 Subject: [PATCH 12/29] update guest bootloader --- 
platform/x86_64/qemu/board.rs | 13 +- platform/x86_64/qemu/image/bootloader/boot.S | 41 +++++ platform/x86_64/qemu/image/bootloader/boot.ld | 15 ++ platform/x86_64/qemu/image/bootloader/boot.mk | 38 +++++ .../x86_64/qemu/image/bootloader/boot16.bin | Bin 110 -> 0 bytes .../x86_64/qemu/image/bootloader/out/boot.asm | 44 +++++ .../x86_64/qemu/image/bootloader/out/boot.bin | Bin 0 -> 94 bytes .../x86_64/qemu/image/bootloader/out/boot.elf | Bin 0 -> 4672 bytes platform/x86_64/qemu/platform.mk | 8 +- src/arch/x86_64/ap_start.S | 1 + src/arch/x86_64/apic.rs | 155 ------------------ src/arch/x86_64/boot.rs | 54 ++++-- src/arch/x86_64/cpu.rs | 22 ++- src/arch/x86_64/gdt.rs | 2 +- src/arch/x86_64/mod.rs | 1 - src/arch/x86_64/trap.rs | 23 ++- src/hypercall/mod.rs | 10 ++ 17 files changed, 230 insertions(+), 197 deletions(-) create mode 100644 platform/x86_64/qemu/image/bootloader/boot.S create mode 100644 platform/x86_64/qemu/image/bootloader/boot.ld create mode 100644 platform/x86_64/qemu/image/bootloader/boot.mk delete mode 100755 platform/x86_64/qemu/image/bootloader/boot16.bin create mode 100644 platform/x86_64/qemu/image/bootloader/out/boot.asm create mode 100755 platform/x86_64/qemu/image/bootloader/out/boot.bin create mode 100755 platform/x86_64/qemu/image/bootloader/out/boot.elf delete mode 100644 src/arch/x86_64/apic.rs diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index dff6b9f8..fc96dc04 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -1,3 +1,5 @@ +use cortex_a::registers::DAIF::D; + // Copyright (c) 2025 Syswonder // hvisor is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. 
@@ -27,9 +29,10 @@ pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; pub const ROOT_ZONE_ENTRY: u64 = 0x8000; pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; -pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; +pub const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // hpa pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; -pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1) | (1 << 2); +pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1); pub const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { mem_type: MEM_TYPE_ROM, @@ -93,9 +96,3 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { }; pub const ROOT_PCI_DEVS: [u64; 7] = [0x0, 0x8, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; // 0x10, - -pub fn root_zone_gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { - let offset = ROOT_ZONE_KERNEL_ADDR as usize; - let host_vaddr = guest_paddr + offset; - host_vaddr as *mut u8 -} diff --git a/platform/x86_64/qemu/image/bootloader/boot.S b/platform/x86_64/qemu/image/bootloader/boot.S new file mode 100644 index 00000000..9b979b63 --- /dev/null +++ b/platform/x86_64/qemu/image/bootloader/boot.S @@ -0,0 +1,41 @@ +.section .text +.code16 +.global entry16 +entry16: + cli + cld + + mov ecx, eax + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + + lgdt [prot_gdt_desc] + mov eax, cr0 + or eax, 0x1 + mov cr0, eax + + ljmp 0x8, entry32 + +.code32 +.global entry32 +entry32: + mov ax, 0x10 + mov ds, ax + mov es, ax + mov ss, ax + mov fs, ax + mov gs, ax + + jmp ecx + +.balign 16 +prot_gdt: + .quad 0x0000000000000000 # 0x00: null + .quad 0x00cf9b000000ffff # 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff # 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + +prot_gdt_desc: + .short prot_gdt_desc - prot_gdt - 1 # 
limit + .long prot_gdt # base diff --git a/platform/x86_64/qemu/image/bootloader/boot.ld b/platform/x86_64/qemu/image/bootloader/boot.ld new file mode 100644 index 00000000..3f96b209 --- /dev/null +++ b/platform/x86_64/qemu/image/bootloader/boot.ld @@ -0,0 +1,15 @@ +OUTPUT_ARCH(i386) +BASE_ADDRESS = 0x8000; + +ENTRY(entry16) +SECTIONS +{ + . = BASE_ADDRESS; + .text : { + *(.text .text.*) + } + + /DISCARD/ : { + *(.eh_frame) *(.eh_frame_hdr) + } +} diff --git a/platform/x86_64/qemu/image/bootloader/boot.mk b/platform/x86_64/qemu/image/bootloader/boot.mk new file mode 100644 index 00000000..ab3176d9 --- /dev/null +++ b/platform/x86_64/qemu/image/bootloader/boot.mk @@ -0,0 +1,38 @@ +boot_dir := $(image_dir)/bootloader +boot_out_dir := $(image_dir)/bootloader/out + +boot_src := $(boot_dir)/boot.S +boot_lds := $(boot_dir)/boot.ld + +boot_o := $(boot_out_dir)/boot.o +boot_elf := $(boot_out_dir)/boot.elf +boot_bin := $(boot_out_dir)/boot.bin +boot_disa := $(boot_out_dir)/boot.asm + +AS ?= as +LD ?= ld +OBJCOPY ?= objcopy +OBJDUMP ?= objdump + +boot: $(boot_out_dir) $(boot_bin) + +disasm: + $(OBJDUMP) -d -m i8086 -M intel $(boot_elf) | less + +$(boot_out_dir): + mkdir -p $(boot_out_dir) + +$(boot_o): $(boot_src) + $(AS) --32 -msyntax=intel -mnaked-reg $< -o $@ + +$(boot_elf): $(boot_o) $(boot_lds) + $(LD) -T$(boot_lds) $< -o $@ + $(OBJDUMP) -d -m i8086 -M intel $@ > $(boot_disa) + +$(boot_bin): $(boot_elf) + $(OBJCOPY) $< --strip-all -O binary $@ + +clean: + rm -rf $(boot_out_dir) + +.PHONY: all disasm clean diff --git a/platform/x86_64/qemu/image/bootloader/boot16.bin b/platform/x86_64/qemu/image/bootloader/boot16.bin deleted file mode 100755 index 72a60227361eb054829f7a43972aaeb7e185688e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 110 zcmezM$M8VkjlKhY7x)>)G8*_54x}}oVB}Xi@JgRJh}{f8@c*G+?-n%>t9KiSgn|G6fvnl*8Nl@9^9: + 8000: fa cli + 8001: fc cld + 8002: 66 89 c1 mov ecx,eax + 8005: 31 c0 xor ax,ax + 8007: 8e d8 mov ds,ax + 8009: 
8e c0 mov es,ax + 800b: 8e d0 mov ss,ax + 800d: 0f 01 16 58 80 lgdtw ds:0x8058 + 8012: 0f 20 c0 mov eax,cr0 + 8015: 66 83 c8 01 or eax,0x1 + 8019: 0f 22 c0 mov cr0,eax + 801c: ea 21 80 08 00 jmp 0x8:0x8021 + +0000000000008021 : + 8021: 66 b8 10 00 8e d8 mov eax,0xd88e0010 + 8027: 8e c0 mov es,ax + 8029: 8e d0 mov ss,ax + 802b: 8e e0 mov fs,ax + 802d: 8e e8 mov gs,ax + 802f: ff e1 jmp cx + 8031: 2e 8d b4 26 00 lea si,cs:[si+0x26] + 8036: 00 00 add BYTE PTR [bx+si],al + 8038: 00 8d b4 26 add BYTE PTR [di+0x26b4],cl + ... + 8048: ff (bad) + 8049: ff 00 inc WORD PTR [bx+si] + 804b: 00 00 add BYTE PTR [bx+si],al + 804d: 9b fwait + 804e: cf iret + 804f: 00 ff add bh,bh + 8051: ff 00 inc WORD PTR [bx+si] + 8053: 00 00 add BYTE PTR [bx+si],al + 8055: 93 xchg bx,ax + 8056: cf iret + 8057: 00 17 add BYTE PTR [bx],dl + 8059: 00 40 80 add BYTE PTR [bx+si-0x80],al + ... diff --git a/platform/x86_64/qemu/image/bootloader/out/boot.bin b/platform/x86_64/qemu/image/bootloader/out/boot.bin new file mode 100755 index 0000000000000000000000000000000000000000..24a1a8c9309a992c131042daaa02ea10ad6b5827 GIT binary patch literal 94 zcmezMC$00K;eoyzeFyq3@H2`JfjWMqH=CI&kOFs}h3;sBEngfSQx92hKsY8)6?8Cbw-SwT_)Q27Q_Iu6Q( z(QtK8@lk3t1V%$(Gz3ONU^E0qLtr!nMnhmU1V%$(Gz3ONU^E0qLxA=n@as=n=Rv~* zeK-0J^j+X*6pLu!S2&Q?e1ef*>A)++1`dX_9Rdsx*}eyTFaAH&>)oOTau$?=x$ysg zAZzw{1~5JOJcBrcLjz0>8x0y`5CD!ID59z21dD(C&wwT_233zP&XAf{QdDVd#Ngx@ z>>BUr;u7Q<91IaOG-F`UE3V8fNlXILB}EWA1I8*zttbH+N+FO2>IeCS4Q5UQlmQ(N zfscEj3d2-`^)kVE2u1=_e*#p98AyZd;ehfbpfo7FKyv8d093ymngn=k2T2Sxtb = None; -const IO_APIC_BASE: u64 = 0xfec00000; - -pub mod vectors { - pub const APIC_TIMER_VECTOR: u8 = 0xf0; - pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; - pub const APIC_ERROR_VECTOR: u8 = 0xf2; - pub const UART_COM1_VECTOR: u8 = 0xf3; -} - -static mut LOCAL_APIC: Option = None; -static mut CPU_FREQ_MHZ: u64 = 4_000; -const LAPIC_TICKS_PER_SEC: u64 = 1_000_000_000; // TODO: need to calibrate -const TICKS_PER_SEC: u64 = 100; - - -pub fn local_apic<'a>() 
-> &'a mut LocalApic { - // It's safe as LAPIC is per-cpu. - unsafe { LOCAL_APIC.as_mut().unwrap() } -} - -pub fn current_ticks() -> u64 { - unsafe { core::arch::x86_64::_rdtsc() } -} - -pub fn ticks_to_nanos(ticks: u64) -> u64 { - ticks * 1_000 / unsafe { CPU_FREQ_MHZ } -} - -pub fn current_time_nanos() -> u64 { - ticks_to_nanos(current_ticks()) -} - -pub fn current_time() -> TimeValue { - TimeValue::from_nanos(current_time_nanos()) -} - -pub fn busy_wait(duration: Duration) { - busy_wait_until(current_time() + duration); -} - -fn busy_wait_until(deadline: TimeValue) { - while current_time() < deadline { - core::hint::spin_loop(); - } -} - -// FIXME: temporary -unsafe fn configure_gsi(io_apic: &mut IoApic, gsi: u8, vector: u8) { - let mut entry = io_apic.table_entry(gsi); - entry.set_dest(0); // ! - entry.set_vector(vector); - entry.set_mode(IrqMode::Fixed); - entry.set_flags(IrqFlags::MASKED); - io_apic.set_table_entry(gsi, entry); - io_apic.enable_irq(gsi); -} - -pub fn init_ioapic() { - println!("Initializing I/O APIC..."); - unsafe { - Port::::new(0x20).write(0xff); - Port::::new(0xA0).write(0xff); - - let mut io_apic = IoApic::new(IO_APIC_BASE); - configure_gsi(&mut io_apic, UART_COM1_IRQ, 0xf3); - IO_APIC = Some(io_apic); - } -} - -/*pub fn init_lapic() { - println!("Initializing Local APIC..."); - - unsafe { - // Disable 8259A interrupt controllers - // TODO: only cpu0 does this - Port::::new(0x20).write(0xff); - Port::::new(0xA0).write(0xff); - } - - let mut lapic = LocalApicBuilder::new() - .timer_vector(APIC_TIMER_VECTOR as _) - .error_vector(APIC_ERROR_VECTOR as _) - .spurious_vector(APIC_SPURIOUS_VECTOR as _) - .build() - .unwrap(); - - if let Some(freq) = CpuId::new() - .get_processor_frequency_info() - .map(|info| info.processor_max_frequency()) - { - if freq > 0 { - println!("Got TSC frequency by CPUID: {} MHz", freq); - unsafe { CPU_FREQ_MHZ = freq as u64 } - } - } - - unsafe { - lapic.enable(); - } - - let mut best_freq_hz = 0; - for _ in 0..5 { - 
unsafe { lapic.set_timer_initial(u32::MAX) }; - let hpet_start = hpet::current_ticks(); - hpet::wait_millis(10); - let ticks = u32::MAX - unsafe { lapic.timer_current() }; - let hpet_end = hpet::current_ticks(); - - let nanos = hpet::ticks_to_nanos(hpet_end.wrapping_sub(hpet_start)); - let ticks_per_sec = (ticks as u64 * 1_000_000_000 / nanos) as u32; - - if ticks_per_sec > best_freq_hz { - best_freq_hz = ticks_per_sec; - } - } - println!( - "Calibrated LAPIC frequency: {}.{:03} MHz", - best_freq_hz / 1_000_000, - best_freq_hz % 1_000_000 / 1_000, - ); - - /*if let Some(sth) = CpuId::new().get_processor_brand_string() { - println!("{:?}", sth); - }*/ - - unsafe { - lapic.set_timer_mode(TimerMode::Periodic); - lapic.set_timer_divide(TimerDivide::Div256); - lapic.set_timer_initial((best_freq_hz as u64 / TICKS_PER_SEC) as u32); - } - - unsafe { LOCAL_APIC = Some(lapic) }; - - enable_irq(); -}*/ -*/ diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index b98448f3..3c5574ec 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -1,13 +1,15 @@ use crate::{ + arch::Stage2PageTable, config::{root_zone_config, HvZoneConfig, MEM_TYPE_RAM}, error::HvResult, - memory::{GuestPhysAddr, HostPhysAddr}, - platform::root_zone_gpa_as_mut_ptr, + memory::{GuestPhysAddr, HostPhysAddr, MemorySet}, + percpu::this_zone, }; use alloc::string::{String, ToString}; use core::{ + arch::global_asm, ffi::{c_char, CStr}, - ptr::copy_nonoverlapping, + ptr::{copy, copy_nonoverlapping}, }; use spin::Mutex; @@ -54,7 +56,11 @@ pub struct BootParams { e820_entries: u8, pad1: [u8; 0x8], setup_sects: u8, - pad2: [u8; 0x1b], + root_flags: u16, + syssize: u32, + pad2: [u8; 0xd], + boot_proto_version: u16, + pad3: [u8; 0x6], kernel_version: u16, type_of_loader: u8, loadflags: BootLoadFlags, @@ -64,16 +70,16 @@ pub struct BootParams { ramdisk_size: u32, bootsect_kludge: u32, heap_end_ptr: u16, - pad3: [u8; 2], + pad4: [u8; 2], cmd_line_ptr: u32, - pad4: [u8; 12], - cmdline_size: 
u32, pad5: [u8; 12], + cmdline_size: u32, + pad6: [u8; 12], payload_offset: u32, payload_length: u32, - pad6: [u8; 128], + pad7: [u8; 128], e820_table: [BootE820Entry; E820_MAX_ENTRIES_ZEROPAGE], - pad7: [u8; 0x330], + pad8: [u8; 0x330], } impl BootParams { @@ -82,11 +88,35 @@ impl BootParams { initrd_addr: GuestPhysAddr, root_cmdline_addr: GuestPhysAddr, root_cmdline: &str, + gpm: &MemorySet, ) -> HvResult { - let boot_params_hpa = root_zone_gpa_as_mut_ptr(setup_addr) as HostPhysAddr; + let boot_params_hpa = + unsafe { gpm.page_table_query(setup_addr).unwrap().0 } as HostPhysAddr; let boot_params = unsafe { &mut *(boot_params_hpa as *mut BootParams) }; - // TODO: get kernel version + if boot_params.boot_proto_version < 0x0204 { + panic!("kernel boot protocol version older than 2.04 not supported!"); + } + + /* + let setup_size = ((boot_params.setup_sects + 1) as usize) * 0x200; + let vmlinux_size = (boot_params.syssize as usize) * 0x10; + let kernel_hpa = unsafe { gpm.page_table_query(kernel_addr).unwrap().0 } as HostPhysAddr; + + // copy vmlinux to the right place + info!( + "{:x}, {:x}, {:x}", + boot_params_hpa + setup_size, + kernel_hpa, + vmlinux_size + ); + unsafe { + copy( + (boot_params_hpa + setup_size) as *const u8, + kernel_hpa as *mut u8, + vmlinux_size, + ) + };*/ // set bootloader type as undefined boot_params.type_of_loader = 0xff; @@ -101,7 +131,7 @@ impl BootParams { unsafe { copy_nonoverlapping( root_cmdline.as_ptr(), - root_zone_gpa_as_mut_ptr(root_cmdline_addr), + unsafe { gpm.page_table_query(root_cmdline_addr).unwrap().0 } as *mut u8, root_cmdline.len(), ) }; diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index f02c3779..699dff50 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -19,7 +19,7 @@ use crate::{ percpu::{this_cpu_data, this_zone}, platform::{ ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE, ROOT_ZONE_CMDLINE_ADDR, ROOT_ZONE_INITRD_ADDR, - ROOT_ZONE_SETUP_ADDR, + ROOT_ZONE_SETUP_ADDR, 
ROOT_ZONE_VMLINUX_ENTRY_ADDR, }, }; use alloc::boxed::Box; @@ -105,17 +105,17 @@ unsafe fn setup_ap_start_page(cpuid: usize) { fn ap_end(); fn ap_entry32(); } - const U64_PER_PAGE: usize = PAGE_SIZE / 8; + const U64_PER_PAGE: usize = PAGE_SIZE / size_of::(); - let ap_start_page_ptr = phys_to_virt(AP_START_PAGE_PADDR) as *mut usize; + let ap_start_page_ptr = AP_START_PAGE_PADDR as *mut u64; let ap_start_page = core::slice::from_raw_parts_mut(ap_start_page_ptr, U64_PER_PAGE); core::ptr::copy_nonoverlapping( - ap_start16 as *const usize, + ap_start16 as *const u64, ap_start_page_ptr, (ap_end as usize - ap_start16 as usize) / 8, ); - ap_start_page[U64_PER_PAGE - 2] = core_end() as usize + (cpuid + 1) * PER_CPU_SIZE; - ap_start_page[U64_PER_PAGE - 1] = ap_entry32 as usize; + ap_start_page[U64_PER_PAGE - 2] = (core_end() + (cpuid + 1) * PER_CPU_SIZE) as u64; + ap_start_page[U64_PER_PAGE - 1] = ap_entry32 as u64; } pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { @@ -280,9 +280,9 @@ impl ArchCpu { ROOT_ZONE_INITRD_ADDR, ROOT_ZONE_CMDLINE_ADDR, ROOT_ZONE_CMDLINE, - // "console=ttyS0 earlyprintk=serial nokaslr\0" + &this_zone().read().gpm, // "console=ttyS0 earlyprintk=serial nokaslr\0" )?; - self.guest_regs.rax = this_cpu_data().cpu_on_entry as u64; + self.guest_regs.rax = ROOT_ZONE_VMLINUX_ENTRY_ADDR as u64; self.guest_regs.rsi = ROOT_ZONE_SETUP_ADDR as u64; Ok(()) } @@ -580,10 +580,8 @@ impl Debug for ArchCpu { .field("cr0", &VmcsGuestNW::CR0.read()?) .field("cr3", &VmcsGuestNW::CR3.read()?) .field("cr4", &VmcsGuestNW::CR4.read()?) - .field("cs", &VmcsGuest16::CS_SELECTOR.read()?) - .field("fs_base", &VmcsGuestNW::FS_BASE.read()?) - .field("gs_base", &VmcsGuestNW::GS_BASE.read()?) - .field("tss", &VmcsGuest16::TR_SELECTOR.read()?) + .field("gdtr_base", &VmcsGuestNW::GDTR_BASE.read()?) + .field("cs_selector", &VmcsGuest16::CS_SELECTOR.read()?) 
.finish()) })() .unwrap() diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index effc2baa..8cf220a2 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -39,7 +39,7 @@ impl GdtStruct { Self { table, tss } } - fn pointer(&self) -> DescriptorTablePointer { + pub fn pointer(&self) -> DescriptorTablePointer { DescriptorTablePointer { base: VirtAddr::new(self.table.as_ptr() as u64), limit: (core::mem::size_of_val(&self.table) - 1) as u16, diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 3c9d4a40..4fb2c45a 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,6 +1,5 @@ #![allow(unused)] pub mod acpi; -pub mod apic; pub mod boot; pub mod cpu; pub mod cpuid; diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 53811e26..0fe0bb9b 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -21,6 +21,7 @@ use crate::{ uart::UartReg, }, error::HvResult, + hypercall::HyperCall, memory::{mmio_handle_access, MMIOAccess, MemFlags}, percpu::this_cpu_data, }; @@ -205,12 +206,24 @@ fn handle_external_interrupt() -> HvResult { } fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult { - let regs = arch_cpu.regs(); + let regs = arch_cpu.regs_mut(); debug!( - "VM exit: VMCALL({:#x}): {:?}", + "VM exit: VMCALL({:#x}): {:x?}", regs.rax, - [regs.rdi, regs.rsi, regs.rdx, regs.rcx] + [regs.rdi, regs.rsi] ); + let (code, arg0, arg1) = (regs.rax, regs.rdi, regs.rsi); + let cpu_data = this_cpu_data(); + let result = match HyperCall::new(cpu_data).hypercall(code as _, arg0, arg1) { + Ok(ret) => ret as _, + Err(e) => { + error!("hypercall error: {:#?}", e); + e.code() + } + }; + debug!("HVC result = {}", result); + regs.rax = result as _; + arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL)?; Ok(()) } @@ -338,8 +351,8 @@ fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR fn handle_triple_fault(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { panic!( - "VM exit: 
Triple fault @ {:#x}, instr length: {:x}", - exit_info.guest_rip, exit_info.exit_instruction_length + "VM exit: Triple fault @ {:#x}, instr length: {:x}\n {:#x?}", + exit_info.guest_rip, exit_info.exit_instruction_length, arch_cpu ); // arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; Ok(()) diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index 0e914118..9bc01f68 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -300,6 +300,16 @@ impl<'a> HyperCall<'a> { } fn hv_zone_list(&self, zones: *mut ZoneInfo, cnt: u64) -> HyperCallResult { + #[cfg(target_arch = "x86_64")] + let zones = unsafe { + this_zone() + .read() + .gpm + .page_table_query(zones as usize) + .unwrap() + .0 + } as *mut ZoneInfo; + if zones.is_null() { return hv_result_err!(EINVAL, "hv_zone_list: zones is null"); } From d64e28477af8020224647c64607279044530c714 Mon Sep 17 00:00:00 2001 From: Solicey Date: Fri, 11 Apr 2025 19:55:06 +0800 Subject: [PATCH 13/29] hacking to non-root linux --- platform/x86_64/qemu/board.rs | 54 ++++++++----- platform/x86_64/qemu/platform.mk | 4 +- src/arch/x86_64/acpi.rs | 44 +++++++--- src/arch/x86_64/boot.rs | 135 ++++++++++++++----------------- src/arch/x86_64/cpu.rs | 41 +++++----- src/arch/x86_64/ipi.rs | 22 ++--- src/arch/x86_64/pci.rs | 6 +- src/arch/x86_64/pio.rs | 5 +- src/arch/x86_64/zone.rs | 14 +++- src/device/irqchip/pic/ioapic.rs | 3 + src/device/irqchip/pic/lapic.rs | 5 +- src/device/irqchip/pic/mod.rs | 4 +- src/event.rs | 6 +- src/hypercall/mod.rs | 26 ++++++ src/pci/pci.rs | 5 +- src/zone.rs | 23 ++++-- 16 files changed, 231 insertions(+), 166 deletions(-) diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index fc96dc04..9b033841 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -15,45 +15,36 @@ use cortex_a::registers::DAIF::D; // // Authors: // -use crate::{ - arch::zone::HvArchZoneConfig, - config::*, - memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr}, 
-}; +use crate::{arch::zone::HvArchZoneConfig, config::*, memory::GuestPhysAddr}; -pub const MEM_TYPE_ROM: u32 = 3; -pub const MEM_TYPE_RAM_NOT_ALLOC: u32 = 4; +pub const MEM_TYPE_OTHER_ZONES: u32 = 5; pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; pub const ROOT_ZONE_ENTRY: u64 = 0x8000; -pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; -pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; -pub const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // hpa -pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1); -pub const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { - mem_type: MEM_TYPE_ROM, +const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, physical_start: 0x50e_0000, virtual_start: 0xe_0000, size: 0x2_0000, }; -pub const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { - mem_type: MEM_TYPE_RAM_NOT_ALLOC, - physical_start: 0x6020_0000, // hpa - virtual_start: 0x5520_0000, // gpa +const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x4020_0000, // hpa + virtual_start: 0x3520_0000, // gpa size: 0xf000, // modify size accordingly }; pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "console=ttyS0 earlyprintk=serial nointremap root=/dev/vda rw init=/bin/sh\0"; + "console=ttyS0 earlyprintk=serial nointremap root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, physical_start: 0x500_0000, @@ -68,16 +59,16 @@ pub const ROOT_ZONE_MEMORY_REGIONS: 
[HvConfigMemoryRegion; 7] = [ size: 0x14f0_0000, }, // ram HvConfigMemoryRegion { - mem_type: MEM_TYPE_ROM, + mem_type: MEM_TYPE_RAM, physical_start: 0x2000_0000, virtual_start: 0x1500_0000, size: 0x20_0000, - }, // initrd + }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, physical_start: 0x2020_0000, virtual_start: 0x1520_0000, - size: 0x4000_0000, + size: 0x2000_0000, }, // ram ROOT_ZONE_ACPI_REGION, // acpi HvConfigMemoryRegion { @@ -86,13 +77,32 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ virtual_start: 0xfed0_0000, size: 0x1000, }, // hpet + // TODO: e820 mem space probe + HvConfigMemoryRegion { + mem_type: MEM_TYPE_OTHER_ZONES, + physical_start: 0x4030_0000, + virtual_start: 0x4030_0000, + size: 0x2000_0000, + }, // zone 1 ]; +const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; +const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; +const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; + pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { ioapic_base: ROOT_ZONE_IOAPIC_BASE, ioapic_size: 0x1000, + kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, + cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, + setup_load_gpa: ROOT_ZONE_SETUP_ADDR, + initrd_load_gpa: 0x0, + initrd_size: 0x0, + rsdp_memory_region_id: 0x1, + acpi_memory_region_id: 0x5, + initrd_memory_region_id: 0x0, }; pub const ROOT_PCI_DEVS: [u64; 7] = [0x0, 0x8, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; // 0x10, diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index a96f5448..ecb890f4 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -24,8 +24,8 @@ QEMU_ARGS += -kernel $(hvisor_elf) QEMU_ARGS += -device loader,file="$(zone0_boot)",addr=0x5008000,force-raw=on QEMU_ARGS += -device loader,file="$(zone0_setup)",addr=0x500d000,force-raw=on QEMU_ARGS += -device 
loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on -QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on -QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" +# QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on +# QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" $(hvisor_bin): elf boot $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 9a887378..33d26b9a 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -1,9 +1,8 @@ use crate::{ arch::pci::probe_root_pci_devices, - config::HvConfigMemoryRegion, + config::{HvConfigMemoryRegion, HvZoneConfig}, error::HvResult, - percpu::this_zone, - platform::{ROOT_ZONE_ACPI_REGION, ROOT_ZONE_RSDP_REGION}, + percpu::{this_zone, CpuSet}, }; use acpi::{ fadt::Fadt, @@ -207,6 +206,7 @@ pub struct RootAcpi { rsdp: AcpiTable, tables: BTreeMap, pointers: Vec, + devices: Vec, } impl RootAcpi { @@ -247,6 +247,8 @@ impl RootAcpi { &self, rsdp_zone_region: &HvConfigMemoryRegion, acpi_zone_region: &HvConfigMemoryRegion, + banned_tables: &BTreeSet, + cpu_set: &CpuSet, ) { let mut rsdp = self.rsdp.clone(); let mut tables = self.tables.clone(); @@ -257,7 +259,6 @@ impl RootAcpi { rsdp_zone_region.virtual_start as _, ); - let cpu_set = this_zone().read().cpu_set; let mut madt_cur: usize = SDT_HEADER_SIZE + 8; let mut madt = tables.get_mut(&Signature::MADT).unwrap(); @@ -304,12 +305,17 @@ impl RootAcpi { to.set_addr(hpa_start + cur, gpa_start + cur); cur += to.get_len(); } - let to_gpa = to.gpa; + + let to_gpa = match banned_tables.contains(&pointer.to_sig) { + true => 0, + false => to.gpa, + }; let from = match pointer.from_sig == pointer.to_sig { true => &mut rsdp, false => tables.get_mut(&pointer.from_sig).unwrap(), }; + match pointer.pointer_size { 4 => { from.set_u32(to_gpa as _, pointer.from_offset); @@ -392,18 +398,20 @@ impl RootAcpi { 
mcfg.physical_start() as *const u8, mcfg.region_length(), ); - let new_mcfg = self.get_mut_table(Signature::MCFG).unwrap(); info!("-------------------------------- MCFG --------------------------------"); let mut offset = size_of::() + 0xb; for entry in mcfg.entries() { info!("{:x?}", entry); // we don't have such many buses, probe devices to get the max_bus we have - let (_, _, max_bus) = probe_root_pci_devices(entry.base_address as _); + let (mut devices, _, max_bus) = probe_root_pci_devices(entry.base_address as _); // update bus_number_end - new_mcfg.set_u8(max_bus, offset); + self.get_mut_table(Signature::MCFG) + .unwrap() + .set_u8(max_bus, offset); offset += size_of::(); + self.devices.append(&mut devices); } self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); @@ -472,12 +480,24 @@ pub fn root_init() { ROOT_ACPI.lock().init(); } -pub fn copy_to_root_zone_region() { - ROOT_ACPI - .lock() - .copy_to_zone_region(&ROOT_ZONE_RSDP_REGION, &ROOT_ZONE_ACPI_REGION); +pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { + let mut banned: BTreeSet = BTreeSet::new(); + // FIXME: temp + if config.zone_id != 0 { + banned.insert(Signature::FADT); + } + ROOT_ACPI.lock().copy_to_zone_region( + &config.memory_regions()[config.arch_config.rsdp_memory_region_id], + &config.memory_regions()[config.arch_config.acpi_memory_region_id], + &banned, + cpu_set, + ); } pub fn root_get_table(sig: &Signature) -> Option { ROOT_ACPI.lock().get_table(sig) } + +pub fn root_get_devices() -> Vec { + ROOT_ACPI.lock().devices.clone() +} diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index 3c5574ec..60aab8d9 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -4,15 +4,18 @@ use crate::{ error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemorySet}, percpu::this_zone, + platform::MEM_TYPE_OTHER_ZONES, }; use alloc::string::{String, ToString}; use core::{ - arch::global_asm, + arch::{self, global_asm}, 
ffi::{c_char, CStr}, ptr::{copy, copy_nonoverlapping}, }; use spin::Mutex; +use super::zone::HvArchZoneConfig; + const E820_MAX_ENTRIES_ZEROPAGE: usize = 128; lazy_static::lazy_static! { @@ -39,6 +42,7 @@ numeric_enum_macro::numeric_enum! { #[derive(Debug, Copy, Clone, Eq, PartialEq)] #[allow(non_camel_case_types)] pub enum E820Type { + E820_DEFAULT = 0, E820_RAM = 1, E820_RESERVED = 2, E820_ACPI = 3, @@ -83,43 +87,26 @@ pub struct BootParams { } impl BootParams { - pub fn fill( - setup_addr: GuestPhysAddr, - initrd_addr: GuestPhysAddr, - root_cmdline_addr: GuestPhysAddr, - root_cmdline: &str, - gpm: &MemorySet, - ) -> HvResult { - let boot_params_hpa = - unsafe { gpm.page_table_query(setup_addr).unwrap().0 } as HostPhysAddr; + pub fn fill(config: &HvZoneConfig, gpm: &MemorySet) -> HvResult { + if config.arch_config.setup_load_gpa == 0 { + panic!("setup addr not set yet!"); + } + + let boot_params_hpa = unsafe { + gpm.page_table_query(config.arch_config.setup_load_gpa) + .unwrap() + .0 + } as HostPhysAddr; let boot_params = unsafe { &mut *(boot_params_hpa as *mut BootParams) }; + // info!("boot_proto_version: {:x?}", boot_params.boot_proto_version); if boot_params.boot_proto_version < 0x0204 { panic!("kernel boot protocol version older than 2.04 not supported!"); } - /* - let setup_size = ((boot_params.setup_sects + 1) as usize) * 0x200; - let vmlinux_size = (boot_params.syssize as usize) * 0x10; - let kernel_hpa = unsafe { gpm.page_table_query(kernel_addr).unwrap().0 } as HostPhysAddr; - - // copy vmlinux to the right place - info!( - "{:x}, {:x}, {:x}", - boot_params_hpa + setup_size, - kernel_hpa, - vmlinux_size - ); - unsafe { - copy( - (boot_params_hpa + setup_size) as *const u8, - kernel_hpa as *mut u8, - vmlinux_size, - ) - };*/ - // set bootloader type as undefined boot_params.type_of_loader = 0xff; + let mut loadflags = boot_params.loadflags; // print early messages loadflags &= !BootLoadFlags::QUIET_FLAG; @@ -127,66 +114,64 @@ impl BootParams { loadflags 
&= !BootLoadFlags::CAN_USE_HEAP; boot_params.loadflags = loadflags; - // TODO: tmp command - unsafe { - copy_nonoverlapping( - root_cmdline.as_ptr(), - unsafe { gpm.page_table_query(root_cmdline_addr).unwrap().0 } as *mut u8, - root_cmdline.len(), - ) - }; - boot_params.cmd_line_ptr = root_cmdline_addr as _; + boot_params.cmd_line_ptr = config.arch_config.cmdline_load_gpa as _; + // copy cmdline manually for root zone + if config.zone_id == 0 { + unsafe { + core::ptr::copy_nonoverlapping( + crate::platform::ROOT_ZONE_CMDLINE.as_ptr(), + gpm.page_table_query(config.arch_config.cmdline_load_gpa) + .unwrap() + .0 as *mut u8, + crate::platform::ROOT_ZONE_CMDLINE.len(), + ) + }; + } // set e820 // TODO: zone config - boot_params.set_e820_entries(&root_zone_config()); - - // parse cmdline - let hv_cmdline = CMDLINE.lock().clone(); - for param in hv_cmdline.split_whitespace() { - let mut parts = param.splitn(2, '='); - let key = parts.next().unwrap().to_string(); - let value = parts.next().map(|s| s.to_string()); - match key.as_str() { - "initrd_size" => { - boot_params.set_initrd(initrd_addr as _, value.unwrap().parse::().unwrap()) - } - _ => {} - } + boot_params.set_e820_entries(&config); + + if config.arch_config.initrd_load_gpa != 0 { + boot_params.set_initrd( + config.arch_config.initrd_load_gpa as _, + config.arch_config.initrd_size as _, + ); } Ok(()) } fn set_e820_entries(&mut self, config: &HvZoneConfig) { let mut index = 0; - for mem_region in config.memory_regions().iter() { - match mem_region.mem_type { - MEM_TYPE_RAM => { - self.e820_table[index] = BootE820Entry { - addr: mem_region.virtual_start, - size: mem_region.size, - _type: E820Type::E820_RAM, - }; - index += 1; - } - /* FIXME: reserved? 
- _ => { - self.e820_table[index] = BootE820Entry { - addr: mem_region.virtual_start, - size: mem_region.size, - _type: E820Type::E820_RESERVED, - }; - index += 1; - }*/ - _ => {} + for i in 0..config.memory_regions().len() { + let mem_region = config.memory_regions()[i]; + let mut e820_type = E820Type::E820_DEFAULT; + + if i == config.arch_config.rsdp_memory_region_id + || i == config.arch_config.acpi_memory_region_id + { + e820_type = E820Type::E820_ACPI; + } else if config.arch_config.initrd_load_gpa != 0 + && i == config.arch_config.initrd_memory_region_id + { + } else if mem_region.mem_type == MEM_TYPE_RAM { + e820_type = E820Type::E820_RAM; + } + + if e820_type != E820Type::E820_DEFAULT { + self.e820_table[index] = BootE820Entry { + addr: mem_region.virtual_start, + size: mem_region.size, + _type: e820_type, + }; + index += 1; } } + self.e820_entries = index as _; } fn set_initrd(&mut self, ramdisk_image: u32, ramdisk_size: u32) { - // FIXME: - return; self.ramdisk_image = ramdisk_image; self.ramdisk_size = ramdisk_size; info!("initrd size: {}", self.ramdisk_size); diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 699dff50..91f62c4e 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -17,10 +17,7 @@ use crate::{ error::{HvError, HvResult}, memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, percpu::{this_cpu_data, this_zone}, - platform::{ - ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE, ROOT_ZONE_CMDLINE_ADDR, ROOT_ZONE_INITRD_ADDR, - ROOT_ZONE_SETUP_ADDR, ROOT_ZONE_VMLINUX_ENTRY_ADDR, - }, + platform::{ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE}, }; use alloc::boxed::Box; use core::{ @@ -162,6 +159,7 @@ pub struct ArchCpu { pub power_on: bool, pub gdt: GdtStruct, pub virt_lapic: VirtLocalApic, + vmx_on: bool, vmcs_revision_id: u32, vmxon_region: VmxRegion, vmcs_region: VmxRegion, @@ -178,6 +176,7 @@ impl ArchCpu { power_on: false, gdt: GdtStruct::new(tss), virt_lapic: VirtLocalApic::new(), + vmx_on: false, 
vmcs_revision_id: 0, vmxon_region: VmxRegion::fake_init(), vmcs_region: VmxRegion::fake_init(), @@ -224,14 +223,13 @@ impl ArchCpu { pub fn run(&mut self) -> ! { assert!(this_cpu_id() == self.cpuid); - let mut per_cpu = this_cpu_data(); - if per_cpu.boot_cpu { - // only bsp does this - self.activate_vmx().unwrap(); - self.setup_boot_params().unwrap(); - } else { + self.power_on = true; + + self.activate_vmx().unwrap(); + + if !per_cpu.boot_cpu { // ap start up never returns to irq handler unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; if let Some(ipi_info) = ipi::get_ipi_info(self.cpuid) { @@ -256,7 +254,16 @@ impl ArchCpu { loop {} } + pub fn set_boot_cpu_regs(&mut self, rax: u64, rsi: u64) { + self.guest_regs.rax = rax; + self.guest_regs.rsi = rsi; + } + + /// only activate once fn activate_vmx(&mut self) -> HvResult { + if self.vmx_on { + return Ok(()); + } assert!(check_vmx_support()); // assert!(!is_vmx_enabled()); @@ -271,19 +278,7 @@ impl ArchCpu { unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap() }; - Ok(()) - } - - fn setup_boot_params(&mut self) -> HvResult { - BootParams::fill( - ROOT_ZONE_SETUP_ADDR, - ROOT_ZONE_INITRD_ADDR, - ROOT_ZONE_CMDLINE_ADDR, - ROOT_ZONE_CMDLINE, - &this_zone().read().gpm, // "console=ttyS0 earlyprintk=serial nokaslr\0" - )?; - self.guest_regs.rax = ROOT_ZONE_VMLINUX_ENTRY_ADDR as u64; - self.guest_regs.rsi = ROOT_ZONE_SETUP_ADDR as u64; + self.vmx_on = true; Ok(()) } diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index a833c69c..f3dd98f2 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -109,18 +109,18 @@ pub fn send_ipi(value: u64) -> HvResult { } IpiDeliveryMode::INIT => {} IpiDeliveryMode::START_UP => { - // TODO: target + // FIXME: start up once? 
let mut ipi_info = get_ipi_info(dest).unwrap().lock(); - if !ipi_info.has_start_up { - // we only start up once - ipi_info.has_start_up = true; - ipi_info.start_up_addr = (vector as usize) << 12; - event::send_event( - dest, - IdtVector::VIRT_IPI_VECTOR as _, - event::IPI_EVENT_WAKEUP, - ); - } + //if !ipi_info.has_start_up { + // we only start up once + //ipi_info.has_start_up = true; + ipi_info.start_up_addr = (vector as usize) << 12; + event::send_event( + dest, + IdtVector::VIRT_IPI_VECTOR as _, + event::IPI_EVENT_WAKEUP, + ); + //} } _ => {} } diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index cca93495..bf213c34 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -83,8 +83,8 @@ pub fn get_config_space_info() -> HvResult<(u64, u64)> { hv_result_err!(ENODEV) } -pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) { - let mut bdfs: Vec = Vec::new(); +pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) { + let mut bdfs: Vec = Vec::new(); let mut config_space_size = 0usize; // info!("entry start: {:x} size: {:x}", start, size); @@ -114,7 +114,7 @@ pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) { bdf, bus, dev_func, vendor_id, device_id, header_type ); - bdfs.push(bdf); + bdfs.push(bdf as _); bus_empty = false; // pci bridge diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 53e31cc6..542215b9 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -36,9 +36,10 @@ impl PortIoBitmap { bitmap.set_intercept(PCI_CONFIG_DATA, true); // bitmap.set_range_intercept(0xcf8..0xd00, true); + // FIXME: temp passthrough uart com1 + bitmap.set_range_intercept(0x3f8..0x400, false); + if zoneid == 0 { - // passthrough uart com1 - bitmap.set_range_intercept(0x3f8..0x400, false); // FIXME: get port info from ACPI FACP table bitmap.set_intercept(0xb2, false); bitmap.set_range_intercept(0x600..0x630, false); diff --git a/src/arch/x86_64/zone.rs 
b/src/arch/x86_64/zone.rs index de5497f0..eb659381 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -2,7 +2,7 @@ use crate::{ config::*, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, - platform::{MEM_TYPE_RAM_NOT_ALLOC, MEM_TYPE_ROM}, + platform::MEM_TYPE_OTHER_ZONES, zone::Zone, }; @@ -11,6 +11,14 @@ use crate::{ pub struct HvArchZoneConfig { pub ioapic_base: usize, pub ioapic_size: usize, + pub kernel_entry_gpa: usize, + pub cmdline_load_gpa: usize, + pub setup_load_gpa: usize, + pub initrd_load_gpa: usize, + pub initrd_size: usize, + pub rsdp_memory_region_id: usize, + pub acpi_memory_region_id: usize, + pub initrd_memory_region_id: usize, } impl Zone { @@ -19,11 +27,9 @@ impl Zone { let mut flags = MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE; if mem_region.mem_type == MEM_TYPE_IO { flags |= MemFlags::IO; - } else if mem_region.mem_type == MEM_TYPE_ROM { - flags &= !MemFlags::WRITE; } match mem_region.mem_type { - MEM_TYPE_RAM | MEM_TYPE_ROM | MEM_TYPE_RAM_NOT_ALLOC | MEM_TYPE_IO => { + MEM_TYPE_RAM | MEM_TYPE_IO | MEM_TYPE_OTHER_ZONES => { self.gpm.insert(MemoryRegion::new_with_offset_mapper( mem_region.virtual_start as GuestPhysAddr, mem_region.physical_start as HostPhysAddr, diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index fe340d3a..e3ecabf2 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -148,6 +148,9 @@ impl MMIoDevice for VirtIoApic { impl Zone { pub fn ioapic_mmio_init(&mut self, arch: &HvArchZoneConfig) { + if arch.ioapic_base == 0 || arch.ioapic_size == 0 { + return; + } self.mmio_region_register( arch.ioapic_base, arch.ioapic_size, diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 206e7767..98e358a7 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -49,7 +49,10 @@ impl VirtLocalApic { pub fn rdmsr(&mut self, msr: Msr) -> HvResult { match msr { 
- IA32_X2APIC_APICID => Ok(this_cpu_id() as u64), + IA32_X2APIC_APICID => { + // info!("apicid: {:x}", this_cpu_id()); + Ok(this_cpu_id() as u64) + } IA32_X2APIC_LDR => Ok(this_cpu_id() as u64), // logical apic id IA32_X2APIC_LVT_TIMER => Ok(self.virt_lvt_timer_bits as _), _ => hv_result_err!(ENOSYS), diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 6320a5bc..ad3c11cb 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -92,9 +92,7 @@ pub fn primary_init_early() { vtd::init(); } -pub fn primary_init_late() { - acpi::copy_to_root_zone_region(); -} +pub fn primary_init_late() {} impl Zone { pub fn arch_irqchip_reset(&self) {} diff --git a/src/event.rs b/src/event.rs index 7b397e79..fd7af573 100644 --- a/src/event.rs +++ b/src/event.rs @@ -113,7 +113,11 @@ pub fn check_events() -> bool { let cpu_data = this_cpu_data(); match fetch_event(cpu_data.id) { Some(IPI_EVENT_WAKEUP) => { - // info!("cpu {} wakeup", cpu_data.id); + #[cfg(target_arch = "x86_64")] + if cpu_data.arch_cpu.power_on { + // x86 wake up cpu will send ipi twice, but we only want once + return false; + } cpu_data.arch_cpu.run(); } Some(IPI_EVENT_SHUTDOWN) => { diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index f0bc23b3..decef057 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -220,6 +220,16 @@ impl<'a> HyperCall<'a> { } pub fn hv_zone_config_check(&self, magic_version: *mut u64) -> HyperCallResult { + #[cfg(target_arch = "x86_64")] + let magic_version = unsafe { + this_zone() + .read() + .gpm + .page_table_query(magic_version as usize) + .unwrap() + .0 as *mut u64 + }; + unsafe { *magic_version = CONFIG_MAGIC_VERSION as _; } @@ -233,6 +243,15 @@ impl<'a> HyperCall<'a> { | crate::arch::mm::LOONGARCH64_CACHED_DMW_PREFIX) as *const HvZoneConfig) }; + #[cfg(target_arch = "x86_64")] + let config = unsafe { + &*(this_zone() + .read() + .gpm + .page_table_query(config as *const HvZoneConfig as usize) + .unwrap() + .0 as 
*mut HvZoneConfig) + }; info!("hv_zone_start: config: {:#x?}", config); if !is_this_root_zone() { @@ -258,6 +277,13 @@ impl<'a> HyperCall<'a> { let _lock = target_data.ctrl_lock.lock(); if !target_data.arch_cpu.power_on { + #[cfg(target_arch = "x86_64")] + send_event( + boot_cpu, + crate::arch::idt::IdtVector::VIRT_IPI_VECTOR as _, + IPI_EVENT_WAKEUP, + ); + #[cfg(not(target_arch = "x86_64"))] send_event(boot_cpu, SGI_IPI_ID as _, IPI_EVENT_WAKEUP); } else { error!("hv_zone_start: cpu {} already on", boot_cpu); diff --git a/src/pci/pci.rs b/src/pci/pci.rs index 40469ef2..c19c3235 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -173,9 +173,12 @@ impl Zone { if self.id == 0 { #[cfg(target_arch = "x86_64")] { - // crate::arch::pci::probe_root_pci_devices(pci_config.ecam_base as usize); + let root_zone_alloc_devs = self.pciroot.alloc_devs.clone(); + // self.pciroot.alloc_devs = crate::arch::acpi::root_get_devices(); + info!("probe devices: {:x?}", self.pciroot.alloc_devs); self.virtual_pci_mmio_init(pci_config); self.virtual_pci_device_init(pci_config); + self.pciroot.alloc_devs = root_zone_alloc_devs; } #[cfg(not(target_arch = "x86_64"))] { diff --git a/src/zone.rs b/src/zone.rs index 60a1f2e3..df8f7cfc 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -233,12 +233,6 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { config.num_pci_devs as _, &config.alloc_pci_devs, ); - #[cfg(target_arch = "x86_64")] - zone.pci_init( - &config.pci_config, - config.num_pci_devs as _, - &config.alloc_pci_devs, - ); let mut cpu_num = 0; config.cpus().iter().for_each(|cpu_id| { @@ -269,6 +263,17 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { info!("zone cpu_set: {:#b}", zone.cpu_set.bitmap); let cpu_set = zone.cpu_set; + #[cfg(target_arch = "x86_64")] + { + zone.pci_init( + &config.pci_config, + config.num_pci_devs as _, + &config.alloc_pci_devs, + ); + crate::arch::boot::BootParams::fill(&config, &zone.gpm); + 
crate::arch::acpi::copy_to_guest_memory_region(&config, &cpu_set); + } + let new_zone_pointer = Arc::new(RwLock::new(zone)); { cpu_set.iter().for_each(|cpuid| { @@ -277,6 +282,12 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { //chose boot cpu if cpuid == cpu_set.first_cpu().unwrap() { cpu_data.boot_cpu = true; + + #[cfg(target_arch = "x86_64")] + cpu_data.arch_cpu.set_boot_cpu_regs( + config.arch_config.kernel_entry_gpa as _, + config.arch_config.setup_load_gpa as _, + ); } cpu_data.cpu_on_entry = config.entry_point as _; cpu_data.dtb_ipa = dtb_ipa as _; From b0734f90c3a82b117706d2a0b24057e6b69ded7a Mon Sep 17 00:00:00 2001 From: Solicey Date: Tue, 15 Apr 2025 09:31:56 +0800 Subject: [PATCH 14/29] enable msi vector remap, modify cpu idle, enable zone shutdown --- platform/x86_64/qemu/board.rs | 2 +- platform/x86_64/qemu/platform.mk | 4 +- src/arch/x86_64/acpi.rs | 26 +++++- src/arch/x86_64/cpu.rs | 131 +++++++++++++++++---------- src/arch/x86_64/idt.rs | 45 ++++++---- src/arch/x86_64/ipi.rs | 36 ++++---- src/arch/x86_64/pci.rs | 146 +++++++++++++++++++++++++++++-- src/arch/x86_64/pio.rs | 4 +- src/arch/x86_64/s2pt.rs | 6 +- src/arch/x86_64/trap.rs | 10 ++- src/arch/x86_64/vmcs.rs | 1 + src/arch/x86_64/vtd.rs | 2 +- src/device/irqchip/pic/ioapic.rs | 11 +-- src/device/irqchip/pic/lapic.rs | 1 + src/hypercall/mod.rs | 7 -- src/pci/pci.rs | 38 ++++++++ src/zone.rs | 2 +- 17 files changed, 359 insertions(+), 113 deletions(-) diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index 9b033841..150be778 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -41,7 +41,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "console=ttyS0 earlyprintk=serial nointremap root=/dev/vda rw init=/init\0"; + "console=ttyS0 earlyprintk=serial nointremap no_timer_check root=/dev/vda rw init=/init\0"; 
//"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index ecb890f4..31cf425e 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -1,5 +1,5 @@ QEMU := /home/sora/qemu/build/qemu-system-x86_64 -# /home/sora/qemu/build/qemu-system-x86_64 +# FIXME: /home/sora/qemu/build/qemu-system-x86_64 zone0_boot := $(image_dir)/bootloader/out/boot.bin zone0_setup := $(image_dir)/kernel/setup.bin @@ -8,7 +8,7 @@ zone0_initrd := $(image_dir)/virtdisk/initramfs.cpio.gz zone0_rootfs := $(image_dir)/virtdisk/rootfs1.img QEMU_ARGS := -machine q35,kernel-irqchip=split -QEMU_ARGS += -cpu host,+x2apic,+invtsc -accel kvm +QEMU_ARGS += -cpu host,+x2apic,+invtsc,+vmx -accel kvm # cpu: host Broadwell YongFeng QEMU_ARGS += -smp 4 QEMU_ARGS += -serial mon:stdio QEMU_ARGS += -m 4G diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 33d26b9a..5502c8bf 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -207,6 +207,10 @@ pub struct RootAcpi { tables: BTreeMap, pointers: Vec, devices: Vec, + // key: data reg hpa, value: bdf + msi_data_reg_map: BTreeMap, + // key: msi-x table bar, value: bdf + msix_bar_map: BTreeMap, } impl RootAcpi { @@ -404,14 +408,18 @@ impl RootAcpi { for entry in mcfg.entries() { info!("{:x?}", entry); // we don't have such many buses, probe devices to get the max_bus we have - let (mut devices, _, max_bus) = probe_root_pci_devices(entry.base_address as _); + let (mut devices, mut msi_data_reg_map, mut msix_bar_map, _, max_bus) = + probe_root_pci_devices(entry.base_address as _); // update bus_number_end self.get_mut_table(Signature::MCFG) .unwrap() .set_u8(max_bus, offset); offset += size_of::(); + self.devices.append(&mut devices); + self.msi_data_reg_map.append(&mut msi_data_reg_map); + self.msix_bar_map.append(&mut msix_bar_map); } 
self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); @@ -501,3 +509,19 @@ pub fn root_get_table(sig: &Signature) -> Option { pub fn root_get_devices() -> Vec { ROOT_ACPI.lock().devices.clone() } + +pub fn is_msi_data_reg(hpa: usize) -> Option { + if let Some(&bdf) = ROOT_ACPI.lock().msi_data_reg_map.get(&hpa) { + Some(bdf) + } else { + None + } +} + +pub fn is_msix_bar(hpa: usize) -> Option { + if let Some(&bdf) = ROOT_ACPI.lock().msix_bar_map.get(&hpa) { + Some(bdf) + } else { + None + } +} diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 91f62c4e..e3c88c25 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -3,6 +3,7 @@ use crate::{ boot::BootParams, gdt::{get_tr_base, GdtStruct}, hpet, ipi, + mm::new_s2_memory_set, msr::{ Msr::{self, *}, MsrBitmap, @@ -15,7 +16,12 @@ use crate::{ consts::{core_end, MAX_CPU_NUM, PER_CPU_SIZE}, device::irqchip::pic::{check_pending_vectors, lapic::VirtLocalApic}, error::{HvError, HvResult}, - memory::{addr::phys_to_virt, GuestPhysAddr, HostPhysAddr, PhysAddr, PAGE_SIZE}, + memory::{ + addr::{phys_to_virt, PHYS_VIRT_OFFSET}, + mm::PARKING_MEMORY_SET, + Frame, GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, PhysAddr, PAGE_SIZE, + PARKING_INST_PAGE, + }, percpu::{this_cpu_data, this_zone}, platform::{ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE}, }; @@ -30,6 +36,7 @@ use core::{ }; use raw_cpuid::CpuId; use x86::{ + bits64::vmx, dtables::{self, DescriptorTablePointer}, vmx::vmcs::control::{ EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls, @@ -45,7 +52,7 @@ use super::acpi::RootAcpi; const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; -static VM_LAUNCH_READY: AtomicU32 = AtomicU32::new(0); +static VMXON_DONE: AtomicU32 = AtomicU32::new(0); global_asm!( include_str!("ap_start.S"), @@ -163,6 +170,7 @@ pub struct ArchCpu { vmcs_revision_id: u32, vmxon_region: VmxRegion, vmcs_region: 
VmxRegion, + vm_launch_guest_regs: GeneralRegisters, } impl ArchCpu { @@ -180,6 +188,7 @@ impl ArchCpu { vmcs_revision_id: 0, vmxon_region: VmxRegion::fake_init(), vmcs_region: VmxRegion::fake_init(), + vm_launch_guest_regs: GeneralRegisters::default(), } } @@ -205,10 +214,35 @@ impl ArchCpu { pub fn idle(&mut self) -> ! { assert!(this_cpu_id() == self.cpuid); + self.power_on = false; self.activate_vmx().unwrap(); - VM_LAUNCH_READY.fetch_add(1, Ordering::SeqCst); - loop {} + // info!("idle! cpuid: {:x}", self.cpuid); + + PARKING_MEMORY_SET.call_once(|| { + let parking_code: [u8; 2] = [0xeb, 0xfe]; // jump short -3 + unsafe { + PARKING_INST_PAGE[..2].copy_from_slice(&parking_code); + } + + let mut gpm = new_s2_memory_set(); + gpm.insert(MemoryRegion::new_with_offset_mapper( + 0 as GuestPhysAddr, + unsafe { &PARKING_INST_PAGE as *const _ as HostPhysAddr - PHYS_VIRT_OFFSET }, + PAGE_SIZE, + MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE, + )) + .unwrap(); + gpm + }); + + self.setup_vmcs(0, true).unwrap(); + self.host_stack_top = (core_end() + (self.cpuid + 1) * PER_CPU_SIZE) as _; + + unsafe { + PARKING_MEMORY_SET.get().unwrap().activate(); + self.vmx_launch(); + } } /// Guest general-purpose registers. @@ -225,13 +259,12 @@ impl ArchCpu { assert!(this_cpu_id() == self.cpuid); let mut per_cpu = this_cpu_data(); - self.power_on = true; + // info!("run! cpuid: {:x}", self.cpuid); + self.power_on = true; self.activate_vmx().unwrap(); if !per_cpu.boot_cpu { - // ap start up never returns to irq handler - unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; if let Some(ipi_info) = ipi::get_ipi_info(self.cpuid) { per_cpu.cpu_on_entry = ipi_info.lock().start_up_addr; } @@ -239,27 +272,28 @@ impl ArchCpu { // info!("AP start up! 
addr: {:x}", per_cpu.cpu_on_entry); } - self.setup_vmcs(per_cpu.cpu_on_entry, per_cpu.boot_cpu) - .unwrap(); + self.setup_vmcs(per_cpu.cpu_on_entry, false).unwrap(); per_cpu.activate_gpm(); - // must be called after activate_gpm() - vtd::activate(); + if per_cpu.boot_cpu { + // must be called after activate_gpm() + vtd::activate(); + self.guest_regs = self.vm_launch_guest_regs.clone(); + } - while VM_LAUNCH_READY.load(Ordering::Acquire) < MAX_CPU_NUM as u32 - 1 { + while VMXON_DONE.load(Ordering::Acquire) < MAX_CPU_NUM as u32 - 1 { core::hint::spin_loop(); } + self.host_stack_top = (core_end() + (self.cpuid + 1) * PER_CPU_SIZE) as _; unsafe { self.vmx_launch() }; - loop {} } - pub fn set_boot_cpu_regs(&mut self, rax: u64, rsi: u64) { - self.guest_regs.rax = rax; - self.guest_regs.rsi = rsi; + pub fn set_boot_cpu_vm_launch_regs(&mut self, rax: u64, rsi: u64) { + self.vm_launch_guest_regs.rax = rax; + self.vm_launch_guest_regs.rsi = rsi; } - /// only activate once fn activate_vmx(&mut self) -> HvResult { if self.vmx_on { return Ok(()); @@ -279,6 +313,7 @@ impl ArchCpu { unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap() }; self.vmx_on = true; + VMXON_DONE.fetch_add(1, Ordering::SeqCst); Ok(()) } @@ -315,7 +350,8 @@ impl ArchCpu { Ok(()) } - fn setup_vmcs(&mut self, entry: GuestPhysAddr, set_rip: bool) -> HvResult { + // after activate_vmx + fn setup_vmcs(&mut self, entry: GuestPhysAddr, is_idle: bool) -> HvResult { self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false); let start_paddr = self.vmcs_region.start_paddr() as usize; @@ -323,14 +359,14 @@ impl ArchCpu { Vmcs::load(start_paddr)?; self.setup_vmcs_host(&self.host_stack_top as *const _ as usize)?; - self.setup_vmcs_guest(entry, set_rip, ROOT_ZONE_BOOT_STACK)?; + self.setup_vmcs_guest(entry, ROOT_ZONE_BOOT_STACK)?; self.setup_vmcs_control()?; Ok(()) } fn setup_vmcs_control(&mut self) -> HvResult { - // Intercept NMI and external interrupts. 
+ // intercept NMI and external interrupts use PinbasedControls as PinCtrl; Vmcs::set_control( VmcsControl32::PINBASED_EXEC_CONTROLS, @@ -340,25 +376,23 @@ impl ArchCpu { 0, )?; - // Use I/O bitmaps and MSR bitmaps, activate secondary controls, - // disable CR3 load/store interception. + // use I/O bitmaps and MSR bitmaps, activate secondary controls, + // disable CR3 load/store interception use PrimaryControls as CpuCtrl; Vmcs::set_control( VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS, Msr::IA32_VMX_TRUE_PROCBASED_CTLS, Msr::IA32_VMX_PROCBASED_CTLS.read() as u32, - ( - // CpuCtrl::RDTSC_EXITING | - CpuCtrl::HLT_EXITING - | CpuCtrl::USE_IO_BITMAPS - | CpuCtrl::USE_MSR_BITMAPS - | CpuCtrl::SECONDARY_CONTROLS - ) + (CpuCtrl::HLT_EXITING + // | CpuCtrl::RDTSC_EXITING + | CpuCtrl::USE_IO_BITMAPS + | CpuCtrl::USE_MSR_BITMAPS + | CpuCtrl::SECONDARY_CONTROLS) .bits(), (CpuCtrl::CR3_LOAD_EXITING | CpuCtrl::CR3_STORE_EXITING).bits(), )?; - // Enable EPT, RDTSCP, INVPCID, and unrestricted guest. + // enable EPT, RDTSCP, INVPCID, and unrestricted guest use SecondaryControls as CpuCtrl2; Vmcs::set_control( VmcsControl32::SECONDARY_PROCBASED_EXEC_CONTROLS, @@ -366,13 +400,14 @@ impl ArchCpu { 0, (CpuCtrl2::ENABLE_EPT | CpuCtrl2::ENABLE_RDTSCP + // | CpuCtrl2::VIRTUALIZE_X2APIC | CpuCtrl2::ENABLE_INVPCID | CpuCtrl2::UNRESTRICTED_GUEST) .bits(), 0, )?; - // Load guest IA32_PAT/IA32_EFER on VM entry. + // load guest IA32_PAT/IA32_EFER on VM entry use EntryControls as EntryCtrl; Vmcs::set_control( VmcsControl32::VMENTRY_CONTROLS, @@ -382,7 +417,7 @@ impl ArchCpu { 0, )?; - // Switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit. + // switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit use ExitControls as ExitCtrl; Vmcs::set_control( VmcsControl32::VMEXIT_CONTROLS, @@ -398,25 +433,29 @@ impl ArchCpu { 0, )?; - // No MSR switches if hypervisor doesn't use and there is only one vCPU. 
+ // no MSR switches if hypervisor doesn't use and there is only one vCPU VmcsControl32::VMEXIT_MSR_STORE_COUNT.write(0)?; VmcsControl32::VMEXIT_MSR_LOAD_COUNT.write(0)?; VmcsControl32::VMENTRY_MSR_LOAD_COUNT.write(0)?; - // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps. + // pass-through exceptions, set I/O bitmap and MSR bitmaps VmcsControl32::EXCEPTION_BITMAP.write(0)?; - VmcsControl64::IO_BITMAP_A_ADDR.write(this_zone().read().pio_bitmap.bitmap_a_addr() as _)?; - VmcsControl64::IO_BITMAP_B_ADDR.write(this_zone().read().pio_bitmap.bitmap_b_addr() as _)?; - VmcsControl64::MSR_BITMAPS_ADDR.write(this_zone().read().msr_bitmap.phys_addr() as _)?; + + if self.power_on { + VmcsControl64::IO_BITMAP_A_ADDR + .write(this_zone().read().pio_bitmap.bitmap_a_addr() as _)?; + VmcsControl64::IO_BITMAP_B_ADDR + .write(this_zone().read().pio_bitmap.bitmap_b_addr() as _)?; + VmcsControl64::MSR_BITMAPS_ADDR.write(this_zone().read().msr_bitmap.phys_addr() as _)?; + } + + // set virtual-APIC page address + // self.virt_lapic.vapic_page = Frame::new_zero().unwrap(); + // VmcsControl64::VIRT_APIC_ADDR.write(self.virt_lapic.vapic_page.start_paddr() as _); Ok(()) } - fn setup_vmcs_guest( - &mut self, - entry: GuestPhysAddr, - set_rip: bool, - rsp: GuestPhysAddr, - ) -> HvResult { + fn setup_vmcs_guest(&mut self, entry: GuestPhysAddr, rsp: GuestPhysAddr) -> HvResult { let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; @@ -469,7 +508,7 @@ impl ArchCpu { VmcsGuest64::IA32_EFER.write(0)?; // for AP start up, set CS_BASE to entry address, and RIP to 0. - if !set_rip { + if self.power_on && !this_cpu_data().boot_cpu { VmcsGuestNW::RIP.write(0)?; VmcsGuestNW::CS_BASE.write(entry)?; } @@ -545,12 +584,12 @@ impl ArchCpu { #[naked] unsafe extern "C" fn vmx_launch(&mut self) -> ! 
{ asm!( - "mov [rdi + {host_stack_top}], rsp", // save current RSP to host_stack_top + // "mov [rdi + {host_stack_top}], rsp", // save current RSP to host_stack_top "mov rsp, rdi", // set RSP to guest regs area restore_regs_from_stack!(), "vmlaunch", "jmp {failed}", - host_stack_top = const size_of::(), + // host_stack_top = const size_of::(), failed = sym Self::vmx_entry_failed, options(noreturn), ) diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index d7dcd0fc..7f201ce4 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -1,6 +1,6 @@ use core::u32; -use crate::error::HvResult; +use crate::{error::HvResult, zone::this_zone_id}; use alloc::collections::btree_map::BTreeMap; use spin::{Mutex, Once}; use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; @@ -25,14 +25,16 @@ lazy_static::lazy_static! { } struct AllocVectors { - hv_to_gv: [u32; VECTOR_CNT], - gv_to_hv: BTreeMap, + // key: (zone_id, host vector) value: guest vector + hv_to_gv: BTreeMap<(usize, u8), u32>, + // key: (zone_id, guest vector) value: host vector + gv_to_hv: BTreeMap<(usize, u32), u8>, } impl AllocVectors { fn new() -> Self { Self { - hv_to_gv: [u32::MAX; VECTOR_CNT], + hv_to_gv: BTreeMap::new(), gv_to_hv: BTreeMap::new(), } } @@ -68,35 +70,44 @@ impl IdtStruct { } } -pub fn get_host_vector(gv: u32) -> HvResult { +pub fn get_host_vector(gv: u32, zone_id: usize) -> Option { let mut alloc_vectors = ALLOC_VECTORS.lock(); - if alloc_vectors.gv_to_hv.contains_key(&gv) { - return Ok(*alloc_vectors.gv_to_hv.get(&gv).unwrap()); + if let Some(&hv) = alloc_vectors.gv_to_hv.get(&(zone_id, gv)) { + return Some(hv); } for hv in IdtVector::ALLOC_START..=IdtVector::ALLOC_END { - if alloc_vectors.hv_to_gv[hv as usize] == u32::MAX { - alloc_vectors.hv_to_gv[hv as usize] = gv; - alloc_vectors.gv_to_hv.insert(gv, hv); + if !alloc_vectors.hv_to_gv.contains_key(&(zone_id, hv)) { + alloc_vectors.hv_to_gv.insert((zone_id, hv), gv); + 
alloc_vectors.gv_to_hv.insert((zone_id, gv), hv); - info!("gv: {:x}, hv: {:x}", gv, hv); + // info!("gv: {:x}, hv: {:x}", gv, hv); - return Ok(hv); + return Some(hv); } } - hv_result_err!(EPERM) + None } -pub fn get_guest_vector(hv: u8) -> HvResult { +pub fn get_guest_vector(hv: u8, zone_id: usize) -> Option { let alloc_vectors = ALLOC_VECTORS.lock(); - if let Some(&gv) = alloc_vectors.hv_to_gv.get(hv as usize) { + if let Some(&gv) = alloc_vectors.hv_to_gv.get(&(zone_id, hv)) { if gv != u32::MAX { - return Ok(gv); + return Some(gv); } } - hv_result_err!(EPERM) + None +} + +pub fn clear_vectors(hv: u8, zone_id: usize) { + let mut alloc_vectors = ALLOC_VECTORS.lock(); + + if let Some(&gv) = alloc_vectors.hv_to_gv.get(&(zone_id, hv)) { + alloc_vectors.hv_to_gv.remove_entry(&(zone_id, hv)); + alloc_vectors.gv_to_hv.remove_entry(&(zone_id, gv)); + } } diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index f3dd98f2..254f1b49 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -2,6 +2,7 @@ use crate::{ device::irqchip::inject_vector, error::HvResult, event, + hypercall::SGI_IPI_ID, percpu::{this_cpu_data, this_zone, CpuSet}, }; use alloc::{collections::vec_deque::VecDeque, vec::Vec}; @@ -28,15 +29,11 @@ pub mod IpiDestShorthand { pub struct IpiInfo { pub start_up_addr: usize, - pub has_start_up: bool, } impl IpiInfo { fn new() -> Self { - Self { - start_up_addr: 0, - has_start_up: false, - } + Self { start_up_addr: 0 } } } @@ -101,26 +98,18 @@ pub fn send_ipi(value: u64) -> HvResult { IpiDeliveryMode::FIXED => { // info!("dest: {:x}, vector: {:x}", dest, vector); inject_vector(dest, vector, None, true); - arch_send_event(dest as _, IdtVector::VIRT_IPI_VECTOR as _); + arch_send_event(dest as _, SGI_IPI_ID as _); } IpiDeliveryMode::NMI => { inject_vector(dest, 2, None, true); - arch_send_event(dest as _, IdtVector::VIRT_IPI_VECTOR as _); + arch_send_event(dest as _, SGI_IPI_ID as _); } IpiDeliveryMode::INIT => {} IpiDeliveryMode::START_UP => { // 
FIXME: start up once? let mut ipi_info = get_ipi_info(dest).unwrap().lock(); - //if !ipi_info.has_start_up { - // we only start up once - //ipi_info.has_start_up = true; ipi_info.start_up_addr = (vector as usize) << 12; - event::send_event( - dest, - IdtVector::VIRT_IPI_VECTOR as _, - event::IPI_EVENT_WAKEUP, - ); - //} + event::send_event(dest, SGI_IPI_ID as _, event::IPI_EVENT_WAKEUP); } _ => {} } @@ -129,19 +118,24 @@ pub fn send_ipi(value: u64) -> HvResult { Ok(()) } -pub fn arch_send_event(dest: u64, vector: u64) { +pub fn arch_send_event(dest: u64, _: u64) { unsafe { this_cpu_data() .arch_cpu .virt_lapic .phys_lapic - .send_ipi(vector as _, dest as _) + .send_ipi(IdtVector::VIRT_IPI_VECTOR, dest as _) }; } pub fn handle_virt_ipi() { + unsafe { + this_cpu_data() + .arch_cpu + .virt_lapic + .phys_lapic + .end_of_interrupt() + }; // this may never return! - if event::check_events() { - return; - } + event::check_events(); } diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index bf213c34..4c55edce 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -1,12 +1,18 @@ use crate::{ - arch::{acpi, mmio::MMIoDevice, zone::HvArchZoneConfig}, + arch::{acpi, idt, mmio::MMIoDevice, zone::HvArchZoneConfig}, error::HvResult, - memory::{mmio_generic_handler, GuestPhysAddr, MMIOAccess}, + memory::{mmio_generic_handler, mmio_perform_access, GuestPhysAddr, MMIOAccess}, + percpu::this_zone, zone::Zone, }; use ::acpi::{mcfg::Mcfg, sdt::Signature}; -use alloc::{collections::vec_deque::VecDeque, sync::Arc, vec::Vec}; -use core::ops::Range; +use alloc::{ + collections::{btree_map::BTreeMap, vec_deque::VecDeque}, + sync::Arc, + vec::Vec, +}; +use bit_field::BitField; +use core::{mem::size_of, ops::Range, panic}; lazy_static::lazy_static! 
{ static ref VIRT_PCI_CONFIG_SPACE: (Arc,) = (Arc::new(VirtPciConfigSpace::new()),); @@ -83,8 +89,20 @@ pub fn get_config_space_info() -> HvResult<(u64, u64)> { hv_result_err!(ENODEV) } -pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) { +pub fn probe_root_pci_devices( + config_base_hpa: usize, +) -> ( + Vec, + BTreeMap, + BTreeMap, + usize, + u8, +) { let mut bdfs: Vec = Vec::new(); + // key: data reg hpa, value: bdf + let mut msi_data_reg_map: BTreeMap = BTreeMap::new(); + // key: msi-x table bar, value: bdf + let mut msix_bar_map: BTreeMap = BTreeMap::new(); let mut config_space_size = 0usize; // info!("entry start: {:x} size: {:x}", start, size); @@ -122,6 +140,67 @@ pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) let secondary_bus = unsafe { *((bdf_config_hpa + 0x19) as *const u8) }; buses.push_back(secondary_bus); } + + // probe msi/msi-x capability registers + let mut cap_pointer = unsafe { *((bdf_config_hpa + 0x34) as *const u8) } as usize; + while cap_pointer != 0 { + let cap_hpa = bdf_config_hpa + cap_pointer; + let cap_id = unsafe { *(cap_hpa as *const u8) }; + + if cap_id == 0x5 { + // msi capablility + let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; + let is_64b = msg_ctrl_reg.get_bit(7); + + let data_reg_hpa = match is_64b { + true => cap_hpa + 0xc, + false => cap_hpa + 0x8, + }; + msi_data_reg_map.insert(data_reg_hpa, bdf as _); + info!("msi data reg hpa: {:x?}", data_reg_hpa); + } else if cap_id == 0x11 { + // msi-x capability + let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; + let table_size = msg_ctrl_reg.get_bits(0..=10) as usize; + let table_bir = + unsafe { *((cap_hpa + 0x4) as *const u16) }.get_bits(0..=2) as usize; + + // find msi-x table bar + let bar_hpa = bdf_config_hpa + 0x10 + (table_bir) * size_of::(); + let mut bar = unsafe { *(bar_hpa as *const u32) } as usize; + assert!(!bar.get_bit(0)); // memory request + match bar.get_bits(1..=2) { + 0b00 => { + // 
32-bit decoding + bar &= !(0xfff); + } + 0b10 => { + // 64-bit decoding + let bar_high = + unsafe { *((bar_hpa + size_of::()) as *const u32) } as usize; + bar = (bar_high << 6) + bar.get_bits(26..=31); + } + _ => { + panic!("MSI-X table BAR type error!"); + } + } + + info!( + "table size: {:x}, table bir: {:x}, bar: {:x}", + table_size, table_bir, bar + ); + msix_bar_map.insert(bar, bdf as _); + + for i in 0..=table_size { + let data_reg_hpa = bar + i * size_of::() + 2 * size_of::(); + msi_data_reg_map.insert(data_reg_hpa, bdf as _); + info!("msi-x data reg hpa: {:x?}", data_reg_hpa); + } + } + + info!("cap id: {:x}, hpa: {:x}", cap_id, cap_hpa); + cap_pointer = unsafe { *((cap_hpa + 1) as *const u8) } as usize; + } } if !bus_empty && bus > max_bus { @@ -132,5 +211,60 @@ pub fn probe_root_pci_devices(config_base_hpa: usize) -> (Vec, usize, u8) config_space_size = ((max_bus as usize - 0usize) + 1) << 20; // info!("config space size: {:x}", config_space_size); - (bdfs, config_space_size, max_bus) + ( + bdfs, + msi_data_reg_map, + msix_bar_map, + config_space_size, + max_bus, + ) +} + +pub fn mmio_msix_table_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { + let hpa = base + mmio.address; + + let zone = this_zone(); + let zone_id = zone.read().id; + + let bdf = acpi::is_msi_data_reg(hpa); + if bdf.is_some() && zone.write().pciroot.is_assigned_device(bdf.unwrap()) { + mmio_msi_data_reg_handler(mmio, base, bdf.unwrap(), zone_id) + } else { + mmio_perform_access(base, mmio); + Ok(()) + } +} + +pub fn mmio_msi_data_reg_handler( + mmio: &mut MMIOAccess, + base: usize, + bdf: usize, + zone_id: usize, +) -> HvResult { + let hpa = base + mmio.address; + + let host_vector = unsafe { core::ptr::read_volatile(hpa as *mut u32) } as u8; + if mmio.is_write { + let alloc_host_vector = idt::get_host_vector(mmio.value as _, zone_id).unwrap(); + if host_vector != alloc_host_vector { + idt::clear_vectors(host_vector, zone_id); + mmio.value = alloc_host_vector as _; + 
mmio_perform_access(base, mmio); + } + } else { + if let Some(guest_vector) = idt::get_guest_vector(host_vector, zone_id) { + mmio.value = guest_vector as _; + } else { + mmio.value = host_vector as _; + } + } + /*info!( + "mmio_msi_data_reg_handler! hpa: {:x}, bdf: {:x}, is write: {:x?}, read value: {:x}, write value: {:x}", + base + mmio.address, + bdf, + mmio.is_write, + host_vector, + mmio.value + );*/ + Ok(()) } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 542215b9..6d363e89 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -36,10 +36,12 @@ impl PortIoBitmap { bitmap.set_intercept(PCI_CONFIG_DATA, true); // bitmap.set_range_intercept(0xcf8..0xd00, true); - // FIXME: temp passthrough uart com1 + // FIXME: i8254 + // bitmap.set_range_intercept(0x300..0x310, false); bitmap.set_range_intercept(0x3f8..0x400, false); if zoneid == 0 { + // passthrough uart com1 // FIXME: get port info from ACPI FACP table bitmap.set_intercept(0xb2, false); bitmap.set_range_intercept(0x600..0x630, false); diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index 78f05f86..67aa1db0 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -10,6 +10,7 @@ use crate::{ addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}, MemFlags, }, + percpu::this_cpu_data, zone::this_zone_id, }; use bit_field::BitField; @@ -244,7 +245,10 @@ impl PagingInstr for S2PTInstr { crate::arch::vmcs::VmcsControl64::EPTP.write(s2ptp).unwrap(); unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) }; - vtd::update_dma_translation_tables(this_zone_id(), root_paddr); + // if this cpu is boot cpu and it is running + if this_cpu_data().arch_cpu.power_on && this_cpu_data().boot_cpu { + vtd::update_dma_translation_tables(this_zone_id(), root_paddr); + } } fn flush(_vaddr: Option) {} diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 0fe0bb9b..a40a7b6c 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -24,6 +24,7 @@ use crate::{ 
hypercall::HyperCall, memory::{mmio_handle_access, MMIOAccess, MemFlags}, percpu::this_cpu_data, + zone::this_zone_id, }; use x86_64::registers::control::Cr4Flags; @@ -82,7 +83,11 @@ pub fn arch_handle_trap(tf: &mut TrapFrame) { fn handle_irq(vector: u8) { match vector { - IdtVector::VIRT_IPI_VECTOR => ipi::handle_virt_ipi(), + IdtVector::VIRT_IPI_VECTOR => { + ipi::handle_virt_ipi(); + // send eoi inside handler, so return directly + return; + } IdtVector::APIC_TIMER_VECTOR => inject_vector( this_cpu_id(), this_cpu_data().arch_cpu.virt_lapic.virt_timer_vector, @@ -92,8 +97,7 @@ fn handle_irq(vector: u8) { _ => { inject_vector( this_cpu_id(), - vector as _, - //get_guest_vector(vector).unwrap() as _, + get_guest_vector(vector, this_zone_id()).unwrap() as _, None, false, ); diff --git a/src/arch/x86_64/vmcs.rs b/src/arch/x86_64/vmcs.rs index 047dec7b..a9718c45 100644 --- a/src/arch/x86_64/vmcs.rs +++ b/src/arch/x86_64/vmcs.rs @@ -565,6 +565,7 @@ impl Vmcs { } if (allowed1 & set) != set { // failed if set 0-bits in allowed1 + warn!("allow1: {:x}", allowed1); return hv_result_err!( EINVAL, format!("can not set bits {:#x} in {:?}", set, control) diff --git a/src/arch/x86_64/vtd.rs b/src/arch/x86_64/vtd.rs index c7e803b4..093ec57c 100644 --- a/src/arch/x86_64/vtd.rs +++ b/src/arch/x86_64/vtd.rs @@ -209,7 +209,7 @@ impl Vtd { irte.set_bit(15, false); // vector irte.set_bits(16..=23, irq as _); - // dest id + // FIXME: dest id irte.set_bits(32..=63, 0); unsafe { *irte_ptr = irte }; diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index e3ecabf2..3ea2e3a1 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -1,10 +1,10 @@ use crate::{ - arch::{mmio::MMIoDevice, zone::HvArchZoneConfig}, + arch::{idt, mmio::MMIoDevice, zone::HvArchZoneConfig}, device::irqchip::pic::inject_vector, error::HvResult, memory::{GuestPhysAddr, MMIOAccess}, platform::ROOT_ZONE_IOAPIC_BASE, - zone::Zone, + zone::{this_zone_id, Zone}, 
}; use alloc::{sync::Arc, vec::Vec}; use bit_field::BitField; @@ -118,10 +118,11 @@ impl MMIoDevice for VirtIoApic { entry.set_bits(32..=63, value.get_bits(0..=31)); // use host vector instead of guest vector - /* entry.set_bits( + entry.set_bits( 0..=7, - get_host_vector(entry.get_bits(0..=7) as u32).unwrap() as _, - ); */ + idt::get_host_vector(entry.get_bits(0..=7) as u32, this_zone_id()) + .unwrap() as _, + ); unsafe { configure_gsi_from_raw(index as _, *entry); }; diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 98e358a7..dfe4d6c4 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -6,6 +6,7 @@ use crate::{ msr::Msr::{self, *}, }, error::HvResult, + memory::Frame, percpu::this_cpu_data, }; use bit_field::BitField; diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index decef057..4e0476b2 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -277,13 +277,6 @@ impl<'a> HyperCall<'a> { let _lock = target_data.ctrl_lock.lock(); if !target_data.arch_cpu.power_on { - #[cfg(target_arch = "x86_64")] - send_event( - boot_cpu, - crate::arch::idt::IdtVector::VIRT_IPI_VECTOR as _, - IPI_EVENT_WAKEUP, - ); - #[cfg(not(target_arch = "x86_64"))] send_event(boot_cpu, SGI_IPI_ID as _, IPI_EVENT_WAKEUP); } else { error!("hv_zone_start: cpu {} already on", boot_cpu); diff --git a/src/pci/pci.rs b/src/pci/pci.rs index c19c3235..90bc3470 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -292,6 +292,9 @@ impl Zone { } fn pci_bars_register(&mut self, pci_config: &HvPciConfig) { + #[cfg(target_arch = "x86_64")] + let mut msix_bar_regions: Vec = Vec::new(); + for region in self.pciroot.bar_regions.iter_mut() { let (cpu_base, pci_base) = match region.bar_type { BarType::IO => (pci_config.io_base as usize, pci_config.pci_io_base as usize), @@ -317,6 +320,19 @@ impl Zone { region.bar_type, region.start, region.size ); + #[cfg(target_arch = "x86_64")] + { + // check whether this bar is msi-x table + 
// if true, use msi-x table handler instead + if region.bar_type != BarType::IO { + if let Some(bdf) = crate::arch::acpi::is_msix_bar(region.start) { + info!("msi-x bar! hpa: {:x} bdf: {:x}", region.start, bdf); + msix_bar_regions.push(region.clone()); + continue; + } + } + } + if cfg!(not(target_arch = "x86_64")) || region.bar_type != BarType::IO { self.gpm .insert(MemoryRegion::new_with_offset_mapper( @@ -334,6 +350,18 @@ impl Zone { ); } } + + #[cfg(target_arch = "x86_64")] + { + for region in msix_bar_regions.iter() { + self.mmio_region_register( + region.start, + region.size, + crate::arch::x86_64::pci::mmio_msix_table_handler, + region.start, + ); + } + } } } @@ -345,11 +373,21 @@ pub fn mmio_pci_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { let bus = bdf >> 8; let zone = this_zone(); + let zone_id = zone.read().id; let mut binding = zone.write(); let is_assigned = binding.pciroot.is_assigned_device(bdf); match is_assigned { true => { + #[cfg(target_arch = "x86_64")] + { + if let Some(bdf) = crate::arch::acpi::is_msi_data_reg(base + mmio.address) { + crate::arch::pci::mmio_msi_data_reg_handler(mmio, base, bdf, zone_id); + } else { + mmio_perform_access(base, mmio); + } + } + #[cfg(not(target_arch = "x86_64"))] mmio_perform_access(base, mmio); } false => { diff --git a/src/zone.rs b/src/zone.rs index df8f7cfc..5d8d20ea 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -284,7 +284,7 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { cpu_data.boot_cpu = true; #[cfg(target_arch = "x86_64")] - cpu_data.arch_cpu.set_boot_cpu_regs( + cpu_data.arch_cpu.set_boot_cpu_vm_launch_regs( config.arch_config.kernel_entry_gpa as _, config.arch_config.setup_load_gpa as _, ); From 69bc9636218b12a6b6bd8fc65dba32bc852b34b0 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sat, 19 Apr 2025 10:00:55 +0800 Subject: [PATCH 15/29] launch virtio console on non-root linux --- platform/x86_64/qemu/board.rs | 4 +- src/arch/x86_64/acpi.rs | 41 +++++++-- src/arch/x86_64/boot.rs 
| 9 +- src/arch/x86_64/cpu.rs | 4 +- src/arch/x86_64/idt.rs | 117 ++++++++++++++++---------- src/arch/x86_64/pci.rs | 140 ++++++++++++++++++------------- src/arch/x86_64/pio.rs | 42 ++++------ src/arch/x86_64/trap.rs | 89 +++++++++++--------- src/arch/x86_64/vtd.rs | 63 -------------- src/arch/x86_64/zone.rs | 4 +- src/device/irqchip/pic/ioapic.rs | 91 ++++++++++++-------- src/device/irqchip/pic/mod.rs | 14 +++- src/device/virtio_trampoline.rs | 4 +- src/hypercall/mod.rs | 30 +++++++ src/memory/mmio.rs | 8 +- src/pci/pci.rs | 1 - src/platform/mod.rs | 3 +- src/zone.rs | 1 + 18 files changed, 380 insertions(+), 285 deletions(-) diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index 9621d02d..fe503813 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -39,7 +39,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "console=ttyS0 earlyprintk=serial nointremap no_timer_check root=/dev/vda rw init=/init\0"; + "console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ @@ -103,4 +103,4 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { initrd_memory_region_id: 0x0, }; -pub const ROOT_PCI_DEVS: [u64; 7] = [0x0, 0x8, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; // 0x10, +pub const ROOT_PCI_DEVS: [u64; 8] = [0x0, 0x8, 0x10, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; // 0x10, diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 5502c8bf..4c7b29a0 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -207,6 +207,8 @@ pub struct RootAcpi { tables: BTreeMap, pointers: Vec, devices: Vec, + config_space_base: usize, + config_space_size: usize, // key: data reg hpa, value: bdf 
msi_data_reg_map: BTreeMap, // key: msi-x table bar, value: bdf @@ -287,6 +289,20 @@ impl RootAcpi { madt_cur += entry_len; } + // FIXME: temp clear dsdt + // let mut dsdt = tables.get_mut(&Signature::DSDT).unwrap(); + // dsdt.set_u32(SDT_HEADER_SIZE as _, 0x4); + + // FIXME: temp add mcfg entry + /*let mut mcfg = tables.get_mut(&Signature::MCFG).unwrap(); + let mcfg_len = mcfg.get_u32(0x4) as usize; + let mut entry = vec![ + 0x00u8, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xff, 0x00, 0x00, + 0x00, 0x00, + ]; + mcfg.set_u32(mcfg_len as u32 + entry.len() as u32, 0x4); + mcfg.bytes.append(&mut entry);*/ + // set pointers let hpa_start = acpi_zone_region.physical_start as usize; let gpa_start = acpi_zone_region.virtual_start as usize; @@ -405,8 +421,15 @@ impl RootAcpi { info!("-------------------------------- MCFG --------------------------------"); let mut offset = size_of::() + 0xb; - for entry in mcfg.entries() { + + if let Some(entry) = mcfg + .entries() + .iter() + .find(|&entry| entry.pci_segment_group == 0) + { + // we only support segment group 0 info!("{:x?}", entry); + // we don't have such many buses, probe devices to get the max_bus we have let (mut devices, mut msi_data_reg_map, mut msix_bar_map, _, max_bus) = probe_root_pci_devices(entry.base_address as _); @@ -418,6 +441,11 @@ impl RootAcpi { offset += size_of::(); self.devices.append(&mut devices); + + self.config_space_base = entry.base_address as _; + self.config_space_size = + (((max_bus as u64 - entry.bus_number_start as u64) + 1) << 20) as usize; + self.msi_data_reg_map.append(&mut msi_data_reg_map); self.msix_bar_map.append(&mut msix_bar_map); } @@ -491,9 +519,9 @@ pub fn root_init() { pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { let mut banned: BTreeSet = BTreeSet::new(); // FIXME: temp - if config.zone_id != 0 { - banned.insert(Signature::FADT); - } + // if config.zone_id != 0 { + // banned.insert(Signature::FADT); + // } 
ROOT_ACPI.lock().copy_to_zone_region( &config.memory_regions()[config.arch_config.rsdp_memory_region_id], &config.memory_regions()[config.arch_config.acpi_memory_region_id], @@ -506,8 +534,9 @@ pub fn root_get_table(sig: &Signature) -> Option { ROOT_ACPI.lock().get_table(sig) } -pub fn root_get_devices() -> Vec { - ROOT_ACPI.lock().devices.clone() +pub fn root_get_config_space_info() -> Option<(usize, usize)> { + let acpi = ROOT_ACPI.lock(); + Some((acpi.config_space_base, acpi.config_space_size)) } pub fn is_msi_data_reg(hpa: usize) -> Option { diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index 60aab8d9..7f523456 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -1,6 +1,6 @@ use crate::{ arch::Stage2PageTable, - config::{root_zone_config, HvZoneConfig, MEM_TYPE_RAM}, + config::{root_zone_config, HvPciConfig, HvZoneConfig, MEM_TYPE_RAM}, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemorySet}, percpu::this_zone, @@ -168,6 +168,13 @@ impl BootParams { } } + self.e820_table[index] = BootE820Entry { + addr: config.pci_config.ecam_base as _, + size: config.pci_config.ecam_size as _, + _type: E820Type::E820_RESERVED, + }; + index += 1; + self.e820_entries = index as _; } diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 742e0907..0fb56459 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -443,9 +443,9 @@ impl ArchCpu { if self.power_on { VmcsControl64::IO_BITMAP_A_ADDR - .write(this_zone().read().pio_bitmap.bitmap_a_addr() as _)?; + .write(this_zone().read().pio_bitmap.a.start_paddr() as _)?; VmcsControl64::IO_BITMAP_B_ADDR - .write(this_zone().read().pio_bitmap.bitmap_b_addr() as _)?; + .write(this_zone().read().pio_bitmap.b.start_paddr() as _)?; VmcsControl64::MSR_BITMAPS_ADDR.write(this_zone().read().msr_bitmap.phys_addr() as _)?; } diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 7f201ce4..51dc7438 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ 
-1,7 +1,6 @@ -use core::u32; - use crate::{error::HvResult, zone::this_zone_id}; -use alloc::collections::btree_map::BTreeMap; +use alloc::{collections::btree_map::BTreeMap, vec::Vec}; +use core::u32; use spin::{Mutex, Once}; use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; @@ -19,19 +18,77 @@ pub mod IdtVector { } lazy_static::lazy_static! { - static ref ALLOC_VECTORS: Mutex = { - Mutex::new(AllocVectors::new()) + static ref ALLOC_VECTORS: Mutex = { + Mutex::new(RemapVectorsUnlocked::new()) }; } -struct AllocVectors { - // key: (zone_id, host vector) value: guest vector - hv_to_gv: BTreeMap<(usize, u8), u32>, - // key: (zone_id, guest vector) value: host vector - gv_to_hv: BTreeMap<(usize, u32), u8>, +static REMAP_VECTORS: Once = Once::new(); + +struct RemapVectors { + inner: Vec>, } -impl AllocVectors { +impl RemapVectors { + fn new(max_zones: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_zones { + let v = Mutex::new(RemapVectorsUnlocked::new()); + vs.push(v) + } + Self { inner: vs } + } + + fn get_host_vector(&self, gv: u32, zone_id: usize) -> Option { + let mut vectors = self.inner.get(zone_id).unwrap().lock(); + + if let Some(&hv) = vectors.gv_to_hv.get(&gv) { + return Some(hv); + } + + for hv in IdtVector::ALLOC_START..=IdtVector::ALLOC_END { + if !vectors.hv_to_gv.contains_key(&hv) { + vectors.hv_to_gv.insert(hv, gv); + vectors.gv_to_hv.insert(gv, hv); + + // info!("gv: {:x}, hv: {:x}", gv, hv); + return Some(hv); + } + } + + None + } + + fn get_guest_vector(&self, hv: u8, zone_id: usize) -> Option { + let mut vectors = self.inner.get(zone_id).unwrap().lock(); + + if let Some(&gv) = vectors.hv_to_gv.get(&hv) { + if gv != u32::MAX { + return Some(gv); + } + } + + None + } + + fn clear_vectors(&self, hv: u8, zone_id: usize) { + let mut vectors = self.inner.get(zone_id).unwrap().lock(); + + if let Some(&gv) = vectors.hv_to_gv.get(&hv) { + vectors.hv_to_gv.remove_entry(&hv); + vectors.gv_to_hv.remove_entry(&gv); + } + } +} 
+ +struct RemapVectorsUnlocked { + // key: host vector value: guest vector + hv_to_gv: BTreeMap, + // key: guest vector value: host vector + gv_to_hv: BTreeMap, +} + +impl RemapVectorsUnlocked { fn new() -> Self { Self { hv_to_gv: BTreeMap::new(), @@ -71,43 +128,17 @@ impl IdtStruct { } pub fn get_host_vector(gv: u32, zone_id: usize) -> Option { - let mut alloc_vectors = ALLOC_VECTORS.lock(); - - if let Some(&hv) = alloc_vectors.gv_to_hv.get(&(zone_id, gv)) { - return Some(hv); - } - - for hv in IdtVector::ALLOC_START..=IdtVector::ALLOC_END { - if !alloc_vectors.hv_to_gv.contains_key(&(zone_id, hv)) { - alloc_vectors.hv_to_gv.insert((zone_id, hv), gv); - alloc_vectors.gv_to_hv.insert((zone_id, gv), hv); - - // info!("gv: {:x}, hv: {:x}", gv, hv); - - return Some(hv); - } - } - - None + REMAP_VECTORS.get().unwrap().get_host_vector(gv, zone_id) } pub fn get_guest_vector(hv: u8, zone_id: usize) -> Option { - let alloc_vectors = ALLOC_VECTORS.lock(); - - if let Some(&gv) = alloc_vectors.hv_to_gv.get(&(zone_id, hv)) { - if gv != u32::MAX { - return Some(gv); - } - } - - None + REMAP_VECTORS.get().unwrap().get_guest_vector(hv, zone_id) } pub fn clear_vectors(hv: u8, zone_id: usize) { - let mut alloc_vectors = ALLOC_VECTORS.lock(); + REMAP_VECTORS.get().unwrap().clear_vectors(hv, zone_id); +} - if let Some(&gv) = alloc_vectors.hv_to_gv.get(&(zone_id, hv)) { - alloc_vectors.hv_to_gv.remove_entry(&(zone_id, hv)); - alloc_vectors.gv_to_hv.remove_entry(&(zone_id, gv)); - } +pub fn init(max_zones: usize) { + REMAP_VECTORS.call_once(|| RemapVectors::new(max_zones)); } diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index 4c55edce..994f56ed 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -1,7 +1,9 @@ use crate::{ arch::{acpi, idt, mmio::MMIoDevice, zone::HvArchZoneConfig}, error::HvResult, - memory::{mmio_generic_handler, mmio_perform_access, GuestPhysAddr, MMIOAccess}, + memory::{ + mmio_generic_handler, mmio_handle_access, mmio_perform_access, 
GuestPhysAddr, MMIOAccess, + }, percpu::this_zone, zone::Zone, }; @@ -14,42 +16,10 @@ use alloc::{ use bit_field::BitField; use core::{mem::size_of, ops::Range, panic}; -lazy_static::lazy_static! { - static ref VIRT_PCI_CONFIG_SPACE: (Arc,) = (Arc::new(VirtPciConfigSpace::new()),); -} - -pub struct VirtPciConfigSpace {} - -impl VirtPciConfigSpace { - fn new() -> Self { - Self {} - } -} - -impl MMIoDevice for VirtPciConfigSpace { - fn gpa_range(&self) -> &Vec> { - todo!() - } - - fn read(&self, gpa: GuestPhysAddr) -> HvResult { - let value = unsafe { core::ptr::read_unaligned(gpa as *const u64) }; - // info!("pci config read! gpa: {:x}, value: {:x}", gpa, value); - Ok(value) - } - - fn write(&self, gpa: GuestPhysAddr, value: u64, size: usize) -> HvResult { - info!( - "pci config write! gpa: {:x}, value: {:x}, size: {:x}", - gpa, value, size, - ); - - todo!() - } - - fn trigger(&self, signal: usize) -> HvResult { - todo!() - } -} +use super::{ + pio::{PCI_CONFIG_ADDR_PORT, PCI_CONFIG_DATA_PORT}, + vmx::VmxIoExitInfo, +}; impl Zone { pub fn pci_config_space_mmio_init(&mut self, arch: &HvArchZoneConfig) { @@ -69,26 +39,6 @@ impl Zone { } } -/*fn pci_config_space_mmio_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { - mmio_handler(mmio, &VIRT_PCI_CONFIG_SPACE.0) -}*/ - -pub fn get_config_space_info() -> HvResult<(u64, u64)> { - let bytes = acpi::root_get_table(&Signature::MCFG) - .unwrap() - .get_bytes() - .clone(); - let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; - - for entry in mcfg.entries() { - assert!(entry.pci_segment_group == 0); - let size = ((entry.bus_number_end as u64 - entry.bus_number_start as u64) + 1) << 20; - return Ok((entry.base_address, size)); - } - - hv_result_err!(ENODEV) -} - pub fn probe_root_pci_devices( config_base_hpa: usize, ) -> ( @@ -258,13 +208,85 @@ pub fn mmio_msi_data_reg_handler( mmio.value = host_vector as _; } } - /*info!( + trace!( "mmio_msi_data_reg_handler! 
hpa: {:x}, bdf: {:x}, is write: {:x?}, read value: {:x}, write value: {:x}", base + mmio.address, bdf, mmio.is_write, host_vector, mmio.value - );*/ + ); Ok(()) } + +fn get_pci_mmio_addr() -> Option { + let addr = this_zone().read().pio_bitmap.pci_config_addr as usize; + let (base, _) = crate::arch::acpi::root_get_config_space_info().unwrap(); + + let enable = addr.get_bit(31); + let bdf = addr.get_bits(8..=23); + let reg = addr.get_bits(2..=7); + + if enable { + // info!("pio: {:x}, bdf: {:x}", base + (bdf << 12) + (reg << 2), bdf); + Some(base + (bdf << 12) + (reg << 2)) + } else { + None + } +} + +pub fn handle_pci_config_port_read(io_info: &VmxIoExitInfo) -> u32 { + let mut value = 0u32; + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) { + value = this_zone().read().pio_bitmap.pci_config_addr; + + let offset_bit = 8 * (io_info.port - PCI_CONFIG_ADDR_PORT.start) as usize; + value = value.get_bits(offset_bit..offset_bit + (8 * io_info.access_size) as usize); + } else { + if let Some(mmio_addr) = get_pci_mmio_addr() { + let offset: usize = (io_info.port - PCI_CONFIG_DATA_PORT.start) as usize; + if this_zone() + .read() + .find_mmio_region(mmio_addr + offset, io_info.access_size as _) + .is_some() + { + let mut mmio_access = MMIOAccess { + address: mmio_addr + offset, + size: io_info.access_size as _, + is_write: false, + value: 0, + }; + mmio_handle_access(&mut mmio_access); + value = mmio_access.value as _; + // info!("value: {:x}", value); + } + } + } + value +} + +pub fn handle_pci_config_port_write(io_info: &VmxIoExitInfo, value: u32) { + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) { + let offset_bit = 8 * (io_info.port - PCI_CONFIG_ADDR_PORT.start) as usize; + this_zone().write().pio_bitmap.pci_config_addr.set_bits( + offset_bit..offset_bit + (8 * (io_info.access_size as usize)), + value, + ); + } else { + if let Some(mmio_addr) = get_pci_mmio_addr() { + let offset: usize = (io_info.port - PCI_CONFIG_DATA_PORT.start) as usize; + if this_zone() + .read() 
+ .find_mmio_region(mmio_addr + offset, io_info.access_size as _) + .is_some() + { + mmio_handle_access(&mut MMIOAccess { + address: mmio_addr + offset, + size: io_info.access_size as _, + is_write: true, + value: value as _, + }); + } + } + } +} diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 6d363e89..919ba98c 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -1,24 +1,28 @@ use core::ops::Range; use crate::{ - arch::vtd::{PCI_CONFIG_ADDR, PCI_CONFIG_DATA}, error::HvResult, memory::{Frame, HostPhysAddr}, + zone::this_zone_id, }; pub const UART_COM1_BASE_PORT: u16 = 0x3f8; +pub const PCI_CONFIG_ADDR_PORT: Range = 0xcf8..0xcfc; +pub const PCI_CONFIG_DATA_PORT: Range = 0xcfc..0xd00; #[derive(Debug)] pub struct PortIoBitmap { - a: Frame, - b: Frame, + pub a: Frame, + pub b: Frame, + pub pci_config_addr: u32, } impl PortIoBitmap { - pub fn new(zoneid: usize) -> Self { + pub fn new(zone_id: usize) -> Self { let mut bitmap = Self { a: Frame::new_zero().unwrap(), b: Frame::new_zero().unwrap(), + pci_config_addr: 0, }; bitmap.a.fill(0xff); @@ -32,30 +36,20 @@ impl PortIoBitmap { // ban pci config ports // TODO: handle config space operations from io ports - bitmap.set_intercept(PCI_CONFIG_ADDR, true); - bitmap.set_intercept(PCI_CONFIG_DATA, true); - // bitmap.set_range_intercept(0xcf8..0xd00, true); + bitmap.set_range_intercept(PCI_CONFIG_ADDR_PORT, true); + bitmap.set_range_intercept(PCI_CONFIG_DATA_PORT, true); - // FIXME: i8254 - // bitmap.set_range_intercept(0x300..0x310, false); - bitmap.set_range_intercept(0x3f8..0x400, false); - - if zoneid == 0 { - // passthrough uart com1 - // FIXME: get port info from ACPI FACP table - bitmap.set_intercept(0xb2, false); - bitmap.set_range_intercept(0x600..0x630, false); + // FIXME: uart & i8254 + if zone_id == 0 { + bitmap.set_range_intercept(0x60..0x65, false); + bitmap.set_range_intercept(0x3f8..0x400, false); } - bitmap - } - - pub fn bitmap_a_addr(&self) -> HostPhysAddr { - 
self.a.start_paddr() - } + // FIXME: get port info from ACPI FACP table + bitmap.set_intercept(0xb2, false); + bitmap.set_range_intercept(0x600..0x630, false); - pub fn bitmap_b_addr(&self) -> HostPhysAddr { - self.b.start_paddr() + bitmap } pub fn set_range_intercept(&mut self, mut ports: Range, intercept: bool) { diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index a40a7b6c..caed120d 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,3 +1,5 @@ +use core::mem::size_of; + use crate::{ arch::{ cpu::{this_cpu_id, ArchCpu}, @@ -23,11 +25,17 @@ use crate::{ error::HvResult, hypercall::HyperCall, memory::{mmio_handle_access, MMIOAccess, MemFlags}, - percpu::this_cpu_data, + percpu::{this_cpu_data, this_zone}, zone::this_zone_id, }; +use bit_field::BitField; use x86_64::registers::control::Cr4Flags; +use super::{ + pci::{handle_pci_config_port_read, handle_pci_config_port_write}, + pio::{PCI_CONFIG_ADDR_PORT, PCI_CONFIG_DATA_PORT}, +}; + core::arch::global_asm!( include_str!("trap.S"), sym arch_handle_trap @@ -94,14 +102,14 @@ fn handle_irq(vector: u8) { None, true, ), - _ => { - inject_vector( - this_cpu_id(), - get_guest_vector(vector, this_zone_id()).unwrap() as _, - None, - false, - ); - } + _ => match get_guest_vector(vector, this_zone_id()) { + Some(gv) => { + inject_vector(this_cpu_id(), gv as _, None, false); + } + None => { + warn!("can't find guest vector with host vector {:x}", vector); + } + }, } unsafe { VirtLocalApic::phys_local_apic().end_of_interrupt() }; } @@ -249,38 +257,41 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR return hv_result_err!(ENOSYS); } - /*if let Some(dev) = all_virt_devices().find_port_io_device(io_info.port) { - if io_info.is_in { - let value = dev.read(io_info.port, 0)?; - let rax = &mut arch_cpu.regs_mut().rax; - // SDM Vol. 
1, Section 3.4.1.1: - // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the - // destination general-purpose register. - // * 8-bit and 16-bit operands generate an 8-bit or 16-bit result. The upper 56 bits or - // 48 bits (respectively) of the destination general-purpose register are not modified - // by the operation. - match io_info.access_size { - 1 => *rax = (*rax & !0xff) | (value & 0xff) as u64, - 2 => *rax = (*rax & !0xffff) | (value & 0xffff) as u64, - 4 => *rax = value as u64, - _ => unreachable!(), - } - } else { - let rax = arch_cpu.regs().rax; - let value = match io_info.access_size { - 1 => rax & 0xff, - 2 => rax & 0xffff, - 4 => rax, - _ => unreachable!(), - } as u32; - dev.write(io_info.port, value, 0)?; + let mut value: u32 = 0; + if !io_info.is_in { + let rax = arch_cpu.regs().rax; + value = match io_info.access_size { + 1 => rax & 0xff, + 2 => rax & 0xffff, + 4 => rax, + _ => unreachable!(), + } as _; + + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) + || PCI_CONFIG_DATA_PORT.contains(&io_info.port) + { + handle_pci_config_port_write(&io_info, value); } } else { - debug!( - "Unsupported I/O port {:#x} access: {:#x?} \n {:#x?}", - io_info.port, io_info, arch_cpu - ) - }*/ + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) + || PCI_CONFIG_DATA_PORT.contains(&io_info.port) + { + value = handle_pci_config_port_read(&io_info); + } + let rax = &mut arch_cpu.regs_mut().rax; + // SDM Vol. 1, Section 3.4.1.1: + // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the + // destination general-purpose register. + // * 8-bit and 16-bit operands generate an 8-bit or 16-bit result. The upper 56 bits or + // 48 bits (respectively) of the destination general-purpose register are not modified + // by the operation. 
+ match io_info.access_size { + 1 => *rax = (*rax & !0xff) | (value & 0xff) as u64, + 2 => *rax = (*rax & !0xffff) | (value & 0xffff) as u64, + 4 => *rax = value as u64, + _ => unreachable!(), + } + } arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?; Ok(()) diff --git a/src/arch/x86_64/vtd.rs b/src/arch/x86_64/vtd.rs index 093ec57c..60fc3764 100644 --- a/src/arch/x86_64/vtd.rs +++ b/src/arch/x86_64/vtd.rs @@ -13,11 +13,6 @@ use x86_64::instructions::port::Port; const IR_ENTRY_CNT: usize = 256; -// I/O ports -pub const PCI_CONFIG_ADDR: u16 = 0xcf8; -pub const PCI_CONFIG_DATA: u16 = 0xcfc; -const PCI_CONFIG_ENABLE: u32 = 0x80000000; - const ROOT_TABLE_ENTRY_SIZE: usize = 16; const CONTEXT_TABLE_ENTRY_SIZE: usize = 16; @@ -302,20 +297,6 @@ impl Vtd { } } -fn get_secondary_bus(bus: u8, dev: u8, func: u8) -> u8 { - unsafe { - Port::::new(PCI_CONFIG_ADDR).write( - PCI_CONFIG_ENABLE - | ((bus as u32) << 16) - | ((dev as u32) << 11) - | ((func as u32) << 8) - | 0x18, - ); - let data = Port::::new(PCI_CONFIG_DATA).read(); - ((data >> 8) & 0xff) as u8 - } -} - pub fn parse_root_dmar() -> Mutex { let dmar = acpi::root_get_table(&Signature::DMAR).unwrap(); let mut cur: usize = 48; // start offset of remapping structures @@ -381,47 +362,3 @@ fn flush_cache_range(hpa: usize, size: usize) { i += 64; } } - -/* -fn init_msi_cap_hpa_space() { - let bytes = acpi::root_get_table(&Signature::MCFG) - .unwrap() - .get_bytes() - .clone(); - let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; - - for unit in VTD.get().unwrap().iter() { - let drhd = unit.lock(); - - for entry in mcfg.entries() { - if entry.pci_segment_group != drhd.segment { - break; - } - let bus_range = entry.bus_number_start..=entry.bus_number_end; - - for scope in drhd.scopes.iter() { - if scope.scope_type != DeviceScopeType::PciEndpointDevice { - continue; - } - - if !bus_range.contains(&scope.bus) { - continue; - } - - let config_space_hpa = (entry.base_address as usize) - + ((scope.bus as 
usize) << 20) - + ((scope.dev_func as usize) << 12); - let mut cap_pointer = unsafe { *((config_space_hpa + 0x34) as *const u8) } as usize; - - info!("dev_fun: {:x}", scope.dev_func); - while cap_pointer != 0 { - let cap_hpa = config_space_hpa + cap_pointer; - let cap_id = unsafe { *(cap_hpa as *const u8) }; - - info!("cap id: {:x}, hpa: {:x}", cap_id, cap_hpa); - cap_pointer = unsafe { *((cap_hpa + 1) as *const u8) } as usize; - } - } - } - } -}*/ diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index eb659381..b5604460 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -1,5 +1,6 @@ use crate::{ config::*, + device::virtio_trampoline::mmio_virtio_handler, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, platform::MEM_TYPE_OTHER_ZONES, @@ -37,7 +38,6 @@ impl Zone { flags, ))? } - /* MEM_TYPE_VIRTIO => { self.mmio_region_register( mem_region.physical_start as _, @@ -45,7 +45,7 @@ impl Zone { mmio_virtio_handler, mem_region.physical_start as _, ); - }*/ + } _ => { panic!("Unsupported memory type: {}", mem_region.mem_type) } diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 3ea2e3a1..04cedf86 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -9,7 +9,7 @@ use crate::{ use alloc::{sync::Arc, vec::Vec}; use bit_field::BitField; use core::{ops::Range, u32}; -use spin::Mutex; +use spin::{Mutex, Once}; use x2apic::ioapic::IoApic; use x86_64::instructions::port::Port; @@ -33,9 +33,7 @@ lazy_static::lazy_static! { }; } -lazy_static::lazy_static! 
{ - static ref VIRT_IOAPIC: (Arc,) = (Arc::new(VirtIoApic::new(ROOT_ZONE_IOAPIC_BASE)),); -} +static VIRT_IOAPIC: Once = Once::new(); #[derive(Default)] struct VirtIoApicUnlocked { @@ -44,35 +42,30 @@ struct VirtIoApicUnlocked { } pub struct VirtIoApic { - base_gpa: usize, - gpa_range: Vec>, - inner: Mutex, + inner: Vec>, } impl VirtIoApic { - pub fn new(base_gpa: GuestPhysAddr) -> Self { - Self { - base_gpa, - gpa_range: vec![base_gpa..base_gpa + 0x1000], - inner: Mutex::new(VirtIoApicUnlocked::default()), + pub fn new(max_zones: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_zones { + let v = Mutex::new(VirtIoApicUnlocked::default()); + vs.push(v) } - } -} - -impl MMIoDevice for VirtIoApic { - fn gpa_range(&self) -> &Vec> { - &self.gpa_range + Self { inner: vs } } fn read(&self, gpa: GuestPhysAddr) -> HvResult { // info!("ioapic read! gpa: {:x}", gpa,); + let zone_id = this_zone_id(); + let ioapic = self.inner.get(zone_id).unwrap(); if gpa == 0 { - return Ok(self.inner.lock().cur_reg as _); + return Ok(ioapic.lock().cur_reg as _); } assert!(gpa == 0x10); - let inner = self.inner.lock(); + let inner = ioapic.lock(); match inner.cur_reg { IoApicReg::ID => Ok(0), IoApicReg::VERSION => Ok(IOAPIC_MAX_REDIRECT_ENTRIES << 16 | 0x11), // max redirect entries: 0x17, version: 0x11 @@ -82,7 +75,13 @@ impl MMIoDevice for VirtIoApic { let index = (reg >> 1) as usize; if let Some(entry) = inner.rte.get(index) { if reg % 2 == 0 { - Ok((*entry).get_bits(0..=31)) + let mut lower = (*entry).get_bits(0..=31); + if let Some(gv) = + idt::get_guest_vector(lower.get_bits(0..=7) as u8, zone_id) + { + lower.set_bits(0..=7, gv as _); + } + Ok(lower.get_bits(0..=31)) } else { Ok((*entry).get_bits(32..=63)) } @@ -99,13 +98,15 @@ impl MMIoDevice for VirtIoApic { gpa, value, size, );*/ + let zone_id = this_zone_id(); + let ioapic = self.inner.get(zone_id).unwrap(); if gpa == 0 { - self.inner.lock().cur_reg = value as _; + ioapic.lock().cur_reg = value as _; return Ok(()); } 
assert!(gpa == 0x10); - let mut inner = self.inner.lock(); + let mut inner = ioapic.lock(); match inner.cur_reg { IoApicReg::ID | IoApicReg::VERSION | IoApicReg::ARBITRATION => {} mut reg => { @@ -114,18 +115,19 @@ impl MMIoDevice for VirtIoApic { if let Some(entry) = inner.rte.get_mut(index) { if reg % 2 == 0 { entry.set_bits(0..=31, value.get_bits(0..=31)); - } else { - entry.set_bits(32..=63, value.get_bits(0..=31)); - // use host vector instead of guest vector entry.set_bits( 0..=7, - idt::get_host_vector(entry.get_bits(0..=7) as u32, this_zone_id()) - .unwrap() as _, + idt::get_host_vector(entry.get_bits(0..=7) as u32, zone_id).unwrap() + as _, ); - unsafe { - configure_gsi_from_raw(index as _, *entry); - }; + } else { + entry.set_bits(32..=63, value.get_bits(0..=31)); + + if zone_id == 0 { + // only root zone modify the real I/O APIC + unsafe { configure_gsi_from_raw(index as _, *entry) }; + } } } } @@ -134,13 +136,25 @@ impl MMIoDevice for VirtIoApic { } fn trigger(&self, signal: usize) -> HvResult { - if let Some(entry) = self.inner.lock().rte.get(signal) { + let zone_id = this_zone_id(); + let ioapic = self.inner.get(zone_id).unwrap(); + if let Some(entry) = ioapic.lock().rte.get(signal) { // TODO: physical & logical mode let dest = entry.get_bits(56..=63) as usize; let masked = entry.get_bit(16); let vector = entry.get_bits(0..=7) as u8; + /*info!( + "trigger gv: {:x} zone: {:x}", + idt::get_guest_vector(vector as _, zone_id).unwrap(), + zone_id + );*/ if !masked { - inject_vector(dest, vector, None, true); + inject_vector( + dest, + idt::get_guest_vector(vector as _, zone_id).unwrap() as _, + None, + false, + ); } } Ok(()) @@ -164,10 +178,11 @@ impl Zone { fn ioapic_mmio_handler(mmio: &mut MMIOAccess, _: usize) -> HvResult { if mmio.is_write { VIRT_IOAPIC - .0 + .get() + .unwrap() .write(mmio.address, mmio.value as _, mmio.size) } else { - mmio.value = VIRT_IOAPIC.0.read(mmio.address).unwrap() as _; + mmio.value = 
VIRT_IOAPIC.get().unwrap().read(mmio.address).unwrap() as _; Ok(()) } } @@ -185,6 +200,10 @@ pub fn init_ioapic() { } } +pub fn init_virt_ioapic(max_zones: usize) { + VIRT_IOAPIC.call_once(|| VirtIoApic::new(max_zones)); +} + pub fn ioapic_inject_irq(irq: u8) { - VIRT_IOAPIC.0.trigger(irq as _); + VIRT_IOAPIC.get().unwrap().trigger(irq as _); } diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index ad3c11cb..dfed9da7 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -2,12 +2,13 @@ pub mod ioapic; pub mod lapic; use crate::{ - arch::{acpi, ipi, vmcs::Vmcs, vtd}, - consts::MAX_CPU_NUM, + arch::{acpi, idt, ipi, vmcs::Vmcs, vtd}, + consts::{MAX_CPU_NUM, MAX_ZONE_NUM}, zone::Zone, }; use alloc::{collections::vec_deque::VecDeque, vec::Vec}; use core::arch::asm; +use ioapic::ioapic_inject_irq; use spin::{Mutex, Once}; static PENDING_VECTORS: Once = Once::new(); @@ -48,6 +49,9 @@ impl PendingVectors { if let Some(vector) = vectors.front() { let allow_interrupt = Vmcs::allow_interrupt().unwrap(); if vector.0 < 32 || allow_interrupt { + if vectors.len() == 10 { + warn!("too many pending vectors!"); + } // if it's an exception, or an interrupt that is not blocked, inject it directly. 
Vmcs::inject_interrupt(vector.0, vector.1).unwrap(); vectors.pop_front(); @@ -81,13 +85,17 @@ pub fn disable_irq() { unsafe { asm!("cli") }; } -pub fn inject_irq(_irq: usize, _is_hardware: bool) {} +pub fn inject_irq(_irq: usize, _is_hardware: bool) { + ioapic_inject_irq(_irq as _); +} pub fn percpu_init() {} pub fn primary_init_early() { ipi::init(MAX_CPU_NUM); PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); + idt::init(MAX_ZONE_NUM); + ioapic::init_virt_ioapic(MAX_ZONE_NUM); acpi::root_init(); vtd::init(); } diff --git a/src/device/virtio_trampoline.rs b/src/device/virtio_trampoline.rs index d938c54e..14449b13 100644 --- a/src/device/virtio_trampoline.rs +++ b/src/device/virtio_trampoline.rs @@ -43,10 +43,12 @@ pub const MAX_REQ: u32 = 32; pub const MAX_DEVS: usize = 4; // Attention: The max virtio-dev number for vm is 4. pub const MAX_CPUS: usize = 4; -#[cfg(not(target_arch = "riscv64"))] +#[cfg(all(not(target_arch = "riscv64"), not(target_arch = "x86_64")))] pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 32 + 0x20; #[cfg(target_arch = "riscv64")] pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 0x20; +#[cfg(target_arch = "x86_64")] +pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 0x6; /// non root zone's virtio request handler pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index c059e981..51dc6146 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -43,6 +43,7 @@ numeric_enum! 
{ pub enum HyperCallCode { HvVirtioInit = 0, HvVirtioInjectIrq = 1, + HvVirtioGetIrq = 86, HvZoneStart = 2, HvZoneShutdown = 3, HvZoneList = 4, @@ -80,6 +81,8 @@ impl<'a> HyperCall<'a> { match code { HyperCallCode::HvVirtioInit => self.hv_virtio_init(arg0), HyperCallCode::HvVirtioInjectIrq => self.hv_virtio_inject_irq(), + #[cfg(target_arch = "x86_64")] + HyperCallCode::HvVirtioGetIrq => self.hv_virtio_get_irq(arg0 as *mut u32), HyperCallCode::HvZoneStart => { self.hv_zone_start(&*(arg0 as *const HvZoneConfig), arg1) } @@ -144,6 +147,15 @@ impl<'a> HyperCall<'a> { #[cfg(target_arch = "loongarch64")] let shared_region_addr_pa = shared_region_addr_pa | crate::arch::mm::LOONGARCH64_CACHED_DMW_PREFIX as usize; + #[cfg(target_arch = "x86_64")] + let shared_region_addr_pa = unsafe { + this_zone() + .read() + .gpm + .page_table_query(shared_region_addr_pa) + .unwrap() + .0 + }; assert!(shared_region_addr_pa % PAGE_SIZE == 0); // let offset = shared_region_addr_pa & (PAGE_SIZE - 1); @@ -160,6 +172,8 @@ impl<'a> HyperCall<'a> { .lock() .set_base_addr(shared_region_addr_pa as _); info!("hvisor device region base is {:#x?}", shared_region_addr_pa); + + // FIXME: HyperCallResult::Ok(0) } @@ -222,6 +236,22 @@ impl<'a> HyperCall<'a> { HyperCallResult::Ok(0) } + #[cfg(target_arch = "x86_64")] + fn hv_virtio_get_irq(&self, virtio_irq: *mut u32) -> HyperCallResult { + let virtio_irq = unsafe { + this_zone() + .read() + .gpm + .page_table_query(virtio_irq as usize) + .unwrap() + .0 as *mut u32 + }; + unsafe { + (*virtio_irq) = crate::device::virtio_trampoline::IRQ_WAKEUP_VIRTIO_DEVICE as _; + }; + HyperCallResult::Ok(0) + } + pub fn hv_zone_config_check(&self, magic_version: *mut u64) -> HyperCallResult { #[cfg(target_arch = "x86_64")] let magic_version = unsafe { diff --git a/src/memory/mmio.rs b/src/memory/mmio.rs index e7ead940..6e37532e 100644 --- a/src/memory/mmio.rs +++ b/src/memory/mmio.rs @@ -92,8 +92,12 @@ pub fn mmio_handle_access(mmio: &mut MMIOAccess) -> HvResult { 
mmio.address -= region.start; if cfg!(target_arch = "x86_64") { - #[cfg(target_arch = "x86_64")] - crate::arch::mmio::instruction_emulator(&handler, mmio, arg) + if mmio.size == 0 { + #[cfg(target_arch = "x86_64")] + crate::arch::mmio::instruction_emulator(&handler, mmio, arg) + } else { + handler(mmio, arg) + } } else { handler(mmio, arg) } diff --git a/src/pci/pci.rs b/src/pci/pci.rs index 90bc3470..cab57e18 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -152,7 +152,6 @@ impl Zone { } info!("PCIe init!"); - // info!("{:#x?}", pci_config); init_ecam_base(pci_config.ecam_base as _); diff --git a/src/platform/mod.rs b/src/platform/mod.rs index 97c56c44..d9d35c61 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -62,8 +62,9 @@ pub fn platform_root_zone_config() -> HvZoneConfig { #[cfg(target_arch = "x86_64")] { pci_devs[..ROOT_PCI_DEVS.len()].copy_from_slice(&ROOT_PCI_DEVS); + let config_space_info = crate::arch::acpi::root_get_config_space_info().unwrap(); (root_pci_cfg.ecam_base, root_pci_cfg.ecam_size) = - crate::arch::pci::get_config_space_info().unwrap(); + (config_space_info.0 as _, config_space_info.1 as _); num_pci_devs = ROOT_PCI_DEVS.len() as _; } diff --git a/src/zone.rs b/src/zone.rs index 34845cb7..832f554f 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -270,6 +270,7 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { config.num_pci_devs as _, &config.alloc_pci_devs, ); + info!("{:#x?}", config.pci_config); crate::arch::boot::BootParams::fill(&config, &zone.gpm); crate::arch::acpi::copy_to_guest_memory_region(&config, &cpu_set); } From 1d009b213c81a701ea345d67f0ec291a19725ffe Mon Sep 17 00:00:00 2001 From: Solicey Date: Sat, 26 Apr 2025 20:49:34 +0800 Subject: [PATCH 16/29] support virtio blk, virtio net --- platform/x86_64/qemu/platform.mk | 7 ++++ src/arch/x86_64/cpu.rs | 12 ++++-- src/arch/x86_64/ipi.rs | 20 ++++----- src/arch/x86_64/msr.rs | 17 ++++---- src/arch/x86_64/pio.rs | 1 + src/arch/x86_64/trap.rs | 11 ++--- 
src/arch/x86_64/vtd.rs | 16 ++++--- src/consts.rs | 2 +- src/device/irqchip/pic/ioapic.rs | 29 ++++++++++--- src/device/irqchip/pic/lapic.rs | 19 +++++++++ src/device/irqchip/pic/mod.rs | 72 +++++++++++++++++--------------- src/device/virtio_trampoline.rs | 14 ++++++- src/hypercall/mod.rs | 15 ++++++- src/pci/pci.rs | 19 +++++---- 14 files changed, 164 insertions(+), 90 deletions(-) diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index 31cf425e..2ab556da 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -6,6 +6,7 @@ zone0_setup := $(image_dir)/kernel/setup.bin zone0_vmlinux := $(image_dir)/kernel/vmlinux.bin zone0_initrd := $(image_dir)/virtdisk/initramfs.cpio.gz zone0_rootfs := $(image_dir)/virtdisk/rootfs1.img +zone1_rootfs := $(image_dir)/virtdisk/rootfs2.img QEMU_ARGS := -machine q35,kernel-irqchip=split QEMU_ARGS += -cpu host,+x2apic,+invtsc,+vmx -accel kvm # cpu: host Broadwell YongFeng @@ -18,6 +19,12 @@ QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on # bus=pcie.1, +# QEMU_ARGS += -drive if=none,file="$(zone1_rootfs)",id=X10009000,format=raw +# QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10009000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -netdev tap,id=net0,ifname=tap0,script=no,downscript=no +# QEMU_ARGS += -device virtio-net-pci,bus=pcie.1,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -netdev tap,id=net0,vhostforce=on +# QEMU_ARGS += -device virtio-net-pci,bus=pcie.1,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on # QEMU_ARGS += --trace "virtio_*" --trace "virtqueue_*" --trace "vtd_dma*" --trace 
"iommu_*" QEMU_ARGS += -kernel $(hvisor_elf) diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 0fb56459..f9b57bd7 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -129,9 +129,9 @@ pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { // Intel SDM Vol 3C, Section 8.4.4, MP Initialization Example unsafe { lapic.send_init_ipi(cpuid as u32) }; - hpet::busy_wait(Duration::from_millis(10)); // 10ms + hpet::busy_wait(Duration::from_millis(50)); // 10ms unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; - hpet::busy_wait(Duration::from_micros(200)); // 200us + hpet::busy_wait(Duration::from_micros(2000)); // 200us unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; } @@ -212,6 +212,8 @@ impl ArchCpu { } pub fn idle(&mut self) -> ! { + unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; + assert!(this_cpu_id() == self.cpuid); self.power_on = false; @@ -256,6 +258,8 @@ impl ArchCpu { } pub fn run(&mut self) -> ! { + unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; + assert!(this_cpu_id() == self.cpuid); let mut per_cpu = this_cpu_data(); @@ -555,7 +559,9 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); - check_pending_vectors(this_cpu_id()); + if self.virt_lapic.has_eoi && check_pending_vectors(this_cpu_id()) { + self.virt_lapic.has_eoi = false; + } } unsafe fn vmx_entry_failed() -> ! 
{ diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index 254f1b49..edadcc7f 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -97,12 +97,10 @@ pub fn send_ipi(value: u64) -> HvResult { match delivery_mode { IpiDeliveryMode::FIXED => { // info!("dest: {:x}, vector: {:x}", dest, vector); - inject_vector(dest, vector, None, true); - arch_send_event(dest as _, SGI_IPI_ID as _); + inject_vector(dest, vector, None, false); } IpiDeliveryMode::NMI => { - inject_vector(dest, 2, None, true); - arch_send_event(dest as _, SGI_IPI_ID as _); + inject_vector(dest, 2, None, false); } IpiDeliveryMode::INIT => {} IpiDeliveryMode::START_UP => { @@ -129,13 +127,11 @@ pub fn arch_send_event(dest: u64, _: u64) { } pub fn handle_virt_ipi() { - unsafe { - this_cpu_data() - .arch_cpu - .virt_lapic - .phys_lapic - .end_of_interrupt() - }; // this may never return! - event::check_events(); + loop { + let ret = event::check_events(); + if !ret { + break; + } + } } diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index 1a9edaec..6df0bfe9 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -155,22 +155,21 @@ impl MsrBitmap { bitmap.set_read_intercept(IA32_X2APIC_LVT_TIMER, true); bitmap.set_write_intercept(IA32_APIC_BASE, true); + bitmap.set_write_intercept(IA32_X2APIC_EOI, true); bitmap.set_write_intercept(IA32_X2APIC_ICR, true); bitmap.set_write_intercept(IA32_X2APIC_LVT_TIMER, true); - /*bitmap.set_read_intercept(IA32_APIC_BASE, true); - bitmap.set_write_intercept(IA32_APIC_BASE, true); - - bitmap.set_read_intercept(IA32_TSC_DEADLINE, true); - bitmap.set_write_intercept(IA32_TSC_DEADLINE, true); + for addr in (IA32_X2APIC_ISR0 as u32)..(IA32_X2APIC_ISR7 as u32 + 1) { + if let Ok(msr) = Msr::try_from(addr) { + bitmap.set_read_intercept(msr, true); + } + } - // Intercept all x2APIC MSR accesses - for addr in VirtLocalApic::msr_range() { + for addr in (IA32_X2APIC_IRR0 as u32)..(IA32_X2APIC_IRR7 as u32 + 1) { if let Ok(msr) = 
Msr::try_from(addr) { bitmap.set_read_intercept(msr, true); - bitmap.set_write_intercept(msr, true); } - }*/ + } bitmap } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 919ba98c..3d67f8e8 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -44,6 +44,7 @@ impl PortIoBitmap { bitmap.set_range_intercept(0x60..0x65, false); bitmap.set_range_intercept(0x3f8..0x400, false); } + // bitmap.set_range_intercept(0x3f8..0x400, false); // FIXME: get port info from ACPI FACP table bitmap.set_intercept(0xb2, false); diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index caed120d..2178a34b 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -15,10 +15,7 @@ use crate::{ device::{ irqchip::{ inject_vector, - pic::{ - ioapic::{ioapic_inject_irq, irqs}, - lapic::VirtLocalApic, - }, + pic::{ioapic::irqs, lapic::VirtLocalApic}, }, uart::UartReg, }, @@ -93,14 +90,12 @@ fn handle_irq(vector: u8) { match vector { IdtVector::VIRT_IPI_VECTOR => { ipi::handle_virt_ipi(); - // send eoi inside handler, so return directly - return; } IdtVector::APIC_TIMER_VECTOR => inject_vector( this_cpu_id(), this_cpu_data().arch_cpu.virt_lapic.virt_timer_vector, None, - true, + false, ), _ => match get_guest_vector(vector, this_zone_id()) { Some(gv) => { @@ -321,7 +316,7 @@ fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult { warn!("Failed to handle RDMSR({:#x}): {:?}", rcx, res); } } else { - warn!("Unrecognized RDMSR({:#x})", rcx); + // warn!("Unrecognized RDMSR({:#x})", rcx); } arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_RDMSR)?; diff --git a/src/arch/x86_64/vtd.rs b/src/arch/x86_64/vtd.rs index 60fc3764..e168b2f6 100644 --- a/src/arch/x86_64/vtd.rs +++ b/src/arch/x86_64/vtd.rs @@ -6,7 +6,13 @@ use crate::{ use ::acpi::{mcfg::Mcfg, sdt::Signature}; use alloc::{collections::btree_map::BTreeMap, vec::Vec}; use bit_field::BitField; -use core::{arch::asm, hint::spin_loop, mem::size_of, usize}; +use core::{ + arch::asm, + hint::spin_loop, 
+ mem::size_of, + ptr::{read_volatile, write_volatile}, + usize, +}; use dma_remap_reg::*; use spin::{Mutex, Once}; use x86_64::instructions::port::Port; @@ -281,19 +287,19 @@ impl Vtd { } fn mmio_read_u32(&self, reg: usize) -> u32 { - unsafe { *((self.reg_base_hpa + reg) as *const u32) } + unsafe { read_volatile((self.reg_base_hpa + reg) as *const u32) } } fn mmio_read_u64(&self, reg: usize) -> u64 { - unsafe { *((self.reg_base_hpa + reg) as *const u64) } + unsafe { read_volatile((self.reg_base_hpa + reg) as *const u64) } } fn mmio_write_u32(&self, reg: usize, value: u32) { - unsafe { *((self.reg_base_hpa + reg) as *mut u32) = value }; + unsafe { write_volatile((self.reg_base_hpa + reg) as *mut u32, value) }; } fn mmio_write_u64(&self, reg: usize, value: u64) { - unsafe { *((self.reg_base_hpa + reg) as *mut u64) = value }; + unsafe { write_volatile((self.reg_base_hpa + reg) as *mut u64, value) }; } } diff --git a/src/consts.rs b/src/consts.rs index 0c00bba3..d092d068 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -41,7 +41,7 @@ pub const MAX_ZONE_NUM: usize = 3; pub static mut NCPU: usize = MAX_CPU_NUM; -pub const MAX_WAIT_TIMES: usize = 10000000; +pub const MAX_WAIT_TIMES: usize = 50000000; pub fn core_end() -> VirtAddr { __core_end as _ diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 04cedf86..8048355f 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -1,5 +1,5 @@ use crate::{ - arch::{idt, mmio::MMIoDevice, zone::HvArchZoneConfig}, + arch::{cpu::this_cpu_id, idt, ipi, mmio::MMIoDevice, zone::HvArchZoneConfig}, device::irqchip::pic::inject_vector, error::HvResult, memory::{GuestPhysAddr, MMIOAccess}, @@ -135,10 +135,19 @@ impl VirtIoApic { Ok(()) } - fn trigger(&self, signal: usize) -> HvResult { + fn get_irq_cpu(&self, irq: usize, zone_id: usize) -> Option { + let ioapic = self.inner.get(zone_id).unwrap(); + if let Some(entry) = ioapic.lock().rte.get(irq) { + let dest = 
entry.get_bits(56..=63) as usize; + return Some(dest); + } + None + } + + fn trigger(&self, irq: usize, allow_repeat: bool) -> HvResult { let zone_id = this_zone_id(); let ioapic = self.inner.get(zone_id).unwrap(); - if let Some(entry) = ioapic.lock().rte.get(signal) { + if let Some(entry) = ioapic.lock().rte.get(irq) { // TODO: physical & logical mode let dest = entry.get_bits(56..=63) as usize; let masked = entry.get_bit(16); @@ -153,7 +162,7 @@ impl VirtIoApic { dest, idt::get_guest_vector(vector as _, zone_id).unwrap() as _, None, - false, + allow_repeat, ); } } @@ -204,6 +213,14 @@ pub fn init_virt_ioapic(max_zones: usize) { VIRT_IOAPIC.call_once(|| VirtIoApic::new(max_zones)); } -pub fn ioapic_inject_irq(irq: u8) { - VIRT_IOAPIC.get().unwrap().trigger(irq as _); +pub fn ioapic_inject_irq(irq: u8, allow_repeat: bool) { + VIRT_IOAPIC.get().unwrap().trigger(irq as _, allow_repeat); +} + +pub fn get_irq_cpu(irq: usize, zone_id: usize) -> usize { + VIRT_IOAPIC + .get() + .unwrap() + .get_irq_cpu(irq, zone_id) + .unwrap() } diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index dfe4d6c4..be07284c 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -5,6 +5,7 @@ use crate::{ ipi, msr::Msr::{self, *}, }, + device::irqchip::pic::pop_vector, error::HvResult, memory::Frame, percpu::this_cpu_data, @@ -16,6 +17,7 @@ use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerMode}; pub struct VirtLocalApic { pub phys_lapic: LocalApic, pub virt_timer_vector: u8, + pub has_eoi: bool, virt_lvt_timer_bits: u32, } @@ -36,6 +38,7 @@ impl VirtLocalApic { Self { phys_lapic: lapic, virt_timer_vector: 0, + has_eoi: true, virt_lvt_timer_bits: (1 << 16) as _, // masked } } @@ -55,6 +58,16 @@ impl VirtLocalApic { Ok(this_cpu_id() as u64) } IA32_X2APIC_LDR => Ok(this_cpu_id() as u64), // logical apic id + IA32_X2APIC_ISR0 | IA32_X2APIC_ISR1 | IA32_X2APIC_ISR2 | IA32_X2APIC_ISR3 + | IA32_X2APIC_ISR4 | IA32_X2APIC_ISR5 | 
IA32_X2APIC_ISR6 | IA32_X2APIC_ISR7 => { + // info!("isr!"); + Ok(0) + } + IA32_X2APIC_IRR0 | IA32_X2APIC_IRR1 | IA32_X2APIC_IRR2 | IA32_X2APIC_IRR3 + | IA32_X2APIC_IRR4 | IA32_X2APIC_IRR5 | IA32_X2APIC_IRR6 | IA32_X2APIC_IRR7 => { + // info!("irr!"); + Ok(0) + } IA32_X2APIC_LVT_TIMER => Ok(self.virt_lvt_timer_bits as _), _ => hv_result_err!(ENOSYS), } @@ -62,6 +75,12 @@ impl VirtLocalApic { pub fn wrmsr(&mut self, msr: Msr, value: u64) -> HvResult { match msr { + IA32_X2APIC_EOI => { + // info!("eoi"); + pop_vector(this_cpu_id()); + self.has_eoi = true; + Ok(()) + } IA32_X2APIC_ICR => { // info!("ICR value: {:x}", value); ipi::send_ipi(value); diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index dfed9da7..ebc29d6d 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -2,7 +2,7 @@ pub mod ioapic; pub mod lapic; use crate::{ - arch::{acpi, idt, ipi, vmcs::Vmcs, vtd}, + arch::{acpi, cpu::this_cpu_id, idt, ipi, vmcs::Vmcs, vtd}, consts::{MAX_CPU_NUM, MAX_ZONE_NUM}, zone::Zone, }; @@ -28,41 +28,37 @@ impl PendingVectors { } fn add_vector(&self, cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { - match self.inner.get(cpu_id) { - Some(pending_vectors) => { - let mut vectors = pending_vectors.lock(); - /*if vectors.len() > 2 { - info!("len: {:x}", vectors.len()); - }*/ - if allow_repeat || !vectors.contains(&(vector, err_code)) { - vectors.push_back((vector, err_code)); - } - } - _ => {} + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + if vectors.len() > 10 { + warn!("too many pending vectors! 
cnt: {:x?}", vectors.len()); + } + if allow_repeat || !vectors.contains(&(vector, err_code)) { + vectors.push_back((vector, err_code)); } } - fn check_pending_vectors(&self, cpu_id: usize) { - match self.inner.get(cpu_id) { - Some(pending_vectors) => { - let mut vectors = pending_vectors.lock(); - if let Some(vector) = vectors.front() { - let allow_interrupt = Vmcs::allow_interrupt().unwrap(); - if vector.0 < 32 || allow_interrupt { - if vectors.len() == 10 { - warn!("too many pending vectors!"); - } - // if it's an exception, or an interrupt that is not blocked, inject it directly. - Vmcs::inject_interrupt(vector.0, vector.1).unwrap(); - vectors.pop_front(); - } else { - // interrupts are blocked, enable interrupt-window exiting. - Vmcs::set_interrupt_window(true).unwrap(); - } + fn check_pending_vectors(&self, cpu_id: usize) -> bool { + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + if let Some(vector) = vectors.front() { + let allow_interrupt = Vmcs::allow_interrupt().unwrap(); + if vector.0 < 32 || allow_interrupt { + if vectors.len() > 10 { + warn!("too many pending vectors!"); } + // if it's an exception, or an interrupt that is not blocked, inject it directly. + Vmcs::inject_interrupt(vector.0, vector.1).unwrap(); + // vectors.pop_front(); + return true; + } else { + // interrupts are blocked, enable interrupt-window exiting. 
+ Vmcs::set_interrupt_window(true).unwrap(); } - _ => {} } + false + } + + fn pop_vector(&self, cpu_id: usize) { + self.inner.get(cpu_id).unwrap().lock().pop_front(); } } @@ -71,10 +67,18 @@ pub fn inject_vector(cpu_id: usize, vector: u8, err_code: Option, allow_rep .get() .unwrap() .add_vector(cpu_id, vector, err_code, allow_repeat); + if cpu_id != this_cpu_id() { + // wake up dest + ipi::arch_send_event(cpu_id as _, 0); + } +} + +pub fn check_pending_vectors(cpu_id: usize) -> bool { + PENDING_VECTORS.get().unwrap().check_pending_vectors(cpu_id) } -pub fn check_pending_vectors(cpu_id: usize) { - PENDING_VECTORS.get().unwrap().check_pending_vectors(cpu_id); +pub fn pop_vector(cpu_id: usize) { + PENDING_VECTORS.get().unwrap().pop_vector(cpu_id); } pub fn enable_irq() { @@ -85,8 +89,8 @@ pub fn disable_irq() { unsafe { asm!("cli") }; } -pub fn inject_irq(_irq: usize, _is_hardware: bool) { - ioapic_inject_irq(_irq as _); +pub fn inject_irq(_irq: usize, allow_repeat: bool) { + ioapic_inject_irq(_irq as _, allow_repeat); } pub fn percpu_init() {} diff --git a/src/device/virtio_trampoline.rs b/src/device/virtio_trampoline.rs index 14449b13..ce86caea 100644 --- a/src/device/virtio_trampoline.rs +++ b/src/device/virtio_trampoline.rs @@ -89,7 +89,11 @@ pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { #[cfg(not(target_arch = "loongarch64"))] if dev.need_wakeup() { debug!("need wakeup, sending ipi to wake up virtio device"); + #[cfg(not(target_arch = "x86_64"))] let root_cpu = root_zone().read().cpu_set.first_cpu().unwrap(); + #[cfg(target_arch = "x86_64")] + let root_cpu = + crate::device::irqchip::pic::ioapic::get_irq_cpu(IRQ_WAKEUP_VIRTIO_DEVICE, 0); send_event(root_cpu, SGI_IPI_ID as _, IPI_EVENT_WAKEUP_VIRTIO_DEVICE); } drop(dev); @@ -101,11 +105,17 @@ pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { // fence(Ordering::Acquire); count += 1; if count == MAX_WAIT_TIMES { - warn!("virtio backend is too slow, please 
check it!"); + warn!( + "virtio backend is too slow, please check it! addr: {:x} is_write: {:x?}", + mmio.address, mmio.is_write + ); fence(Ordering::Acquire); } if count == MAX_WAIT_TIMES * 10 { - error!("virtio backend may have some problem, please check it!"); + error!( + "virtio backend may have some problem, please check it! addr: {:x} is_write: {:x?}", + mmio.address, mmio.is_write + ); count = 0; } } diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index 51dc6146..3ae19814 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -195,7 +195,20 @@ impl<'a> HyperCall<'a> { let irq_id = region.res_list[res_front].irq_id as u64; let target_zone = region.res_list[res_front].target_zone; let target_cpu = match find_zone(target_zone as _) { - Some(zone) => zone.read().cpu_set.first_cpu().unwrap(), + Some(zone) => { + #[cfg(not(target_arch = "x86_64"))] + { + zone.read().cpu_set.first_cpu().unwrap() + } + #[cfg(target_arch = "x86_64")] + { + // we use I/O APIC remap table to decide target cpu in x86_64 + crate::device::irqchip::pic::ioapic::get_irq_cpu( + irq_id as _, + target_zone as _, + ) + } + } _ => { fence(Ordering::SeqCst); region.res_front = (region.res_front + 1) & (MAX_REQ - 1); diff --git a/src/pci/pci.rs b/src/pci/pci.rs index cab57e18..4dc4ca30 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -1,3 +1,4 @@ +use core::ptr::{read_volatile, write_volatile}; // Copyright (c) 2025 Syswonder // hvisor is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. 
@@ -112,11 +113,11 @@ impl PciRoot { for bar_id in 0..NUM_BAR_REGS_TYPE0 { unsafe { let reg_ptr = (cfg_base + offsets[bar_id]) as *mut u32; - let origin_val = *reg_ptr; - *reg_ptr = 0xffffffffu32; - let new_val = *reg_ptr; + let origin_val = read_volatile(reg_ptr); + write_volatile(reg_ptr, 0xffffffffu32); + let new_val = read_volatile(reg_ptr); ep.bars_init(bar_id, origin_val, new_val); - *reg_ptr = origin_val; + write_volatile(reg_ptr, origin_val); } } } @@ -129,11 +130,11 @@ impl PciRoot { for bar_id in 0..NUM_BAR_REGS_TYPE1 { unsafe { let reg_ptr = (cfg_base + offsets[bar_id]) as *mut u32; - let origin_val = *reg_ptr; - *reg_ptr = 0xffffffffu32; - let new_val = *reg_ptr; + let origin_val = read_volatile(reg_ptr); + write_volatile(reg_ptr, 0xffffffffu32); + let new_val = read_volatile(reg_ptr); bridge.bars_init(bar_id, origin_val, new_val); - *reg_ptr = origin_val; + write_volatile(reg_ptr, origin_val); } } } @@ -315,7 +316,7 @@ impl Zone { } info!( - "pci bar region: type: {:?}, base: {:#x}, size:{:#x}", + "pci bar region: type: {:?}, base: {:#x}, size: {:#x}", region.bar_type, region.start, region.size ); From f3e75763cc7fc11e448346253eee2b9dd9a91022 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sat, 24 May 2025 10:08:01 +0800 Subject: [PATCH 17/29] tiny fixes --- platform/x86_64/qemu/board.rs | 6 +- src/arch/x86_64/ap_start.S | 5 ++ src/arch/x86_64/cpu.rs | 36 +++++++----- src/arch/x86_64/gdt.rs | 94 -------------------------------- src/arch/x86_64/mod.rs | 1 - src/arch/x86_64/multiboot.S | 4 ++ src/consts.rs | 2 +- src/device/irqchip/pic/ioapic.rs | 10 +++- src/device/irqchip/pic/lapic.rs | 3 - src/device/irqchip/pic/mod.rs | 37 ++++++++----- src/device/virtio_trampoline.rs | 1 + src/main.rs | 5 -- 12 files changed, 69 insertions(+), 135 deletions(-) delete mode 100644 src/arch/x86_64/gdt.rs diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index fe503813..686bf71d 100644 --- a/platform/x86_64/qemu/board.rs +++ 
b/platform/x86_64/qemu/board.rs @@ -32,7 +32,7 @@ const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x4020_0000, // hpa + physical_start: 0x3a20_0000, // hpa virtual_start: 0x3520_0000, // gpa size: 0xf000, // modify size accordingly }; @@ -58,13 +58,13 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x2000_0000, + physical_start: 0x1a00_0000, virtual_start: 0x1500_0000, size: 0x20_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x2020_0000, + physical_start: 0x1a20_0000, virtual_start: 0x1520_0000, size: 0x2000_0000, }, // ram diff --git a/src/arch/x86_64/ap_start.S b/src/arch/x86_64/ap_start.S index a07a8c98..8a730730 100644 --- a/src/arch/x86_64/ap_start.S +++ b/src/arch/x86_64/ap_start.S @@ -35,6 +35,9 @@ ap_start16: .code32 ap_start32: + mov ax, 0x20 + ltr ax + mov esp, [stack_ptr] mov eax, [entry_ptr] jmp eax @@ -50,6 +53,8 @@ ap_start32: .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + .quad 0x00008934ee800067 // 0x20: tss low + .quad 0x00000000ffffff80 // 0x28: tss high .Lap_tmp_gdt_end: // 0x7000 diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index f9b57bd7..de7383da 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,7 +1,6 @@ use crate::{ arch::{ boot::BootParams, - gdt::{get_tr_base, GdtStruct}, hpet, ipi, mm::new_s2_memory_set, msr::{ @@ -26,6 +25,7 @@ use crate::{ platform::{ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE}, }; use alloc::boxed::Box; +use bit_field::BitField; use 
core::{ arch::{asm, global_asm}, fmt::{Debug, Formatter, Result}, @@ -42,10 +42,7 @@ use x86::{ EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls, }, }; -use x86_64::{ - registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}, - structures::tss::TaskStateSegment, -}; +use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}; use super::acpi::RootAcpi; @@ -164,7 +161,6 @@ pub struct ArchCpu { host_stack_top: u64, pub cpuid: usize, pub power_on: bool, - pub gdt: GdtStruct, pub virt_lapic: VirtLocalApic, vmx_on: bool, vmcs_revision_id: u32, @@ -175,14 +171,11 @@ pub struct ArchCpu { impl ArchCpu { pub fn new(cpuid: usize) -> Self { - let boxed = Box::new(TaskStateSegment::new()); - let tss = Box::leak(boxed); Self { guest_regs: GeneralRegisters::default(), host_stack_top: 0, cpuid, power_on: false, - gdt: GdtStruct::new(tss), virt_lapic: VirtLocalApic::new(), vmx_on: false, vmcs_revision_id: 0, @@ -222,7 +215,7 @@ impl ArchCpu { // info!("idle! cpuid: {:x}", self.cpuid); PARKING_MEMORY_SET.call_once(|| { - let parking_code: [u8; 2] = [0xeb, 0xfe]; // jump short -3 + let parking_code: [u8; 2] = [0xeb, 0xfe]; // jump short -2 unsafe { PARKING_INST_PAGE[..2].copy_from_slice(&parking_code); } @@ -559,9 +552,7 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); - if self.virt_lapic.has_eoi && check_pending_vectors(this_cpu_id()) { - self.virt_lapic.has_eoi = false; - } + check_pending_vectors(this_cpu_id()); } unsafe fn vmx_entry_failed() -> ! 
{ @@ -609,6 +600,25 @@ pub fn this_cpu_id() -> usize { } } +fn get_tr_base( + tr: x86::segmentation::SegmentSelector, + gdt: &x86::dtables::DescriptorTablePointer, +) -> u64 { + let index = tr.index() as usize; + let table_len = (gdt.limit as usize + 1) / core::mem::size_of::(); + let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; + let entry = table[index]; + if entry & (1 << 47) != 0 { + // present + let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; + let base_high = table[index + 1] & 0xffff_ffff; + base_low | base_high << 32 + } else { + // no present + 0 + } +} + impl Debug for ArchCpu { fn fmt(&self, f: &mut Formatter) -> Result { (|| -> HvResult { diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs deleted file mode 100644 index 8cf220a2..00000000 --- a/src/arch/x86_64/gdt.rs +++ /dev/null @@ -1,94 +0,0 @@ -use crate::arch::cpu::{self, this_cpu_id}; -use alloc::boxed::Box; -use bit_field::BitField; -use spin::Mutex; -use x86_64::{ - addr::VirtAddr, - instructions::tables::{lgdt, load_tss}, - registers::segmentation::{Segment, SegmentSelector, CS}, - structures::{ - gdt::{Descriptor, DescriptorFlags}, - tss::TaskStateSegment, - DescriptorTablePointer, - }, - PrivilegeLevel, -}; - -#[repr(align(16))] -#[derive(Debug)] -pub struct GdtStruct { - table: [u64; 16], - tss: &'static TaskStateSegment, -} - -impl GdtStruct { - pub const KCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0); - pub const KCODE64_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0); - pub const KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring0); - pub const TSS_SELECTOR: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring0); - - pub fn new(tss: &'static TaskStateSegment) -> Self { - let mut table: [u64; 16] = [0; 16]; - table[1] = DescriptorFlags::KERNEL_CODE32.bits(); - table[2] = DescriptorFlags::KERNEL_CODE64.bits(); - table[3] = 
DescriptorFlags::KERNEL_DATA.bits(); - if let Descriptor::SystemSegment(low, high) = Descriptor::tss_segment(&tss) { - table[4] = low; - table[5] = high; - } - Self { table, tss } - } - - pub fn pointer(&self) -> DescriptorTablePointer { - DescriptorTablePointer { - base: VirtAddr::new(self.table.as_ptr() as u64), - limit: (core::mem::size_of_val(&self.table) - 1) as u16, - } - } - - pub fn load(&self) { - unsafe { - lgdt(&self.pointer()); - CS::set_reg(GdtStruct::KCODE64_SELECTOR); - load_tss(GdtStruct::TSS_SELECTOR); - } - } -} - -/*fn new_static_tss() -> &'static TaskStateSegment { - let mut boxed = Box::new(TaskStateSegment::new()); - Box::leak(boxed) -} - -pub fn load_gdt_tss() { - println!("Initializing GDT and TSS..."); - - let tss = &mut TSS.lock(); - let gdt = &mut GDT.lock(); - let cpuid = this_cpu_id(); - - tss.insert(cpuid, TaskStateSegment::new()); - gdt.insert(cpuid, GdtStruct::new(tss.get(&cpuid).unwrap())); - if let Some(tss) = tss.get(&cpuid) { - gdt.insert(cpuid, GdtStruct::new(tss)); - } -}*/ - -pub fn get_tr_base( - tr: x86::segmentation::SegmentSelector, - gdt: &x86::dtables::DescriptorTablePointer, -) -> u64 { - let index = tr.index() as usize; - let table_len = (gdt.limit as usize + 1) / core::mem::size_of::(); - let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; - let entry = table[index]; - if entry & (1 << 47) != 0 { - // present - let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; - let base_high = table[index + 1] & 0xffff_ffff; - base_low | base_high << 32 - } else { - // no present - 0 - } -} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 4fb2c45a..aa15b3c7 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -4,7 +4,6 @@ pub mod boot; pub mod cpu; pub mod cpuid; pub mod entry; -pub mod gdt; pub mod hpet; pub mod idt; pub mod ipi; diff --git a/src/arch/x86_64/multiboot.S b/src/arch/x86_64/multiboot.S index ffaafd34..91af4acf 100644 --- 
a/src/arch/x86_64/multiboot.S +++ b/src/arch/x86_64/multiboot.S @@ -83,6 +83,8 @@ bsp_entry64: // reload GDT by high address movabs rax, offset .Ltmp_gdt_desc lgdt [rax] + mov ax, 0x20 + ltr ax ENTRY64_COMMON @@ -126,6 +128,8 @@ ap_entry64: .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + .quad 0x00008934ee800067 // 0x20: tss low + .quad 0x00000000ffffff80 // 0x28: tss high .Ltmp_gdt_end: .balign 4096 diff --git a/src/consts.rs b/src/consts.rs index d092d068..0c00bba3 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -41,7 +41,7 @@ pub const MAX_ZONE_NUM: usize = 3; pub static mut NCPU: usize = MAX_CPU_NUM; -pub const MAX_WAIT_TIMES: usize = 50000000; +pub const MAX_WAIT_TIMES: usize = 10000000; pub fn core_end() -> VirtAddr { __core_end as _ diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 8048355f..0fe1b47c 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -121,6 +121,12 @@ impl VirtIoApic { idt::get_host_vector(entry.get_bits(0..=7) as u32, zone_id).unwrap() as _, ); + /*info!( + "write {:x} is edge: {:x?}, {:x}", + index, + value.get_bit(15), + value + );*/ } else { entry.set_bits(32..=63, value.get_bits(0..=31)); @@ -178,13 +184,13 @@ impl Zone { self.mmio_region_register( arch.ioapic_base, arch.ioapic_size, - ioapic_mmio_handler, + mmio_ioapic_handler, arch.ioapic_base, ); } } -fn ioapic_mmio_handler(mmio: &mut MMIOAccess, _: usize) -> HvResult { +fn mmio_ioapic_handler(mmio: &mut MMIOAccess, _: usize) -> HvResult { if mmio.is_write { VIRT_IOAPIC .get() diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index be07284c..fc0673c8 100644 --- a/src/device/irqchip/pic/lapic.rs +++ 
b/src/device/irqchip/pic/lapic.rs @@ -17,7 +17,6 @@ use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerMode}; pub struct VirtLocalApic { pub phys_lapic: LocalApic, pub virt_timer_vector: u8, - pub has_eoi: bool, virt_lvt_timer_bits: u32, } @@ -38,7 +37,6 @@ impl VirtLocalApic { Self { phys_lapic: lapic, virt_timer_vector: 0, - has_eoi: true, virt_lvt_timer_bits: (1 << 16) as _, // masked } } @@ -78,7 +76,6 @@ impl VirtLocalApic { IA32_X2APIC_EOI => { // info!("eoi"); pop_vector(this_cpu_id()); - self.has_eoi = true; Ok(()) } IA32_X2APIC_ICR => { diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index ebc29d6d..bb2ccc35 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -13,43 +13,53 @@ use spin::{Mutex, Once}; static PENDING_VECTORS: Once = Once::new(); +struct InnerPendingVectors { + pub queue: VecDeque<(u8, Option)>, + pub has_eoi: bool, +} + struct PendingVectors { - inner: Vec)>>>, + inner: Vec>, } impl PendingVectors { fn new(max_cpus: usize) -> Self { let mut vs = vec![]; for _ in 0..max_cpus { - let v = Mutex::new(VecDeque::new()); - vs.push(v) + let v = Mutex::new(InnerPendingVectors { + queue: VecDeque::new(), + has_eoi: true, + }); + vs.push(v); } Self { inner: vs } } fn add_vector(&self, cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { let mut vectors = self.inner.get(cpu_id).unwrap().lock(); - if vectors.len() > 10 { - warn!("too many pending vectors! cnt: {:x?}", vectors.len()); + if vectors.queue.len() > 10 { + warn!("too many pending vectors! 
cnt: {:x?}", vectors.queue.len()); } - if allow_repeat || !vectors.contains(&(vector, err_code)) { - vectors.push_back((vector, err_code)); + if allow_repeat || !vectors.queue.contains(&(vector, err_code)) { + vectors.queue.push_back((vector, err_code)); } } fn check_pending_vectors(&self, cpu_id: usize) -> bool { let mut vectors = self.inner.get(cpu_id).unwrap().lock(); - if let Some(vector) = vectors.front() { - let allow_interrupt = Vmcs::allow_interrupt().unwrap(); + + if let Some(vector) = vectors.queue.front() { + let allow_interrupt = Vmcs::allow_interrupt().unwrap() && vectors.has_eoi; if vector.0 < 32 || allow_interrupt { - if vectors.len() > 10 { + if vectors.queue.len() > 10 { warn!("too many pending vectors!"); } // if it's an exception, or an interrupt that is not blocked, inject it directly. Vmcs::inject_interrupt(vector.0, vector.1).unwrap(); - // vectors.pop_front(); + vectors.has_eoi = false; + vectors.queue.pop_front(); return true; - } else { + } else if vectors.has_eoi { // interrupts are blocked, enable interrupt-window exiting. Vmcs::set_interrupt_window(true).unwrap(); } @@ -58,7 +68,8 @@ impl PendingVectors { } fn pop_vector(&self, cpu_id: usize) { - self.inner.get(cpu_id).unwrap().lock().pop_front(); + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + vectors.has_eoi = true; } } diff --git a/src/device/virtio_trampoline.rs b/src/device/virtio_trampoline.rs index ce86caea..9b93c988 100644 --- a/src/device/virtio_trampoline.rs +++ b/src/device/virtio_trampoline.rs @@ -105,6 +105,7 @@ pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { // fence(Ordering::Acquire); count += 1; if count == MAX_WAIT_TIMES { + #[cfg(not(target_arch = "x86_64"))] warn!( "virtio backend is too slow, please check it! 
addr: {:x} is_write: {:x?}", mmio.address, mmio.is_write diff --git a/src/main.rs b/src/main.rs index 1b418508..4ae51141 100644 --- a/src/main.rs +++ b/src/main.rs @@ -195,17 +195,12 @@ fn rust_main(cpuid: usize, host_dtb: usize) { } let cpu = PerCpu::new(cpuid); - #[cfg(target_arch = "x86_64")] - crate::device::irqchip::pic::enable_irq(); println!( "Booting CPU {}: {:p} arch:{:p}, DTB: {:#x}", cpu.id, cpu as *const _, &cpu.arch_cpu as *const _, host_dtb ); - #[cfg(target_arch = "x86_64")] - cpu.arch_cpu.gdt.load(); // load gdt and tss - // Don't you wanna know how many cpu(s) on board? :D let mut ncpu: usize = 0; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] From 483444206deeb4750272f68f993db52df8683172 Mon Sep 17 00:00:00 2001 From: Solicey Date: Wed, 9 Jul 2025 12:07:35 +0800 Subject: [PATCH 18/29] x86 add platform NUC 14 --- .gitignore | 3 + Cargo.toml | 15 +- Makefile | 2 +- platform/x86_64/nuc14/board.rs | 115 ++++++ .../x86_64/nuc14/cargo/config.template.toml | 10 + platform/x86_64/nuc14/cargo/features | 2 + platform/x86_64/nuc14/image/bootloader/boot.S | 41 ++ .../x86_64/nuc14/image/bootloader/boot.ld | 15 + .../x86_64/nuc14/image/bootloader/boot.mk | 36 ++ .../nuc14/image/font/solarize-12x29.psf | Bin 0 -> 29728 bytes .../x86_64/nuc14/image/font/spleen-6x12.psf | Bin 0 -> 7859 bytes .../x86_64/nuc14/image/iso/boot/grub/grub.cfg | 27 ++ platform/x86_64/nuc14/linker.ld | 53 +++ platform/x86_64/nuc14/platform.mk | 49 +++ platform/x86_64/nuc14/test/runner.sh | 0 platform/x86_64/qemu/board.rs | 13 +- platform/x86_64/qemu/cargo/features | 3 +- platform/x86_64/qemu/image/bootloader/boot.mk | 10 +- .../x86_64/qemu/image/bootloader/out/boot.asm | 44 --- .../x86_64/qemu/image/bootloader/out/boot.bin | Bin 94 -> 0 bytes .../x86_64/qemu/image/bootloader/out/boot.elf | Bin 4672 -> 0 bytes .../x86_64/qemu/image/font/solarize-12x29.psf | Bin 0 -> 29728 bytes .../x86_64/qemu/image/font/spleen-6x12.psf | Bin 0 -> 7859 bytes 
.../x86_64/qemu/image/iso/boot/grub/grub.cfg | 27 ++ platform/x86_64/qemu/linker.ld | 1 + platform/x86_64/qemu/platform.mk | 29 +- src/arch/x86_64/acpi.rs | 373 +++++++++++------- src/arch/x86_64/boot.rs | 282 +++++++++++-- src/arch/x86_64/cpu.rs | 2 + src/arch/x86_64/entry.rs | 38 +- src/arch/x86_64/graphics.rs | 208 ++++++++++ src/arch/x86_64/ipi.rs | 3 +- src/arch/x86_64/mod.rs | 1 + src/arch/x86_64/multiboot.S | 140 +++++-- src/arch/x86_64/pci.rs | 23 +- src/arch/x86_64/pio.rs | 2 + src/arch/x86_64/trap.rs | 11 +- src/arch/x86_64/vtd.rs | 11 +- src/arch/x86_64/zone.rs | 4 +- src/device/irqchip/pic/ioapic.rs | 28 +- src/device/irqchip/pic/mod.rs | 1 - src/device/uart/mod.rs | 4 +- src/device/uart/uart16550a.rs | 179 ++++++++- src/logging.rs | 51 +++ src/main.rs | 21 +- src/percpu.rs | 8 + src/zone.rs | 2 +- 47 files changed, 1552 insertions(+), 335 deletions(-) create mode 100644 platform/x86_64/nuc14/board.rs create mode 100644 platform/x86_64/nuc14/cargo/config.template.toml create mode 100644 platform/x86_64/nuc14/cargo/features create mode 100644 platform/x86_64/nuc14/image/bootloader/boot.S create mode 100644 platform/x86_64/nuc14/image/bootloader/boot.ld create mode 100644 platform/x86_64/nuc14/image/bootloader/boot.mk create mode 100644 platform/x86_64/nuc14/image/font/solarize-12x29.psf create mode 100644 platform/x86_64/nuc14/image/font/spleen-6x12.psf create mode 100644 platform/x86_64/nuc14/image/iso/boot/grub/grub.cfg create mode 100644 platform/x86_64/nuc14/linker.ld create mode 100644 platform/x86_64/nuc14/platform.mk create mode 100644 platform/x86_64/nuc14/test/runner.sh delete mode 100644 platform/x86_64/qemu/image/bootloader/out/boot.asm delete mode 100755 platform/x86_64/qemu/image/bootloader/out/boot.bin delete mode 100755 platform/x86_64/qemu/image/bootloader/out/boot.elf create mode 100644 platform/x86_64/qemu/image/font/solarize-12x29.psf create mode 100644 platform/x86_64/qemu/image/font/spleen-6x12.psf create mode 100644 
platform/x86_64/qemu/image/iso/boot/grub/grub.cfg create mode 100644 src/arch/x86_64/graphics.rs diff --git a/.gitignore b/.gitignore index 2c573fb6..09f1e3f8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ /images/* /platform/*/*/image/kernel/* /platform/*/*/image/virtdisk/* +/platform/*/*/image/bootloader/out +/platform/*/*/image/iso/boot/kernel/* +/platform/*/*/image/iso/boot/hvisor /tools/hvisor /tmp *.mod.[co] diff --git a/Cargo.toml b/Cargo.toml index 4ea9293b..a3d8e61b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,10 @@ fdt = { path = "./vendor/fdt" } qemu-exit = "3.0.2" cortex-a = "8.1.1" cfg-if = "1.0" -bitvec = { version="1.0.1", default-features = false, features = ["atomic", "alloc"] } +bitvec = { version = "1.0.1", default-features = false, features = [ + "atomic", + "alloc", +] } [dependencies.fdt-rs] version = "0.4.5" @@ -41,11 +44,11 @@ riscv-pac = "0.2.0" loongArch64 = "0.2.4" [target.'cfg(target_arch = "x86_64")'.dependencies] -x86 = "0.52.0" +x86 = "=0.52.0" x86_64 = "=0.14.10" -x2apic = "0.4.3" -raw-cpuid = "10.7.0" -acpi = "5.2.0" +x2apic = "=0.4.3" +raw-cpuid = "=10.7.0" +acpi = "=5.2.0" [features] ############# aarch64 ############## @@ -79,6 +82,8 @@ sstc = [] loongson_7a2000 = [] # uart driver loongson_uart = [] +############# x86_64 ############### +graphics = [] [profile.dev] diff --git a/Makefile b/Makefile index 3cd41566..b33fefea 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ LOG ?= info STATS ?= off PORT ?= 2333 MODE ?= debug -BOARD ?= qemu +BOARD ?= nuc14 FEATURES= BID ?= diff --git a/platform/x86_64/nuc14/board.rs b/platform/x86_64/nuc14/board.rs new file mode 100644 index 00000000..2335d1f6 --- /dev/null +++ b/platform/x86_64/nuc14/board.rs @@ -0,0 +1,115 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// +use crate::{arch::zone::HvArchZoneConfig, config::*, memory::GuestPhysAddr}; + +pub const MEM_TYPE_OTHER_ZONES: u32 = 5; + +pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; +pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; +pub const ROOT_ZONE_ENTRY: u64 = 0x8000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // hpa +pub const ROOT_ZONE_CPUS: u64 = (1 << 0); + +const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x50e_0000, + virtual_start: 0xe_0000, + size: 0x2_0000, +}; + +const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x3a20_0000, // hpa + virtual_start: 0x3520_0000, // gpa + size: 0x10_0000, // modify size accordingly +}; + +pub const ROOT_ZONE_NAME: &str = "root-linux"; +pub const ROOT_ZONE_CMDLINE: &str = + "video=vesafb console=ttyS0 console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; +//"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic +// video=vesafb + +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x500_0000, + virtual_start: 0x0, + size: 0xe_0000, + }, // ram + ROOT_ZONE_RSDP_REGION, // rsdp + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x510_0000, + virtual_start: 0x10_0000, + size: 0x14f0_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 
0x1a00_0000, + virtual_start: 0x1500_0000, + size: 0x20_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x1a20_0000, + virtual_start: 0x1520_0000, + size: 0x2000_0000, + }, // ram + ROOT_ZONE_ACPI_REGION, // acpi + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfed0_0000, + virtual_start: 0xfed0_0000, + size: 0x1000, + }, // hpet + // TODO: e820 mem space probe + HvConfigMemoryRegion { + mem_type: MEM_TYPE_OTHER_ZONES, + physical_start: 0x4030_0000, + virtual_start: 0x4030_0000, + size: 0x2000_0000, + }, // zone 1 +]; + +const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0x9000; +const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xa000; +const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; +const ROOT_ZONE_SCREEN_BASE_ADDR: GuestPhysAddr = 0x7000_0000; + +pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; +pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; +pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { + ioapic_base: ROOT_ZONE_IOAPIC_BASE, + ioapic_size: 0x1000, + kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, + cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, + setup_load_gpa: ROOT_ZONE_SETUP_ADDR, + initrd_load_gpa: 0x0, + initrd_size: 0x0, + rsdp_memory_region_id: 0x1, + acpi_memory_region_id: 0x5, + initrd_memory_region_id: 0x0, + screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, +}; + +pub const ROOT_PCI_DEVS: [u64; 16] = [ + 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xa3, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, +]; // 0xfd, + +#[cfg(all(feature = "graphics", target_arch = "x86_64"))] +pub const GRAPHICS_FONT: &[u8] = + include_bytes!("../../platform/x86_64/qemu/image/font/spleen-6x12.psf"); diff --git a/platform/x86_64/nuc14/cargo/config.template.toml b/platform/x86_64/nuc14/cargo/config.template.toml new file mode 100644 index 00000000..a454e986 --- /dev/null +++ b/platform/x86_64/nuc14/cargo/config.template.toml @@ -0,0 +1,10 @@ +[target.x86_64-unknown-none] +linker = "rust-lld" 
+rustflags = [ + "-Clink-arg=-Tplatform/__ARCH__/__BOARD__/linker.ld", + "-Clink-arg=-no-pie", + "-Clinker-flavor=ld.lld", + "-Cforce-frame-pointers=yes", + "-Ctarget-feature=-mmx,-sse,+soft-float", + "-Cno-redzone=yes", +] \ No newline at end of file diff --git a/platform/x86_64/nuc14/cargo/features b/platform/x86_64/nuc14/cargo/features new file mode 100644 index 00000000..e525097a --- /dev/null +++ b/platform/x86_64/nuc14/cargo/features @@ -0,0 +1,2 @@ +pci +graphics \ No newline at end of file diff --git a/platform/x86_64/nuc14/image/bootloader/boot.S b/platform/x86_64/nuc14/image/bootloader/boot.S new file mode 100644 index 00000000..9b979b63 --- /dev/null +++ b/platform/x86_64/nuc14/image/bootloader/boot.S @@ -0,0 +1,41 @@ +.section .text +.code16 +.global entry16 +entry16: + cli + cld + + mov ecx, eax + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + + lgdt [prot_gdt_desc] + mov eax, cr0 + or eax, 0x1 + mov cr0, eax + + ljmp 0x8, entry32 + +.code32 +.global entry32 +entry32: + mov ax, 0x10 + mov ds, ax + mov es, ax + mov ss, ax + mov fs, ax + mov gs, ax + + jmp ecx + +.balign 16 +prot_gdt: + .quad 0x0000000000000000 # 0x00: null + .quad 0x00cf9b000000ffff # 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff # 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + +prot_gdt_desc: + .short prot_gdt_desc - prot_gdt - 1 # limit + .long prot_gdt # base diff --git a/platform/x86_64/nuc14/image/bootloader/boot.ld b/platform/x86_64/nuc14/image/bootloader/boot.ld new file mode 100644 index 00000000..3f96b209 --- /dev/null +++ b/platform/x86_64/nuc14/image/bootloader/boot.ld @@ -0,0 +1,15 @@ +OUTPUT_ARCH(i386) +BASE_ADDRESS = 0x8000; + +ENTRY(entry16) +SECTIONS +{ + . 
= BASE_ADDRESS; + .text : { + *(.text .text.*) + } + + /DISCARD/ : { + *(.eh_frame) *(.eh_frame_hdr) + } +} diff --git a/platform/x86_64/nuc14/image/bootloader/boot.mk b/platform/x86_64/nuc14/image/bootloader/boot.mk new file mode 100644 index 00000000..e23e4540 --- /dev/null +++ b/platform/x86_64/nuc14/image/bootloader/boot.mk @@ -0,0 +1,36 @@ +boot_dir := $(image_dir)/bootloader +boot_out_dir := $(image_dir)/bootloader/out + +boot_src := $(boot_dir)/boot.S +boot_lds := $(boot_dir)/boot.ld + +boot_o := $(boot_out_dir)/boot.o +boot_elf := $(boot_out_dir)/boot.elf +boot_bin := $(boot_out_dir)/boot.bin +boot_disa := $(boot_out_dir)/boot.asm + +AS ?= as +LD ?= ld +OBJCOPY ?= objcopy +OBJDUMP ?= objdump + +boot: mkout $(boot_bin) + +disasm: + $(OBJDUMP) -d -m i8086 -M intel $(boot_elf) | less + +mkout: + rm -rf $(boot_out_dir) + mkdir -p $(boot_out_dir) + +$(boot_o): $(boot_src) + $(AS) --32 -msyntax=intel -mnaked-reg $< -o $@ + +$(boot_elf): $(boot_o) $(boot_lds) + $(LD) -T$(boot_lds) $< -o $@ + $(OBJDUMP) -d -m i8086 -M intel $@ > $(boot_disa) + +$(boot_bin): $(boot_elf) + $(OBJCOPY) $< --strip-all -O binary $@ + +.PHONY: all disasm \ No newline at end of file diff --git a/platform/x86_64/nuc14/image/font/solarize-12x29.psf b/platform/x86_64/nuc14/image/font/solarize-12x29.psf new file mode 100644 index 0000000000000000000000000000000000000000..071330e9e585e79ebcd2079e1daf51e0f8e572c7 GIT binary patch literal 29728 zcmcIt&yO6(b*|;CcPMGOGlXDeNUnB@q(EJJxCXh5C`}Vom?%~v`<9$kt5tTBfRKhU zAT1GvQ$T6a1 zHA?DXr>nc`t5-i>y?XVdYya#Ye)q2goa7&NXN34oT%O0@Cg0b(4bc-9bV5ocbV2vT z#!|?Ce6lL7i~m3Rbxqt$iLbb=LpGHHpRS(TUsork#nZZcp3YXK*W{{HpWHDOsXnCtW&^EpsBh3UIuvtCZ_rTl^Lp3Q z+ccnoV0r|yzJYi}PR%~mxd@TrL(4=CPIl*jXzuyu_w1x zS2rQw6pB(MOh-@6mYAA7F%i&4ruR6E?1RI1L_h88^U!tQNS{j2r&rRuMx>yiTSC&) zvM(jwfzs@l5HJzHBhHE>J*!_6*P}S*v7QxuXz5g(&Qe_ah_8)X7hB?+;syO`>bJM` zHc*6F6GDTfM9XH@8uzVSk=Lu~3;LT#M~JIZlXOoH^?8y;&7J?GnQgJ1}_hw+q27 z$tP}Qt~I-Bbxa98G!M-{4bV?ZT^*+GX(wUYP{Y}9wn>}l=P&3fO=&mNDs?D+&h39p 
zhvrZnidW$8(uO)z@2N4dZV0PU6i|WL?Psscvf82BRvuq z#kSZHFNy=X|5(w4ZmAtvkK9DqojIp=t;)u=rd|3rVJmGf^yN10HnaFTVog`^9W35= zkjIW*n^Oldx;O__z``@OxlpF&0(e}|@1d;Q=vy0wlb@+^zCzS3Z&oY0F|~}oGMMEV zwoTEAkRUGAygWCv`bnVsRb{;PVOR9@dEKX^Na)#yX#XL1Mrjwv80if;lnm$_pr=6RL6`Ly+^i&q zkUj9}XMNFFqdjIEGoH{dLmfhPZLYE!IMVOZWUlEajj0`)u~8{{AVPn{ZAGk!4suJ142`+77719%R3xAq}RZw{RC`*%O!aWs=18$$hJBFU*U5 zO|Gj4sg(ClUOV}I|4045kv~@7)@eUcJsn1J+|PiXf()RIqkobl(-iH+G>o%gm1;#y z`+CYg=jgP7C)$dVQgX9gkVMNKq)n+k75mI_*`@pXt8g~Ol#&#Lc9!U|PITr2Pu3@4 zXbloE9fyHpIiLG8rKC#Kps`kF2cTLjAqvbN87i{Q1g)ry^(VjsU1k=;dx;(C83cIgnhmz?%`F^^a-b`Pjmtf0V z3M1{@WQC<=wNG_>RxcGNOpS9jv0og;dB zKz-`aHd)K+7VVf&WD><^@TY`t^>$FZz_QnNC+;NdMT!(UGGCf8kJqFh)p)IIX zrV>6~2KSM!NhZ5D^(`unS{|Bmc>f7Y0y{hQ&08mr-Ln_9KUurLTjB{0ml5> z6=;`Mv^6u*y9jj~e>cT0YWFy(P0DG^)7z9<0;pLB2D zhf3U4Noz67+-q$@!d;8Wu5v5w>SDOMyN9MI@6bc4E^P6+?U~$+we8o6A?ofD*6s6H@EOb*J}Zajs=O+$io4=&|I?PW^0GNh?j;+^cy=g00sSob z3scR@JuQytzFf5IrV$-V38ivQ3)-Sx?gi(Bc}?A*`(#g@r^|HN9H@g#@7-7T)gEl> zJ^e1GQ(8D;4R#fH)|>a$2S5(d--MhU>0snZ^KpQ3BZ2S;KnhiBC%mf_-ZE87{2O{^ zT!9v@W=3pDK7$qzc4ZxZ3|vP)SJ{(sT+h#5?B5k1Taq7}k0tv*74P?7ZY@Lil21VY zGGhs9BSUsvrsjoV$II=;WiIz@Nu`F1lF`{3i+fdbotl(O0xfluEd$D`AsZt#NM9Bo z+wvVC`(i9E)4YPgo%;yLZw*how}AEgNZ-^WmZgkF`uz%ooloy%Z&-7*pXG7+VrU%l zjOe0G4k#W8ZH2MkRlN$5XZm@*KQ^@+nDu%JIgCeUkhw(|S3->BogwVvkr}KBneu!v zy5JcES`^DP))y*BL&97i_l^hG{1{Lehaf5T>cZxuY4%#*UNJ2EpC0q zt?P5DoX4N%VP*xmUv(Hg^)gnvOkRM!e+Nto<;gR zS6>(Okz=wwg8i-Pv;8#tNFQfs@|RP~Hhg9!A&R)QNM91-kiI0uAvvGUPKZak_8E_K zBiA^jKhBOrVvMXLhSbraY$c=;*gv}Ckb&#pC^})n@caY z6@NBhUYriXdV<@j@Rv~>voSoqES%@K9b#f!fx|v_UWLWyj)U;E@=NY#~=SzOO zT5(`i}l+PXDRDr(dRW z{_-blgc&j;@LO6@4DRJAv`-PUJEqV+qmYI68HMEhI(s2@FArmFYl%lXNr*=>S9VoM z{sL(oc6-_R8oLTvMz|*hiI;IZkGO2D#T2_|$!jZ7JPR#53hDC8*Xq>btGdb^R)zN2 zlIK~huiOv^8PiEN4;8yNi&2kOK>F>oorU2_#xL7Aq%T8pNapR@XFSri&v>M3pH;~E zR_02IuT@C@ebJ9ymwJOX6+i7lTQAe%IppuRp38C?=9$j`mv@bX zc3t|tU>wpf>MG>2B;?_J4OWHBLoWM#Z4$SS(a&cbve4VN8n5K3&TA!(C&%FQP(0G< zp;)9h100o4zii|7Yx;IV9Mb1GB<#dV+wS!A>oX3S*DFf6f@l2*z?b+cq#tv=zZ$tb 
zm+Ik3q}czhSKHBA+zUI#J`0d+vlq6AobRQbBkNM@&j81b(0xgWL;8{shvYJL?K2+f z+Gjk{*|%{>e}o=~^k;x$k+~dJBGbBkQoi*Tx3}5v9paF_48E|H&`(cQHy?)vXIY z$Kq+grT!oDfWDhH#ndx9=_?uSq+98k0ySat_xAi6t6lP^AzyxiPtjL}v1f1Ck(;fP z%28$&bn&bUY3s9{$)qo6mWsAD#F6CkYAyT2Ma*>`r)_PDd3CnWb3XVw&Yi7jI?hTS_SxgcSjqfvo-P< zno$2Ouc|ALugIo6mE&@(p=%a?%H#yn%0V6Kc{gSgpm0I+*Q3`tYTz-UjdX+QjmFlX;%} zUH^yu?>r9kJex_(HIzu~O1KmXozg#>ft-K!nGR`?&i!K0AJ=}#WRd?gTYTfC#@HK{ zQ~me6tmTw=Q#&bGzShsZS)P9_;?JhA$PXtnV(*8Q636ovDIC5)bHbNX=7Roy|5vkL z)s)b(-RI3VqMrC0{7!=yPPsfKYl3A8e@AES5?>NfN`dweZ zPqCIIugZgTA72H5=sInyV(7vhSu8&rYOD}Lc5y{}hmY!8Ht+p*Ek=m)JT-K;qXIj= z@sZ1|Y@@c7@z;z4pn0!2y9Y>ZuDwFvCT79CpU4sdlJ`PnlB;iu5&P7fXuN3a9)k++ zK;5PnXkFt3H|$WeQp9J$xV_$G$Pi~EO5I1U+0sSL%qiS|`tiioCnNiGo_esP*|O!+ zOD!b=r_*7!l&PbA4rPCwBru-sV0Y%**p@PW2bslCj=$sX>$sKtxsE#gYqC zeZ>jGDsdv646weKr>UtEvfgR6dn-VEHH86K%J^Hf=>~?i6y+O}u9(C|}`8S9y^yX8c zcrG_Pcb7Wcml(mPVWjn^S-4cJUMulP$8$XEt19{s2W+}9_1B^%$nWYA{AP6-Vsy@> z=;L?wj!USTPEkLgTIVAtgjt#et(jU-_Trbbd_-6DHH{(z$Qc@1ps9T&et_0!=^SmT z%>}`x4m!VmzN5aU_)H*6Uo}X=?M#2!S)ogq%gU_A?02k#EOENqzDPAn6g4om^`HdZ zp#$BthVeNasJG-{!t^NnW#3!!Kz&YOp1s*%ZYTCt&vZ;zZK-Xwr*MpFLq2(b-VO@? 
zNWVtzbn13zpg*T6y@MV&)blRKp3>*~>`uqbx-pHI=Q4q5K7DoBGU4 zv*>fNR$aFQ+X76OMWbuzZ|9WWNZof;P4i(qmtQ34TLo%#4!K9rmKm*?Mo<>um*$X) zG^Lf1EmQkQyq;-#r{}lSrItLM4Be;qvQ)oEKQ--A>vX--^V=9_w?htv;u4+f`IPn> zb-auB=X7Y^R&UD>(zVM+b}YHD!rHe_ zj)BqbTedqV;*bvLfqsXSVkOJeXh_Yo{+V#6*dD3_^#J40BNd22@~MP=%kurL?4eYk z$MsR{Z$i$pJaM{?#6#_R-FAKU1o1Q76t{FPLG@ORlYrI7pDu=F>y1aRWr_2jj7^`a z=)))b4btagDcM6$K59U^p2(JJZqsBl5wu3lCv&ft&xkl{%UzX$gIXcWvlVYFKWux&4jK#x^giL1!j1uzR&(&gWz?1 literal 0 HcmV?d00001 diff --git a/platform/x86_64/nuc14/image/font/spleen-6x12.psf b/platform/x86_64/nuc14/image/font/spleen-6x12.psf new file mode 100644 index 0000000000000000000000000000000000000000..892d085c647255d9d284c87fe956027e85f3970b GIT binary patch literal 7859 zcmeHJheH#|_m5`-+XWF7Yg82FP!Ku69>vp$1;h$A5F08Qv4M(2MF=36WOwOcKuQFp z2uP7;77^6TCD^dTob9Z5ipCRtzu8RyzyHJU!R~(M&6_vxedf(fdBN9v zIGkSi!#v%XHisjy7T|vjcv#xr9A~DhU7WRAAy41p3EQx5AeoX<5a`Qv_y!ha6=dnK zP;be6tOZ;HfwfM-%*Lt5*i2x}GLF&D!p_8F*NWrp?A+1iti??& zqu}|<Zm1kPOESOFhHoQuHPofPk5F!sZI7#Yapv%(w5;ka`RSmC;}T>|UY z_8@pXZO0hg%dCn{wED09!^2+#11=XE@I@lr5Q?J}wYuDO>!{-p>kSNqA`v?rV`~Eg zmLHBtB#}t8TrM8U(cUDv$Mi7`91r%-j>lMBTwKi741?S1jnga^<9{(58!jJ4e3`L~@+Unm zH=SNrVk{+0xfNp@)*gSE5@u>xUp~4@*Od~liqVV3l17Q%hMh_8Q{13cw2c+l)+!Xy z(K{7HGA>ze8NEW=z>XD*oq5i< z-DZ#ZXF1U_Wa`$}S65iDWHxS_F#Da$?bQpv+;xLF zf?O6DFnbnnTA>Ix+iIKHJWh8iu^nsl z6UJkI*1I!NGn`=i4$SSqc-r>8`SbbD<>@w_z8)IJZY!3v7@w1Jtz2HL^N9yz8ZpK> zwqqv#Ha(t~f$UmfeLKeXI*d8j4`u!6)%vw#PGz~=mxujZ3z$OGoqKpWh0I-o)qEc7 zhPCJZ5ANU)bKagi#_h3T+#U~J!e+LCeF}2*hXTeNYXf_MJx50ad#UL$KIZT~Wn*Ud zF&?bv3LlPaopSh9{J8=yZ*__Rce)<4_b)ymd8{!0IUF;AL?STLVGM8+V>XwzGl3&w zV`~;MvHg^nZl`s;n{`mmm$&E0*fV*tYs4`SGM5TdpY6FbHQ#n7;9cd+aM(64-h9l& z%}i|8_@=~nCA(h~3QRxV!Hm8c-y3abAlvQ;hdYE(;JFxS)nWGC`m>D=>-0JsR-?D! 
zh@+W3Rm;qaW79PlGxkKl!7*5izl<=L`L@N+U@S+vbTMet8!$FFbVgWCbWp+CG~&I2 ze*;+S{nX;VS_b3AYS-g@v3uQ_BT*<`VjBr#!^Fcp494deLmca$FPAeMvY25D-^cno zVKeZ>p4fbwbpzS=kakXRkeFy)7apshSC^%(-q;0euRXCuj&D4jJ?o#v?h*+;g==#e zz5ZMh>1=d2p&qy2(f@lUK=M3xZdfm?`tZGq{Zt9bG*9Q-#Nx zFyu4BG??FYtdf1{dfM8&Ho)olO^)MnZo5XT1^ANIW%66)z{f;g-ufpMK5m!hY^rG> zicIzm3EL!Xc|303oHx6#@l2Y#nR}QAH|}z9zB2eqfgSCY_D+bE_uiL!^W7xG)v(i^h1%WsT*7`x-!;{&rA?v{-> zR2v}OW4PR#>Yo)ky`trjap*=)QSYwGq<5#hHrXusJ$}Qu{eMmF<8AOsPJ9p6}5^ z%`M#acQePe`TLsxx~*yu7RUxon32Y@ z4@Cs@Text!iRq-lmbY)d3GvN8Uj6Mr=F|ICMg4DV*uSnOy3g>_>D-fd$8w(AS$)wI zMrPMcv)fQVE`IoKPT+xDgRP?Zux|awehJstXeaEd8z!EaP&2{*z4Kbr4Szpt9X#fJ zZKDG(Wc9}Nex>h@`h8mDQ}0pgdhTI{>aEKm>CS)q?{vy{6AvwWJL*HXEt3WrZrhVR z=P%RL;{oQ$gD0cbE)R!|l#X~5wrA&5xOrmh*rLP5_wV@qV`6c>`XBEHu9{%^!e{oOnP)m=S5a{%+h}^N=_ES@*ehVLeP1*6-`G@hmTL$#TX}y+zt&BbsFI$nY zxvZ;aLQHdAeSLzHrsqUzyij!fptD8QPFcgX;cv-3=ce>nKG5sD*hn+j>Dg$DyIp^r zj$AV0qbV=M({7VZOK9PWo1ry^%~$um;r;q-&NKHeWGr`o?XJwZYxK(th=3(hyCl7kk`ybde?^`>5^mOS_di`HzCI47&8Z^w}{luHEe|!FW zR|}h*CKH=WcQS7nmL0xXb$RCejaeDB%S!rNxrc{m`4H>0k&n9>R@$y{OM7D1dc)8I zd?UMjAJc2}kKYSRR~t`9L*mviTQ}HnQ}x@B;BG^AnNB>g5_!vr4QtK2{jeC88Ckm2 zZSR&P?#~83=sU|Qk(#_e?%l830uEeyT3~eIM4zfbj{iK`&@I0g_2IscRFxa%3o>UD+2E5;bj z-97jEx~?I{*CVIFwyUQe{8}&XvG2glyGzrPUZn=Uy?E+`pZjk|OtqeLBcE0wKixzihl^+Qt z{w`O(_j9Uk@jRU4VK6RkiXl;=z1I}ENA>TIUWH8=N8j0W-)~gg|8&LWA5&fDow@ha z_nu%w4Y8f*WV^{}_L&&ho*|}=vwmD}-RWzw`D%pe_woff+ji~fH}6KgvtZ`zN0;J^ zUe5Nl``|j)MEKO(%j(YWqJ`nwL8I!@g5Ozol3PcOk6rDZaOz>=p=o_u`i(g3KKjV8 z9X}inWeQ6sG%e{?FvvgXlGCM{)Ku5u5q|sZYyO~!Yi7YSj0La0SbwAh;Uj#12)&+E*Wmlro=G)#Kllx*l>V{$|?@7kB~9$voY zSF5kg3?C)$(JA1;lG022BCkKm>-YZFOYW;p*UBfF?;!hsS)W~VAh_l0^WvxhPsYuB z5^mB&UvA3rdA=xTp7(%$AI`Pd-d9|!p4#uqR->$@)(`Rreft|sGUU3=yf$aSiWQ5t zd!1iW-c^>Fd?#YA_NZkemG%7AAjg2-wF^1Qr&DgJ%xvkiHJuOpiw)rqlbnsco1$GZ zJo)A?6;-D%)E>H@+M?`hpYB*!UiCdGwPjoP^Xh}6)AM>f-?4n7nTg2Ad2hy#&wtjn zWSLs**qQg>YIpBxs=;-4N3WdM?H}2T&L=-=)?^1ynX~zs`>ceO;@@7Gf4NYw3 zku^k8NBp;Yye;TFi~c(OgZ(dpp!ZiM@4X?KYO1oeqaFx^L*BoXg;jfeypwEeGf7go 
zV53h!#DY%)R9$+=Z59quCZ03>?$zo&KwV)uYGSP8y~?{w$A1s#vw56Vw7(5!;U{Ok8UwbH6xqk6`E z=0#24zH|BaxCj0w$8w$u!|oK%h%=1r**L*%aLMz-Q>`?zI@+_3)5!5HPg=gs8ddUk zUd3fpp|z?Oegj47m-6p?<#*X|xpaQ2ruwgAypeSc2UPy zTr3XUGNjg3b?Vc#8&C4PHntv4-G%}?EqiC{^!V}17pwCgP6_=y-Mt}TKCkd~mD@Ah zVGbw8_j=(_Z@A#@hW}3f(zJe$_oFU;yCg2B=5zWOj-7hjc<}6FBS)J}U46Aw`hF|l zNY;6Fq*uO6_>*9hbrrKmS$*~D>zD7oASBFUXWXsnAy#{OE}gYLYW~D|9z%P~?$N91 zirB|f9M)Xrc53eA%X6;|Uu#oz`&{&&1D}q&v4i~4*~YFPH%VA{ak?=_DRrz}a=w1y ziM65G+(9LC=eb;clr^A_aVNC2m$9I6Wa21Oaop&Azdp7+pr(Ac1`Qheq;Ky%hcDWV z|7|++S_a%}vvsjjQuPICQs%xVYmuB1!X zD)I-7S!Aek*=YV|nSJVUE0II@(>B@NJw`k~KJWWtvWoM=YLScS7`I6aTWT9;m~ZHR z|Hj*kZv!3e29e|UHBNuJ0+A7B$5%dx-Vks3b?$&7`xo~fWk2`mMsf1O)^`$JZde;! z`MTvBFMcQT9_FpE8N7Pr)YJW6-i_Jb<)He%KPMQ~ zD?JWqf7B$bv;8!EyQ9W@@u7s|-bP2w9a?9KkJd-_UfZ+a%v|r7t&b;H&PaPlu9+t1eO~V(@p347{9G}K}QW(GK=@IK5xL@IR40`$}Kn2 zY;Jk??eVQK=d|L%YT?I4``<21MVGdH-_v@fF^2!Awx@6Oa0|~PVOn_xC+K`;&*91x zP^N)W0VpOFKWQjR{Jydrl;=R14a%JBA)w3$WdSIQL0JOI3ZO+m9|QV0&?kTv104!< znEDtfFM={1=u<$4104bMX>~Brl4=RiGN1{dNuVj9qkyNBBasM+jv>);Bszgafk<=` ziGq+Q7>UG46oN#dNR$I3U?hwJGZ+nHz#PWHIIuvWaqt@orN9!#BasY=2qcy%&m)lp ziKHkr5+4eZ3UE}5k*pFHs$*5LC@=#NB3OioW3X5quZl;+ zaaf{CK*R}fQYWg;AR-W))pAu5B2L0mLtH=>0C(_E=ORJ^8xbJ|Pt7qz z$Y2v92-vJHL<9+25JACKL`1?iL_~p?2K$JH?V4yr#J~l;Na60KIT884I zP<%AXE(br&2}CI15F*aOVK@T*a1`ihpm7zI0eueWWS~=kmIIv%G%mCZpffd6pcO#p zXrh45209DqK%h^ez)YY+fKCHC2~LK6hvksG6Rw45UD_9IwI2$nTn_+ zL?t8YEYP_?7Xn=jbUDx!ppF2w6x1ZBBS9U7$4d}NAX0`%DWWJuMItH+QQ5eIL}U~q zBN2(UcnicJDi%?>xT64(mk?Qs$Pz>rBeDpQ7ZHi$OGlIfQRlEl79#Tzc>$4mh|EP~ z4kEJ=c^*+Ih{BPjVT*V~o<(FbB9jm)N8}kqCL%HcQHh8;gD5$+5FzpyqACzohNx0R zT|`tdqVUv#h&+i%F(N|{bqZ0qP9@kH8(}^|5rws4L#48nnNF1FEQ3Rq$L}4EnL0tsuVo;ZW zx)jv7jLv|HP!pgo19ds5D?wSPDg(`NP^GC;Koba>AkYMZMhuz|(1d~}3^b=enFpG1 i&_sadG-z: - 8000: fa cli - 8001: fc cld - 8002: 66 89 c1 mov ecx,eax - 8005: 31 c0 xor ax,ax - 8007: 8e d8 mov ds,ax - 8009: 8e c0 mov es,ax - 800b: 8e d0 mov ss,ax - 800d: 0f 01 16 58 80 lgdtw ds:0x8058 - 8012: 0f 20 c0 mov eax,cr0 - 8015: 66 83 c8 01 or 
eax,0x1 - 8019: 0f 22 c0 mov cr0,eax - 801c: ea 21 80 08 00 jmp 0x8:0x8021 - -0000000000008021 : - 8021: 66 b8 10 00 8e d8 mov eax,0xd88e0010 - 8027: 8e c0 mov es,ax - 8029: 8e d0 mov ss,ax - 802b: 8e e0 mov fs,ax - 802d: 8e e8 mov gs,ax - 802f: ff e1 jmp cx - 8031: 2e 8d b4 26 00 lea si,cs:[si+0x26] - 8036: 00 00 add BYTE PTR [bx+si],al - 8038: 00 8d b4 26 add BYTE PTR [di+0x26b4],cl - ... - 8048: ff (bad) - 8049: ff 00 inc WORD PTR [bx+si] - 804b: 00 00 add BYTE PTR [bx+si],al - 804d: 9b fwait - 804e: cf iret - 804f: 00 ff add bh,bh - 8051: ff 00 inc WORD PTR [bx+si] - 8053: 00 00 add BYTE PTR [bx+si],al - 8055: 93 xchg bx,ax - 8056: cf iret - 8057: 00 17 add BYTE PTR [bx],dl - 8059: 00 40 80 add BYTE PTR [bx+si-0x80],al - ... diff --git a/platform/x86_64/qemu/image/bootloader/out/boot.bin b/platform/x86_64/qemu/image/bootloader/out/boot.bin deleted file mode 100755 index 24a1a8c9309a992c131042daaa02ea10ad6b5827..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 94 zcmezMC$00K;eoyzeFyq3@H2`JfjWMqH=CI&kOFs}h3;sBEngfSQx92hKsY8)6?8Cbw-SwT_)Q27Q_Iu6Q( z(QtK8@lk3t1V%$(Gz3ONU^E0qLtr!nMnhmU1V%$(Gz3ONU^E0qLxA=n@as=n=Rv~* zeK-0J^j+X*6pLu!S2&Q?e1ef*>A)++1`dX_9Rdsx*}eyTFaAH&>)oOTau$?=x$ysg zAZzw{1~5JOJcBrcLjz0>8x0y`5CD!ID59z21dD(C&wwT_233zP&XAf{QdDVd#Ngx@ z>>BUr;u7Q<91IaOG-F`UE3V8fNlXILB}EWA1I8*zttbH+N+FO2>IeCS4Q5UQlmQ(N zfscEj3d2-`^)kVE2u1=_e*#p98AyZd;ehfbpfo7FKyv8d093ymngn=k2T2Sxtb6a1 zHA?DXr>nc`t5-i>y?XVdYya#Ye)q2goa7&NXN34oT%O0@Cg0b(4bc-9bV5ocbV2vT z#!|?Ce6lL7i~m3Rbxqt$iLbb=LpGHHpRS(TUsork#nZZcp3YXK*W{{HpWHDOsXnCtW&^EpsBh3UIuvtCZ_rTl^Lp3Q z+ccnoV0r|yzJYi}PR%~mxd@TrL(4=CPIl*jXzuyu_w1x zS2rQw6pB(MOh-@6mYAA7F%i&4ruR6E?1RI1L_h88^U!tQNS{j2r&rRuMx>yiTSC&) zvM(jwfzs@l5HJzHBhHE>J*!_6*P}S*v7QxuXz5g(&Qe_ah_8)X7hB?+;syO`>bJM` zHc*6F6GDTfM9XH@8uzVSk=Lu~3;LT#M~JIZlXOoH^?8y;&7J?GnQgJ1}_hw+q27 z$tP}Qt~I-Bbxa98G!M-{4bV?ZT^*+GX(wUYP{Y}9wn>}l=P&3fO=&mNDs?D+&h39p zhvrZnidW$8(uO)z@2N4dZV0PU6i|WL?Psscvf82BRvuq 
z#kSZHFNy=X|5(w4ZmAtvkK9DqojIp=t;)u=rd|3rVJmGf^yN10HnaFTVog`^9W35= zkjIW*n^Oldx;O__z``@OxlpF&0(e}|@1d;Q=vy0wlb@+^zCzS3Z&oY0F|~}oGMMEV zwoTEAkRUGAygWCv`bnVsRb{;PVOR9@dEKX^Na)#yX#XL1Mrjwv80if;lnm$_pr=6RL6`Ly+^i&q zkUj9}XMNFFqdjIEGoH{dLmfhPZLYE!IMVOZWUlEajj0`)u~8{{AVPn{ZAGk!4suJ142`+77719%R3xAq}RZw{RC`*%O!aWs=18$$hJBFU*U5 zO|Gj4sg(ClUOV}I|4045kv~@7)@eUcJsn1J+|PiXf()RIqkobl(-iH+G>o%gm1;#y z`+CYg=jgP7C)$dVQgX9gkVMNKq)n+k75mI_*`@pXt8g~Ol#&#Lc9!U|PITr2Pu3@4 zXbloE9fyHpIiLG8rKC#Kps`kF2cTLjAqvbN87i{Q1g)ry^(VjsU1k=;dx;(C83cIgnhmz?%`F^^a-b`Pjmtf0V z3M1{@WQC<=wNG_>RxcGNOpS9jv0og;dB zKz-`aHd)K+7VVf&WD><^@TY`t^>$FZz_QnNC+;NdMT!(UGGCf8kJqFh)p)IIX zrV>6~2KSM!NhZ5D^(`unS{|Bmc>f7Y0y{hQ&08mr-Ln_9KUurLTjB{0ml5> z6=;`Mv^6u*y9jj~e>cT0YWFy(P0DG^)7z9<0;pLB2D zhf3U4Noz67+-q$@!d;8Wu5v5w>SDOMyN9MI@6bc4E^P6+?U~$+we8o6A?ofD*6s6H@EOb*J}Zajs=O+$io4=&|I?PW^0GNh?j;+^cy=g00sSob z3scR@JuQytzFf5IrV$-V38ivQ3)-Sx?gi(Bc}?A*`(#g@r^|HN9H@g#@7-7T)gEl> zJ^e1GQ(8D;4R#fH)|>a$2S5(d--MhU>0snZ^KpQ3BZ2S;KnhiBC%mf_-ZE87{2O{^ zT!9v@W=3pDK7$qzc4ZxZ3|vP)SJ{(sT+h#5?B5k1Taq7}k0tv*74P?7ZY@Lil21VY zGGhs9BSUsvrsjoV$II=;WiIz@Nu`F1lF`{3i+fdbotl(O0xfluEd$D`AsZt#NM9Bo z+wvVC`(i9E)4YPgo%;yLZw*how}AEgNZ-^WmZgkF`uz%ooloy%Z&-7*pXG7+VrU%l zjOe0G4k#W8ZH2MkRlN$5XZm@*KQ^@+nDu%JIgCeUkhw(|S3->BogwVvkr}KBneu!v zy5JcES`^DP))y*BL&97i_l^hG{1{Lehaf5T>cZxuY4%#*UNJ2EpC0q zt?P5DoX4N%VP*xmUv(Hg^)gnvOkRM!e+Nto<;gR zS6>(Okz=wwg8i-Pv;8#tNFQfs@|RP~Hhg9!A&R)QNM91-kiI0uAvvGUPKZak_8E_K zBiA^jKhBOrVvMXLhSbraY$c=;*gv}Ckb&#pC^})n@caY z6@NBhUYriXdV<@j@Rv~>voSoqES%@K9b#f!fx|v_UWLWyj)U;E@=NY#~=SzOO zT5(`i}l+PXDRDr(dRW z{_-blgc&j;@LO6@4DRJAv`-PUJEqV+qmYI68HMEhI(s2@FArmFYl%lXNr*=>S9VoM z{sL(oc6-_R8oLTvMz|*hiI;IZkGO2D#T2_|$!jZ7JPR#53hDC8*Xq>btGdb^R)zN2 zlIK~huiOv^8PiEN4;8yNi&2kOK>F>oorU2_#xL7Aq%T8pNapR@XFSri&v>M3pH;~E zR_02IuT@C@ebJ9ymwJOX6+i7lTQAe%IppuRp38C?=9$j`mv@bX zc3t|tU>wpf>MG>2B;?_J4OWHBLoWM#Z4$SS(a&cbve4VN8n5K3&TA!(C&%FQP(0G< zp;)9h100o4zii|7Yx;IV9Mb1GB<#dV+wS!A>oX3S*DFf6f@l2*z?b+cq#tv=zZ$tb 
zm+Ik3q}czhSKHBA+zUI#J`0d+vlq6AobRQbBkNM@&j81b(0xgWL;8{shvYJL?K2+f z+Gjk{*|%{>e}o=~^k;x$k+~dJBGbBkQoi*Tx3}5v9paF_48E|H&`(cQHy?)vXIY z$Kq+grT!oDfWDhH#ndx9=_?uSq+98k0ySat_xAi6t6lP^AzyxiPtjL}v1f1Ck(;fP z%28$&bn&bUY3s9{$)qo6mWsAD#F6CkYAyT2Ma*>`r)_PDd3CnWb3XVw&Yi7jI?hTS_SxgcSjqfvo-P< zno$2Ouc|ALugIo6mE&@(p=%a?%H#yn%0V6Kc{gSgpm0I+*Q3`tYTz-UjdX+QjmFlX;%} zUH^yu?>r9kJex_(HIzu~O1KmXozg#>ft-K!nGR`?&i!K0AJ=}#WRd?gTYTfC#@HK{ zQ~me6tmTw=Q#&bGzShsZS)P9_;?JhA$PXtnV(*8Q636ovDIC5)bHbNX=7Roy|5vkL z)s)b(-RI3VqMrC0{7!=yPPsfKYl3A8e@AES5?>NfN`dweZ zPqCIIugZgTA72H5=sInyV(7vhSu8&rYOD}Lc5y{}hmY!8Ht+p*Ek=m)JT-K;qXIj= z@sZ1|Y@@c7@z;z4pn0!2y9Y>ZuDwFvCT79CpU4sdlJ`PnlB;iu5&P7fXuN3a9)k++ zK;5PnXkFt3H|$WeQp9J$xV_$G$Pi~EO5I1U+0sSL%qiS|`tiioCnNiGo_esP*|O!+ zOD!b=r_*7!l&PbA4rPCwBru-sV0Y%**p@PW2bslCj=$sX>$sKtxsE#gYqC zeZ>jGDsdv646weKr>UtEvfgR6dn-VEHH86K%J^Hf=>~?i6y+O}u9(C|}`8S9y^yX8c zcrG_Pcb7Wcml(mPVWjn^S-4cJUMulP$8$XEt19{s2W+}9_1B^%$nWYA{AP6-Vsy@> z=;L?wj!USTPEkLgTIVAtgjt#et(jU-_Trbbd_-6DHH{(z$Qc@1ps9T&et_0!=^SmT z%>}`x4m!VmzN5aU_)H*6Uo}X=?M#2!S)ogq%gU_A?02k#EOENqzDPAn6g4om^`HdZ zp#$BthVeNasJG-{!t^NnW#3!!Kz&YOp1s*%ZYTCt&vZ;zZK-Xwr*MpFLq2(b-VO@? 
zNWVtzbn13zpg*T6y@MV&)blRKp3>*~>`uqbx-pHI=Q4q5K7DoBGU4 zv*>fNR$aFQ+X76OMWbuzZ|9WWNZof;P4i(qmtQ34TLo%#4!K9rmKm*?Mo<>um*$X) zG^Lf1EmQkQyq;-#r{}lSrItLM4Be;qvQ)oEKQ--A>vX--^V=9_w?htv;u4+f`IPn> zb-auB=X7Y^R&UD>(zVM+b}YHD!rHe_ zj)BqbTedqV;*bvLfqsXSVkOJeXh_Yo{+V#6*dD3_^#J40BNd22@~MP=%kurL?4eYk z$MsR{Z$i$pJaM{?#6#_R-FAKU1o1Q76t{FPLG@ORlYrI7pDu=F>y1aRWr_2jj7^`a z=)))b4btagDcM6$K59U^p2(JJZqsBl5wu3lCv&ft&xkl{%UzX$gIXcWvlVYFKWux&4jK#x^giL1!j1uzR&(&gWz?1 literal 0 HcmV?d00001 diff --git a/platform/x86_64/qemu/image/font/spleen-6x12.psf b/platform/x86_64/qemu/image/font/spleen-6x12.psf new file mode 100644 index 0000000000000000000000000000000000000000..892d085c647255d9d284c87fe956027e85f3970b GIT binary patch literal 7859 zcmeHJheH#|_m5`-+XWF7Yg82FP!Ku69>vp$1;h$A5F08Qv4M(2MF=36WOwOcKuQFp z2uP7;77^6TCD^dTob9Z5ipCRtzu8RyzyHJU!R~(M&6_vxedf(fdBN9v zIGkSi!#v%XHisjy7T|vjcv#xr9A~DhU7WRAAy41p3EQx5AeoX<5a`Qv_y!ha6=dnK zP;be6tOZ;HfwfM-%*Lt5*i2x}GLF&D!p_8F*NWrp?A+1iti??& zqu}|<Zm1kPOESOFhHoQuHPofPk5F!sZI7#Yapv%(w5;ka`RSmC;}T>|UY z_8@pXZO0hg%dCn{wED09!^2+#11=XE@I@lr5Q?J}wYuDO>!{-p>kSNqA`v?rV`~Eg zmLHBtB#}t8TrM8U(cUDv$Mi7`91r%-j>lMBTwKi741?S1jnga^<9{(58!jJ4e3`L~@+Unm zH=SNrVk{+0xfNp@)*gSE5@u>xUp~4@*Od~liqVV3l17Q%hMh_8Q{13cw2c+l)+!Xy z(K{7HGA>ze8NEW=z>XD*oq5i< z-DZ#ZXF1U_Wa`$}S65iDWHxS_F#Da$?bQpv+;xLF zf?O6DFnbnnTA>Ix+iIKHJWh8iu^nsl z6UJkI*1I!NGn`=i4$SSqc-r>8`SbbD<>@w_z8)IJZY!3v7@w1Jtz2HL^N9yz8ZpK> zwqqv#Ha(t~f$UmfeLKeXI*d8j4`u!6)%vw#PGz~=mxujZ3z$OGoqKpWh0I-o)qEc7 zhPCJZ5ANU)bKagi#_h3T+#U~J!e+LCeF}2*hXTeNYXf_MJx50ad#UL$KIZT~Wn*Ud zF&?bv3LlPaopSh9{J8=yZ*__Rce)<4_b)ymd8{!0IUF;AL?STLVGM8+V>XwzGl3&w zV`~;MvHg^nZl`s;n{`mmm$&E0*fV*tYs4`SGM5TdpY6FbHQ#n7;9cd+aM(64-h9l& z%}i|8_@=~nCA(h~3QRxV!Hm8c-y3abAlvQ;hdYE(;JFxS)nWGC`m>D=>-0JsR-?D! 
zh@+W3Rm;qaW79PlGxkKl!7*5izl<=L`L@N+U@S+vbTMet8!$FFbVgWCbWp+CG~&I2 ze*;+S{nX;VS_b3AYS-g@v3uQ_BT*<`VjBr#!^Fcp494deLmca$FPAeMvY25D-^cno zVKeZ>p4fbwbpzS=kakXRkeFy)7apshSC^%(-q;0euRXCuj&D4jJ?o#v?h*+;g==#e zz5ZMh>1=d2p&qy2(f@lUK=M3xZdfm?`tZGq{Zt9bG*9Q-#Nx zFyu4BG??FYtdf1{dfM8&Ho)olO^)MnZo5XT1^ANIW%66)z{f;g-ufpMK5m!hY^rG> zicIzm3EL!Xc|303oHx6#@l2Y#nR}QAH|}z9zB2eqfgSCY_D+bE_uiL!^W7xG)v(i^h1%WsT*7`x-!;{&rA?v{-> zR2v}OW4PR#>Yo)ky`trjap*=)QSYwGq<5#hHrXusJ$}Qu{eMmF<8AOsPJ9p6}5^ z%`M#acQePe`TLsxx~*yu7RUxon32Y@ z4@Cs@Text!iRq-lmbY)d3GvN8Uj6Mr=F|ICMg4DV*uSnOy3g>_>D-fd$8w(AS$)wI zMrPMcv)fQVE`IoKPT+xDgRP?Zux|awehJstXeaEd8z!EaP&2{*z4Kbr4Szpt9X#fJ zZKDG(Wc9}Nex>h@`h8mDQ}0pgdhTI{>aEKm>CS)q?{vy{6AvwWJL*HXEt3WrZrhVR z=P%RL;{oQ$gD0cbE)R!|l#X~5wrA&5xOrmh*rLP5_wV@qV`6c>`XBEHu9{%^!e{oOnP)m=S5a{%+h}^N=_ES@*ehVLeP1*6-`G@hmTL$#TX}y+zt&BbsFI$nY zxvZ;aLQHdAeSLzHrsqUzyij!fptD8QPFcgX;cv-3=ce>nKG5sD*hn+j>Dg$DyIp^r zj$AV0qbV=M({7VZOK9PWo1ry^%~$um;r;q-&NKHeWGr`o?XJwZYxK(th=3(hyCl7kk`ybde?^`>5^mOS_di`HzCI47&8Z^w}{luHEe|!FW zR|}h*CKH=WcQS7nmL0xXb$RCejaeDB%S!rNxrc{m`4H>0k&n9>R@$y{OM7D1dc)8I zd?UMjAJc2}kKYSRR~t`9L*mviTQ}HnQ}x@B;BG^AnNB>g5_!vr4QtK2{jeC88Ckm2 zZSR&P?#~83=sU|Qk(#_e?%l830uEeyT3~eIM4zfbj{iK`&@I0g_2IscRFxa%3o>UD+2E5;bj z-97jEx~?I{*CVIFwyUQe{8}&XvG2glyGzrPUZn=Uy?E+`pZjk|OtqeLBcE0wKixzihl^+Qt z{w`O(_j9Uk@jRU4VK6RkiXl;=z1I}ENA>TIUWH8=N8j0W-)~gg|8&LWA5&fDow@ha z_nu%w4Y8f*WV^{}_L&&ho*|}=vwmD}-RWzw`D%pe_woff+ji~fH}6KgvtZ`zN0;J^ zUe5Nl``|j)MEKO(%j(YWqJ`nwL8I!@g5Ozol3PcOk6rDZaOz>=p=o_u`i(g3KKjV8 z9X}inWeQ6sG%e{?FvvgXlGCM{)Ku5u5q|sZYyO~!Yi7YSj0La0SbwAh;Uj#12)&+E*Wmlro=G)#Kllx*l>V{$|?@7kB~9$voY zSF5kg3?C)$(JA1;lG022BCkKm>-YZFOYW;p*UBfF?;!hsS)W~VAh_l0^WvxhPsYuB z5^mB&UvA3rdA=xTp7(%$AI`Pd-d9|!p4#uqR->$@)(`Rreft|sGUU3=yf$aSiWQ5t zd!1iW-c^>Fd?#YA_NZkemG%7AAjg2-wF^1Qr&DgJ%xvkiHJuOpiw)rqlbnsco1$GZ zJo)A?6;-D%)E>H@+M?`hpYB*!UiCdGwPjoP^Xh}6)AM>f-?4n7nTg2Ad2hy#&wtjn zWSLs**qQg>YIpBxs=;-4N3WdM?H}2T&L=-=)?^1ynX~zs`>ceO;@@7Gf4NYw3 zku^k8NBp;Yye;TFi~c(OgZ(dpp!ZiM@4X?KYO1oeqaFx^L*BoXg;jfeypwEeGf7go 
zV53h!#DY%)R9$+=Z59quCZ03>?$zo&KwV)uYGSP8y~?{w$A1s#vw56Vw7(5!;U{Ok8UwbH6xqk6`E z=0#24zH|BaxCj0w$8w$u!|oK%h%=1r**L*%aLMz-Q>`?zI@+_3)5!5HPg=gs8ddUk zUd3fpp|z?Oegj47m-6p?<#*X|xpaQ2ruwgAypeSc2UPy zTr3XUGNjg3b?Vc#8&C4PHntv4-G%}?EqiC{^!V}17pwCgP6_=y-Mt}TKCkd~mD@Ah zVGbw8_j=(_Z@A#@hW}3f(zJe$_oFU;yCg2B=5zWOj-7hjc<}6FBS)J}U46Aw`hF|l zNY;6Fq*uO6_>*9hbrrKmS$*~D>zD7oASBFUXWXsnAy#{OE}gYLYW~D|9z%P~?$N91 zirB|f9M)Xrc53eA%X6;|Uu#oz`&{&&1D}q&v4i~4*~YFPH%VA{ak?=_DRrz}a=w1y ziM65G+(9LC=eb;clr^A_aVNC2m$9I6Wa21Oaop&Azdp7+pr(Ac1`Qheq;Ky%hcDWV z|7|++S_a%}vvsjjQuPICQs%xVYmuB1!X zD)I-7S!Aek*=YV|nSJVUE0II@(>B@NJw`k~KJWWtvWoM=YLScS7`I6aTWT9;m~ZHR z|Hj*kZv!3e29e|UHBNuJ0+A7B$5%dx-Vks3b?$&7`xo~fWk2`mMsf1O)^`$JZde;! z`MTvBFMcQT9_FpE8N7Pr)YJW6-i_Jb<)He%KPMQ~ zD?JWqf7B$bv;8!EyQ9W@@u7s|-bP2w9a?9KkJd-_UfZ+a%v|r7t&b;H&PaPlu9+t1eO~V(@p347{9G}K}QW(GK=@IK5xL@IR40`$}Kn2 zY;Jk??eVQK=d|L%YT?I4``<21MVGdH-_v@fF^2!Awx@6Oa0|~PVOn_xC+K`;&*91x zP^N)W0VpOFKWQjR{Jydrl;=R14a%JBA)w3$WdSIQL0JOI3ZO+m9|QV0&?kTv104!< znEDtfFM={1=u<$4104bMX>~Brl4=RiGN1{dNuVj9qkyNBBasM+jv>);Bszgafk<=` ziGq+Q7>UG46oN#dNR$I3U?hwJGZ+nHz#PWHIIuvWaqt@orN9!#BasY=2qcy%&m)lp ziKHkr5+4eZ3UE}5k*pFHs$*5LC@=#NB3OioW3X5quZl;+ zaaf{CK*R}fQYWg;AR-W))pAu5B2L0mLtH=>0C(_E=ORJ^8xbJ|Pt7qz z$Y2v92-vJHL<9+25JACKL`1?iL_~p?2K$JH?V4yr#J~l;Na60KIT884I zP<%AXE(br&2}CI15F*aOVK@T*a1`ihpm7zI0eueWWS~=kmIIv%G%mCZpffd6pcO#p zXrh45209DqK%h^ez)YY+fKCHC2~LK6hvksG6Rw45UD_9IwI2$nTn_+ zL?t8YEYP_?7Xn=jbUDx!ppF2w6x1ZBBS9U7$4d}NAX0`%DWWJuMItH+QQ5eIL}U~q zBN2(UcnicJDi%?>xT64(mk?Qs$Pz>rBeDpQ7ZHi$OGlIfQRlEl79#Tzc>$4mh|EP~ z4kEJ=c^*+Ih{BPjVT*V~o<(FbB9jm)N8}kqCL%HcQHh8;gD5$+5FzpyqACzohNx0R zT|`tdqVUv#h&+i%F(N|{bqZ0qP9@kH8(}^|5rws4L#48nnNF1FEQ3Rq$L}4EnL0tsuVo;ZW zx)jv7jLv|HP!pgo19ds5D?wSPDg(`NP^GC;Koba>AkYMZMhuz|(1d~}3^b=enFpG1 i&_sadG-z { #[repr(transparent)] @@ -71,109 +80,153 @@ impl AcpiHandler for HvAcpiHandler { fn unmap_physical_region(region: &acpi::PhysicalMapping) {} } -lazy_static::lazy_static! 
{ - static ref ROOT_ACPI: Mutex = { - Mutex::new(RootAcpi::default()) - }; +static ROOT_ACPI: Once = Once::new(); + +#[derive(Clone, Debug)] +enum PatchValue { + U8(u8), + U16(u16), + U32(u32), + U64(u64), } #[derive(Clone, Debug, Default)] pub struct AcpiTable { - bytes: Vec, + sig: Option, + src: usize, + patches: BTreeMap, + len: usize, + checksum: u8, gpa: usize, hpa: usize, is_addr_set: bool, - is_dirty: bool, +} + +fn get_byte_sum_u32(value: u32) -> u8 { + value + .to_ne_bytes() + .iter() + .fold(0u8, |acc, &b| acc.wrapping_add(b)) +} + +fn get_byte_sum_u64(value: u64) -> u8 { + value + .to_ne_bytes() + .iter() + .fold(0u8, |acc, &b| acc.wrapping_add(b)) } impl AcpiTable { pub fn set_u8(&mut self, value: u8, offset: usize) { - self.bytes[offset] = value; - self.is_dirty = true; + self.patches.insert(offset, PatchValue::U8(value)); + let old = unsafe { *((self.src + offset) as *const u8) }; + self.checksum = self.checksum.wrapping_add(old).wrapping_sub(value); } pub fn set_u32(&mut self, value: u32, offset: usize) { - let bytes = value.to_ne_bytes(); - self.bytes[offset..offset + 4].copy_from_slice(&bytes); - self.is_dirty = true; + self.patches.insert(offset, PatchValue::U32(value)); + let old = unsafe { read_unaligned((self.src + offset) as *const u32) }; + self.checksum = self + .checksum + .wrapping_add(get_byte_sum_u32(old)) + .wrapping_sub(get_byte_sum_u32(value)); } pub fn set_u64(&mut self, value: u64, offset: usize) { - let bytes = value.to_ne_bytes(); - self.bytes[offset..offset + 8].copy_from_slice(&bytes); - self.is_dirty = true; + self.patches.insert(offset, PatchValue::U64(value)); + let old = unsafe { read_unaligned((self.src + offset) as *const u64) }; + self.checksum = self + .checksum + .wrapping_add(get_byte_sum_u64(old)) + .wrapping_sub(get_byte_sum_u64(value)); } - // not for rsdp - pub fn set_len(&mut self, len: usize) { - self.bytes.resize(len, 0); - self.set_u32(len as u32, 4); - self.is_dirty = true; + /// new len must not be longer + 
pub fn set_new_len(&mut self, len: usize) { + let src_len = self.get_u32(4) as usize; + println!("len: {:x}, selflen: {:x}", len, src_len); + assert!(len <= src_len); + + // update checksum + for offset in len..src_len { + self.checksum = self + .checksum + .wrapping_add(unsafe { *((self.src + offset) as *const u8) }); + } + + self.set_u32(len as _, 4); + self.len = len; } pub fn get_len(&self) -> usize { - self.bytes.len() + self.len } - pub fn get_bytes(&self) -> &Vec { - &self.bytes + pub fn get_unpatched_src(&self) -> *const u8 { + self.src as *const u8 } pub fn get_u8(&self, offset: usize) -> u8 { - self.bytes[offset] + if let Some(&PatchValue::U8(value)) = self.patches.get(&offset) { + return value; + } + unsafe { *((self.src + offset) as *const u8) } } pub fn get_u16(&self, offset: usize) -> u16 { - let bytes: [u8; 2] = self.bytes[offset..offset + 2].try_into().unwrap(); - u16::from_ne_bytes(bytes) + if let Some(&PatchValue::U16(value)) = self.patches.get(&offset) { + return value; + } + unsafe { read_unaligned((self.src + offset) as *const u16) } } pub fn get_u32(&self, offset: usize) -> u32 { - let bytes: [u8; 4] = self.bytes[offset..offset + 4].try_into().unwrap(); - u32::from_ne_bytes(bytes) + if let Some(&PatchValue::U32(value)) = self.patches.get(&offset) { + return value; + } + unsafe { read_unaligned((self.src + offset) as *const u32) } } pub fn get_u64(&self, offset: usize) -> u64 { - let bytes: [u8; 8] = self.bytes[offset..offset + 8].try_into().unwrap(); - u64::from_ne_bytes(bytes) - } - - pub fn fill(&mut self, ptr: *const u8, len: usize) { - self.bytes.clear(); - if self.bytes.capacity() < len { - self.bytes.reserve(len); - } - - unsafe { - core::ptr::copy_nonoverlapping(ptr, self.bytes.as_mut_ptr(), len); - self.bytes.set_len(len); + if let Some(&PatchValue::U64(value)) = self.patches.get(&offset) { + return value; } + unsafe { read_unaligned((self.src + offset) as *const u64) } } - pub fn copy_to_mem(&self) { - unsafe { - 
core::ptr::copy_nonoverlapping( - self.bytes.as_ptr(), - self.hpa as *mut u8, - self.bytes.len(), - ) - }; + pub fn fill( + &mut self, + sig: Option, + ptr: *const u8, + len: usize, + checksum_offset: usize, + ) { + self.sig = sig; + self.patches.clear(); + self.src = ptr as usize; + self.len = len; + self.checksum = unsafe { *(ptr.wrapping_add(checksum_offset)) }; } - pub fn remove(&mut self, start: usize, len: usize) { - let tot_len = self.bytes.len(); - let end = start + len; - assert!(end <= tot_len); + pub unsafe fn copy_to_mem(&self) { + core::ptr::copy(self.src as *const u8, self.hpa as *mut u8, self.len); - if len == 0 { - return; + macro_rules! write_patch { + ($addr:expr, $val:expr, $ty:ty) => { + write_unaligned($addr as *mut $ty, $val) + }; } - unsafe { - let ptr = self.bytes.as_mut_ptr(); - core::ptr::copy(ptr.add(end), ptr.add(start), tot_len - end); + for (offset, value) in self.patches.iter() { + let addr = self.hpa + *offset; + match *value { + PatchValue::U8(v) => write_patch!(addr, v, u8), + PatchValue::U16(v) => write_patch!(addr, v, u16), + PatchValue::U32(v) => write_patch!(addr, v, u32), + PatchValue::U64(v) => write_patch!(addr, v, u64), + _ => {} + } } - self.set_len(tot_len - len); } pub fn set_addr(&mut self, hpa: usize, gpa: usize) { @@ -184,12 +237,7 @@ impl AcpiTable { /// for rsdp, offset = 8; for the others, offset = 9. 
pub fn update_checksum(&mut self, offset: usize) { - self.bytes[offset] = 0; - let sum = self - .bytes - .iter() - .fold(0u8, |sum, &byte| sum.wrapping_add(byte)); - self.bytes[offset] = 0u8.wrapping_sub(sum); + unsafe { *((self.src + offset) as *mut u8) = self.checksum }; } } @@ -203,16 +251,20 @@ struct AcpiPointer { #[derive(Clone, Debug, Default)] pub struct RootAcpi { + /// we need to store rsdp to a safer place + rsdp_copy: Vec, rsdp: AcpiTable, tables: BTreeMap, pointers: Vec, devices: Vec, config_space_base: usize, config_space_size: usize, - // key: data reg hpa, value: bdf + /// key: data reg hpa, value: bdf msi_data_reg_map: BTreeMap, - // key: msi-x table bar, value: bdf + /// key: msi-x table bar, value: bdf msix_bar_map: BTreeMap, + /// key: cpuid, value: cpu nr (continuous) + lapic_map: BTreeMap, } impl RootAcpi { @@ -233,7 +285,7 @@ impl RootAcpi { fn add_new_table(&mut self, sig: Signature, ptr: *const u8, len: usize) { let mut table = AcpiTable::default(); - table.fill(ptr, len); + table.fill(Some(sig), ptr, len, ACPI_CHECKSUM_OFFSET); self.tables.insert(sig, table); } @@ -270,16 +322,18 @@ impl RootAcpi { // fix madt cpu info for entry in - unsafe { Pin::new_unchecked(&*(madt.get_bytes().clone().as_ptr() as *const Madt)) } - .entries() + unsafe { Pin::new_unchecked(&*(madt.get_unpatched_src() as *const Madt)) }.entries() { let mut entry_len = madt.get_u8(madt_cur + 1) as usize; match entry { MadtEntry::LocalApic(entry) => { if !cpu_set.contains_cpu(entry.processor_id as _) { - madt.remove(madt_cur, entry_len); - entry_len = 0; + // madt.remove(madt_cur, entry_len); + // set flag to disable lapic + madt.set_u32(0x0, madt_cur + 4); } + // let apic id equals processor id + // madt.set_u8(entry.processor_id, madt_cur + 3); } MadtEntry::LocalX2Apic(entry) => { if !cpu_set.contains_cpu(entry.processor_uid as _) {} @@ -289,20 +343,6 @@ impl RootAcpi { madt_cur += entry_len; } - // FIXME: temp clear dsdt - // let mut dsdt = 
tables.get_mut(&Signature::DSDT).unwrap(); - // dsdt.set_u32(SDT_HEADER_SIZE as _, 0x4); - - // FIXME: temp add mcfg entry - /*let mut mcfg = tables.get_mut(&Signature::MCFG).unwrap(); - let mcfg_len = mcfg.get_u32(0x4) as usize; - let mut entry = vec![ - 0x00u8, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xff, 0x00, 0x00, - 0x00, 0x00, - ]; - mcfg.set_u32(mcfg_len as u32 + entry.len() as u32, 0x4); - mcfg.bytes.append(&mut entry);*/ - // set pointers let hpa_start = acpi_zone_region.physical_start as usize; let gpa_start = acpi_zone_region.virtual_start as usize; @@ -350,34 +390,48 @@ impl RootAcpi { } // update checksums - rsdp.update_checksum(8); + rsdp.update_checksum(RSDP_CHECKSUM_OFFSET); for (sig, table) in tables.iter_mut() { - if table.is_dirty { - table.update_checksum(9); - } + table.update_checksum(ACPI_CHECKSUM_OFFSET); } // copy to memory - rsdp.copy_to_mem(); + unsafe { rsdp.copy_to_mem() }; for (sig, table) in tables.iter() { // don't copy tables that are not inside ACPI tree if tables_involved.contains(sig) { - table.copy_to_mem(); + unsafe { table.copy_to_mem() }; } } } // let zone 0 bsp cpu does the work - pub fn init(&mut self) { - let rsdp_mapping = unsafe { Rsdp::search_for_on_bios(HvAcpiHandler {}).unwrap() }; + pub fn init() -> Self { + let mut root_acpi = Self::default(); + let rsdp_addr = boot::get_multiboot_tags().rsdp_addr.unwrap(); + + root_acpi.rsdp_copy = unsafe { + slice::from_raw_parts(rsdp_addr as *const u8, core::mem::size_of::()).to_vec() + }; + let rsdp_copy_addr = root_acpi.rsdp_copy.as_ptr() as usize; + println!("rsdp: {:x}", rsdp_copy_addr); + + let handler = HvAcpiHandler {}; + let rsdp_mapping = unsafe { + handler.map_physical_region::(rsdp_copy_addr, core::mem::size_of::()) + }; + + // let rsdp_mapping = unsafe { Rsdp::search_for_on_bios(HvAcpiHandler {}).unwrap() }; // FIXME: temporarily suppose we use ACPI 1.0 assert!(rsdp_mapping.revision() == 0); - self.rsdp.fill( + root_acpi.rsdp.fill( + None, 
rsdp_mapping.virtual_start().as_ptr() as *const u8, RSDP_V1_SIZE, + RSDP_CHECKSUM_OFFSET, ); - self.add_pointer( + root_acpi.add_pointer( Signature::RSDT, RSDP_RSDT_OFFSET, Signature::RSDT, @@ -385,41 +439,25 @@ impl RootAcpi { ); // get rsdt - - self.add_new_table( + root_acpi.add_new_table( Signature::RSDT, rsdp_mapping.rsdt_address() as usize as *const u8, SDT_HEADER_SIZE, ); - let mut rsdt_offset = self.get_mut_table(Signature::RSDT).unwrap().get_len(); + let mut rsdt_offset = root_acpi.get_mut_table(Signature::RSDT).unwrap().get_len(); let tables = unsafe { AcpiTables::from_validated_rsdp(HvAcpiHandler {}, rsdp_mapping) }.unwrap(); - if let Ok(madt) = tables.find_table::() { - self.add_new_table( - Signature::MADT, - madt.physical_start() as *const u8, - madt.region_length(), - ); - - info!("-------------------------------- MADT --------------------------------"); - for entry in madt.get().entries() { - info!("{:x?}", entry); - } - - self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MADT, RSDT_PTR_SIZE); - rsdt_offset += RSDT_PTR_SIZE; - } - + // mcfg if let Ok(mcfg) = tables.find_table::() { - self.add_new_table( + root_acpi.add_new_table( Signature::MCFG, mcfg.physical_start() as *const u8, mcfg.region_length(), ); - info!("-------------------------------- MCFG --------------------------------"); + println!("---------- MCFG ----------"); let mut offset = size_of::() + 0xb; if let Some(entry) = mcfg @@ -428,92 +466,127 @@ impl RootAcpi { .find(|&entry| entry.pci_segment_group == 0) { // we only support segment group 0 - info!("{:x?}", entry); + println!("{:x?}", entry); // we don't have such many buses, probe devices to get the max_bus we have let (mut devices, mut msi_data_reg_map, mut msix_bar_map, _, max_bus) = probe_root_pci_devices(entry.base_address as _); // update bus_number_end - self.get_mut_table(Signature::MCFG) + root_acpi + .get_mut_table(Signature::MCFG) .unwrap() .set_u8(max_bus, offset); offset += size_of::(); - 
self.devices.append(&mut devices); + root_acpi.devices.append(&mut devices); - self.config_space_base = entry.base_address as _; - self.config_space_size = + root_acpi.config_space_base = entry.base_address as _; + root_acpi.config_space_size = (((max_bus as u64 - entry.bus_number_start as u64) + 1) << 20) as usize; - self.msi_data_reg_map.append(&mut msi_data_reg_map); - self.msix_bar_map.append(&mut msix_bar_map); + root_acpi.msi_data_reg_map.append(&mut msi_data_reg_map); + root_acpi.msix_bar_map.append(&mut msix_bar_map); } - self.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); rsdt_offset += RSDT_PTR_SIZE; } + // println!("fadt"); if let Ok(fadt) = tables.find_table::() { - self.add_new_table( + root_acpi.add_new_table( Signature::FADT, fadt.physical_start() as *const u8, fadt.region_length(), ); - self.add_pointer(Signature::RSDT, rsdt_offset, Signature::FADT, RSDT_PTR_SIZE); + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::FADT, RSDT_PTR_SIZE); rsdt_offset += RSDT_PTR_SIZE; // dsdt - + // println!("dsdt"); if let Ok(dsdt) = tables.dsdt() { - self.add_new_table( + println!("dsdt ptr: {:x}, len: {:x}", dsdt.address, dsdt.length); + root_acpi.add_new_table( Signature::DSDT, (dsdt.address - SDT_HEADER_SIZE) as *const u8, (dsdt.length as usize + SDT_HEADER_SIZE), ); + // println!("dsdt add_new_table"); - self.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_32, Signature::DSDT, 4); - self.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_64, Signature::DSDT, 8); + root_acpi.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_32, Signature::DSDT, 4); + root_acpi.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_64, Signature::DSDT, 8); } // facs - + println!("facs"); if let Ok(facs_addr) = fadt.facs_address() { - self.add_new_table(Signature::FACS, facs_addr as *const u8, unsafe { + root_acpi.add_new_table(Signature::FACS, facs_addr as *const u8, unsafe 
{ *((facs_addr + 4) as *const u32) as usize }); - self.add_pointer(Signature::FADT, FADT_FACS_OFFSET_32, Signature::FACS, 4); - self.add_pointer(Signature::FADT, FADT_FACS_OFFSET_64, Signature::FACS, 8); + root_acpi.add_pointer(Signature::FADT, FADT_FACS_OFFSET_32, Signature::FACS, 4); + root_acpi.add_pointer(Signature::FADT, FADT_FACS_OFFSET_64, Signature::FACS, 8); + } + } + + // madt + if let Ok(madt) = tables.find_table::() { + root_acpi.add_new_table( + Signature::MADT, + madt.physical_start() as *const u8, + madt.region_length(), + ); + + println!("---------- MADT ----------"); + for entry in madt.get().entries() { + match entry { + MadtEntry::LocalApic(entry) => { + if entry.flags != 0 { + println!("{:x?}", entry); + root_acpi + .lapic_map + .insert(entry.apic_id as _, root_acpi.lapic_map.len()); + } + } + _ => {} + } } + + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::MADT, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; } + // dmar + println!("dmar"); acpi_table!(Dmar, DMAR); if let Ok(dmar) = tables.find_table::() { - self.add_new_table( + root_acpi.add_new_table( Signature::DMAR, dmar.physical_start() as *const u8, dmar.region_length(), ); - info!("dmar: {:x?}", unsafe { + println!("dmar: {:x?}", unsafe { *((dmar.physical_start() + 56) as *const [u8; 8]) }); // self.add_pointer(Signature::RSDT, rsdt_offset, Signature::DMAR, RSDT_PTR_SIZE); - rsdt_offset += RSDT_PTR_SIZE; + // rsdt_offset += RSDT_PTR_SIZE; } - if let Some(rsdt) = self.get_mut_table(Signature::RSDT) { - rsdt.set_len(rsdt_offset); + if let Some(rsdt) = root_acpi.get_mut_table(Signature::RSDT) { + rsdt.set_new_len(rsdt_offset); } + println!("acpi init end"); + root_acpi } } // let zone 0 bsp cpu does the work pub fn root_init() { - ROOT_ACPI.lock().init(); + ROOT_ACPI.call_once(|| RootAcpi::init()); } pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { @@ -522,7 +595,7 @@ pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { 
// if config.zone_id != 0 { // banned.insert(Signature::FADT); // } - ROOT_ACPI.lock().copy_to_zone_region( + ROOT_ACPI.get().unwrap().copy_to_zone_region( &config.memory_regions()[config.arch_config.rsdp_memory_region_id], &config.memory_regions()[config.arch_config.acpi_memory_region_id], &banned, @@ -531,16 +604,16 @@ pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { } pub fn root_get_table(sig: &Signature) -> Option { - ROOT_ACPI.lock().get_table(sig) + ROOT_ACPI.get().unwrap().get_table(sig) } pub fn root_get_config_space_info() -> Option<(usize, usize)> { - let acpi = ROOT_ACPI.lock(); + let acpi = ROOT_ACPI.get().unwrap(); Some((acpi.config_space_base, acpi.config_space_size)) } pub fn is_msi_data_reg(hpa: usize) -> Option { - if let Some(&bdf) = ROOT_ACPI.lock().msi_data_reg_map.get(&hpa) { + if let Some(&bdf) = ROOT_ACPI.get().unwrap().msi_data_reg_map.get(&hpa) { Some(bdf) } else { None @@ -548,9 +621,13 @@ pub fn is_msi_data_reg(hpa: usize) -> Option { } pub fn is_msix_bar(hpa: usize) -> Option { - if let Some(&bdf) = ROOT_ACPI.lock().msix_bar_map.get(&hpa) { + if let Some(&bdf) = ROOT_ACPI.get().unwrap().msix_bar_map.get(&hpa) { Some(bdf) } else { None } } + +pub fn get_lapic_map() -> &'static BTreeMap { + &ROOT_ACPI.get().unwrap().lapic_map +} diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index 7f523456..021a98b7 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -1,8 +1,8 @@ use crate::{ - arch::Stage2PageTable, + arch::{zone::HvArchZoneConfig, Stage2PageTable}, config::{root_zone_config, HvPciConfig, HvZoneConfig, MEM_TYPE_RAM}, error::HvResult, - memory::{GuestPhysAddr, HostPhysAddr, MemorySet}, + memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet}, percpu::this_zone, platform::MEM_TYPE_OTHER_ZONES, }; @@ -10,22 +10,72 @@ use alloc::string::{String, ToString}; use core::{ arch::{self, global_asm}, ffi::{c_char, CStr}, + mem::size_of, ptr::{copy, 
copy_nonoverlapping}, }; -use spin::Mutex; +use multiboot_tag::{Modules, MultibootTags}; +use spin::{Mutex, Once}; -use super::zone::HvArchZoneConfig; +mod multiboot_tag { + pub const END: u32 = 0; + pub const MODULES: u32 = 3; + pub const MEMORY_MAP: u32 = 6; + pub const FRAMEBUFFER: u32 = 8; + pub const ACPI_V1: u32 = 14; -const E820_MAX_ENTRIES_ZEROPAGE: usize = 128; + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct Modules { + tag_type: u32, + pub size: u32, + pub mod_start: u32, + pub mod_end: u32, + } -lazy_static::lazy_static! { - static ref CMDLINE: Mutex = Mutex::new(String::new()); -} + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct MemoryMap { + tag_type: u32, + pub size: u32, + pub entry_size: u32, + pub entry_version: u32, + } -pub fn cmdline() -> &'static Mutex { - &CMDLINE + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct MemoryMapEntry { + pub base_addr: u64, + pub length: u64, + pub _type: u32, + reserved: u32, + } + + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct Framebuffer { + tag_type: u32, + size: u32, + pub addr: u64, + pub pitch: u32, + pub width: u32, + pub height: u32, + pub bpp: u8, + pub fb_type: u8, + reserved: u8, + } + + #[derive(Default, Debug, Clone, Copy)] + pub struct MultibootTags { + pub framebuffer: Framebuffer, + pub memory_map_addr: Option, + pub rsdp_addr: Option, + } } +static MULTIBOOT_TAGS: Once = Once::new(); + +const E820_MAX_ENTRIES_ZEROPAGE: usize = 128; + bitflags::bitflags! 
{ #[derive(Clone, Copy, Debug)] /// https://www.kernel.org/doc/html/latest/arch/x86/boot.html @@ -56,13 +106,16 @@ pub enum E820Type { #[derive(Debug, Clone, Copy)] /// The so-called "zeropage" pub struct BootParams { - pad0: [u8; 0x1e8], + screen_info: ScreenInfo, + pad0: [u8; 0x1a8], e820_entries: u8, pad1: [u8; 0x8], setup_sects: u8, root_flags: u16, syssize: u32, - pad2: [u8; 0xd], + ramsize: u16, + vid_mode: u16, + pad2: [u8; 0x9], boot_proto_version: u16, pad3: [u8; 0x6], kernel_version: u16, @@ -87,7 +140,7 @@ pub struct BootParams { } impl BootParams { - pub fn fill(config: &HvZoneConfig, gpm: &MemorySet) -> HvResult { + pub fn fill(config: &HvZoneConfig, gpm: &mut MemorySet) -> HvResult { if config.arch_config.setup_load_gpa == 0 { panic!("setup addr not set yet!"); } @@ -129,15 +182,21 @@ impl BootParams { } // set e820 - // TODO: zone config - boot_params.set_e820_entries(&config); + boot_params.set_e820_entries(config); + // set initrd if config.arch_config.initrd_load_gpa != 0 { boot_params.set_initrd( config.arch_config.initrd_load_gpa as _, config.arch_config.initrd_size as _, ); } + + // set screen + if config.arch_config.screen_base != 0 { + boot_params.set_screen_info(config, gpm); + } + Ok(()) } @@ -183,6 +242,41 @@ impl BootParams { self.ramdisk_size = ramdisk_size; info!("initrd size: {}", self.ramdisk_size); } + + fn set_screen_info(&mut self, config: &HvZoneConfig, gpm: &mut MemorySet) { + let fb_info = &get_multiboot_tags().framebuffer; + + let bytes_per_pixel = (fb_info.bpp as usize) / 8; + let width = fb_info.width as usize; + let height = fb_info.height as usize; + + self.screen_info.lfb_base = config.arch_config.screen_base as _; + self.screen_info.lfb_width = width as _; + self.screen_info.lfb_height = height as _; + self.screen_info.lfb_depth = fb_info.bpp as _; + self.screen_info.lfb_size = (bytes_per_pixel * width * height) as _; + self.screen_info.lfb_linelength = (bytes_per_pixel * width) as _; + + // TODO: custom + 
self.screen_info.blue_size = 8; + self.screen_info.blue_pos = 0; + self.screen_info.green_size = 8; + self.screen_info.green_pos = 8; + self.screen_info.red_size = 8; + self.screen_info.red_pos = 16; + self.screen_info.alpha_size = 8; + self.screen_info.alpha_pos = 24; + self.screen_info.orig_video_is_vga = 0x23; // VESA + self.screen_info.capabilities = 0; + self.vid_mode = 0xffff; + + gpm.insert(MemoryRegion::new_with_offset_mapper( + config.arch_config.screen_base as GuestPhysAddr, + fb_info.addr as HostPhysAddr, + self.screen_info.lfb_size as _, + MemFlags::READ | MemFlags::WRITE, + )); + } } #[repr(packed)] @@ -194,33 +288,147 @@ pub struct BootE820Entry { _type: E820Type, } -#[repr(C)] -#[derive(Debug, Default, Clone, Copy)] -pub struct MultibootInfo { - flags: u32, - mem_lower: u32, - mem_upper: u32, - boot_device: u32, - cmdline: u32, - pub mods_count: u32, - pub mods_addr: u32, +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +pub struct ScreenInfo { + pad0: [u8; 0x0f], + orig_video_is_vga: u8, + pad1: u16, + lfb_width: u16, + lfb_height: u16, + lfb_depth: u16, + lfb_base: u32, + lfb_size: u32, + pad2: [u16; 2], + lfb_linelength: u16, + red_size: u8, + red_pos: u8, + green_size: u8, + green_pos: u8, + blue_size: u8, + blue_pos: u8, + alpha_size: u8, + alpha_pos: u8, + pad3: [u8; 4], + pages: u16, + vesa_attributes: u16, + capabilities: u32, + pad4: [u8; 6], } -impl MultibootInfo { - fn new(addr: usize) -> Self { - let multiboot_info = unsafe { &*(addr as *const MultibootInfo) }; - multiboot_info.clone() +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +pub struct EfiInfo { + loader_signature: u32, + systab: u32, + memdesc_size: u32, + memdesc_version: u32, + memmap: u32, + memmap_size: u32, + systab_hi: u32, + memmap_hi: u32, +} + +pub fn multiboot_init(info_addr: usize) { + let mut cur = info_addr; + let total_size = unsafe { *(cur as *const u32) } as usize; + let mut multiboot_tags = MultibootTags::default(); + + // println!("{:#x?}", total_size); + cur += 
8; + while cur < info_addr + total_size { + let tag_type = unsafe { *(cur as *const u32) }; + if tag_type == multiboot_tag::END { + break; + } + + // println!("{:#x?}", tag_type); + match tag_type { + multiboot_tag::MODULES => {} + multiboot_tag::MEMORY_MAP => { + multiboot_tags.memory_map_addr = Some(cur); + } + multiboot_tag::FRAMEBUFFER => { + multiboot_tags.framebuffer = + unsafe { *(cur as *const multiboot_tag::Framebuffer) }.clone(); + } + multiboot_tag::ACPI_V1 => { + multiboot_tags.rsdp_addr = Some(cur + 8); + } + _ => {} + } + cur += ((unsafe { *((cur + 4) as *const u32) } as usize + 7) & (!7)); } - pub fn init(info_addr: usize) { - let boot_info = MultibootInfo::new(info_addr); - println!("{:#x?}", boot_info); + MULTIBOOT_TAGS.call_once(|| multiboot_tags); +} + +pub fn get_multiboot_tags() -> &'static multiboot_tag::MultibootTags { + MULTIBOOT_TAGS.get().unwrap() +} - let cmd_ptr = boot_info.cmdline as *const c_char; - let cmd_cstr = unsafe { CStr::from_ptr(cmd_ptr) }; - let cmd_str = cmd_cstr.to_str().unwrap(); - CMDLINE.lock().push_str(cmd_str); +pub fn print_memory_map() { + let map_addr = get_multiboot_tags().memory_map_addr.unwrap(); + let mem_map = unsafe { *(map_addr as *const multiboot_tag::MemoryMap) }; + let mem_map_size = size_of::(); + let cnt = ((mem_map.size as usize) - mem_map_size) / (mem_map.entry_size as usize); - println!("cmdline: {}", CMDLINE.lock().as_str()); + let mut entry_addr = map_addr + mem_map_size; + println!("===== MEMORY MAP ====="); + for i in 0..cnt { + let entry = unsafe { *(entry_addr as *const multiboot_tag::MemoryMapEntry) }; + println!( + "base: {:x}, len: {:x}, type: {:x}", + entry.base_addr, entry.length, entry._type + ); + entry_addr += size_of::(); + } +} + +/// copy kernel modules to the right place +pub fn module_init(info_addr: usize) { + println!("module_init"); + let mut cur = info_addr; + let total_size = unsafe { *(cur as *const u32) } as usize; + + let mut cnt = 0; + cur += 8; + while cur < info_addr + 
total_size { + let tag_type = unsafe { *(cur as *const u32) }; + let ptr = cur as *const multiboot_tag::Modules; + cur += ((unsafe { *((cur + 4) as *const u32) } as usize + 7) & (!7)); + + if tag_type == multiboot_tag::END { + break; + } + if tag_type != multiboot_tag::MODULES { + continue; + } + + let module = unsafe { *ptr }; + let dst = unsafe { + usize::from_str_radix( + CStr::from_ptr(((ptr as usize) + size_of::()) as *const c_char) + .to_str() + .unwrap(), + 16, + ) + .unwrap() + }; + println!("module: {:#x?}, addr: {:#x?}", module, dst); + cnt += 1; + + if dst == 0x0 { + continue; + } + + unsafe { + core::ptr::copy( + module.mod_start as *mut u8, + dst as *mut u8, + (module.mod_end - module.mod_start + 1) as usize, + ) + }; } + println!("module cnt: {:x}", cnt); } diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index de7383da..0fdf0c0a 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,5 +1,6 @@ use crate::{ arch::{ + acpi, boot::BootParams, hpet, ipi, mm::new_s2_memory_set, @@ -283,6 +284,7 @@ impl ArchCpu { } self.host_stack_top = (core_end() + (self.cpuid + 1) * PER_CPU_SIZE) as _; + unsafe { self.vmx_launch() }; } diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index df2ed013..f44c9439 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,5 +1,9 @@ use crate::{ - arch::cpu::this_cpu_id, consts::PER_CPU_SIZE, memory::addr::PHYS_VIRT_OFFSET, rust_main, + arch::{boot, cpu::this_cpu_id, graphics::font_init}, + consts::PER_CPU_SIZE, + memory::addr::PHYS_VIRT_OFFSET, + platform::__board, + rust_main, }; use core::arch::global_asm; use x86::msr::IA32_EFER; @@ -8,8 +12,10 @@ use x86_64::registers::{ model_specific::EferFlags, }; -const MULTIBOOT_HEADER_MAGIC: i32 = 0x1BADB002; -const MULTIBOOT_HEADER_FLAGS: i32 = 0x00010002; +const MULTIBOOT_HEADER_MAGIC: u32 = 0x1bad_b002; +const MULTIBOOT_HEADER_FLAGS: u32 = 0x0001_0002; +const MULTIBOOT2_HEADER_MAGIC: u32 = 0xe852_50d6; +const 
MULTIBOOT2_ARCH_I386: u32 = 0; const X86_PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits() @@ -25,6 +31,8 @@ global_asm!( include_str!("multiboot.S"), multiboot_header_magic = const MULTIBOOT_HEADER_MAGIC, multiboot_header_flags = const MULTIBOOT_HEADER_FLAGS, + multiboot2_header_magic = const MULTIBOOT2_HEADER_MAGIC, + multiboot2_arch_i386 = const MULTIBOOT2_ARCH_I386, rust_entry = sym rust_entry, rust_entry_secondary = sym rust_entry_secondary, offset = const X86_PHYS_VIRT_OFFSET, @@ -42,6 +50,7 @@ pub unsafe extern "C" fn arch_entry() -> i32 { core::arch::asm!( " .code32 + cli mov edi, eax // magic mov esi, ebx // multiboot info jmp bsp_entry32 @@ -51,9 +60,13 @@ pub unsafe extern "C" fn arch_entry() -> i32 { } extern "C" fn rust_entry(magic: u32, info_addr: usize) { + unsafe { fill_page_table() }; crate::clear_bss(); unsafe { PHYS_VIRT_OFFSET = X86_PHYS_VIRT_OFFSET }; - println!(""); + boot::multiboot_init(info_addr); + #[cfg(all(feature = "graphics", target_arch = "x86_64"))] + font_init(__board::GRAPHICS_FONT); + boot::print_memory_map(); rust_main(this_cpu_id(), info_addr); } @@ -61,3 +74,20 @@ fn rust_entry_secondary() { // println!("CPUID: {}", this_cpu_id()); rust_main(this_cpu_id(), 0); } + +extern "C" { + #[link_name = "Ltmp_pdpt_low"] + static mut PDPT_LOW: [u64; 512]; + #[link_name = "Ltmp_pdpt_high"] + static mut PDPT_HIGH: [u64; 512]; +} + +unsafe fn fill_page_table() { + let mut addr: usize = 0; + for i in 0..512 { + // paddr | PRESENT | WRITABLE | HUGE_PAGE + PDPT_LOW[i] = (addr | 0x83) as _; + PDPT_HIGH[i] = (addr | 0x83) as _; + addr += 0x4000_0000; + } +} diff --git a/src/arch/x86_64/graphics.rs b/src/arch/x86_64/graphics.rs new file mode 100644 index 00000000..5315e326 --- /dev/null +++ b/src/arch/x86_64/graphics.rs @@ -0,0 +1,208 @@ +use spin::{Mutex, Once}; + +use crate::arch::boot::get_multiboot_tags; + +const PSF2_MAGIC: u32 = 0x864ab572; + +#[repr(packed)] +#[derive(Debug, Clone, 
Copy)] +pub struct Psf2Header { + magic: u32, + version: u32, + header_size: u32, + flags: u32, + glyph_nr: u32, + bytes_per_glyph: u32, + height: u32, + width: u32, +} + +#[derive(Debug, Clone, Copy)] +pub struct FontInfo { + /// width in bytes (8 pixels) + width_bytes: usize, + /// width in pixels + width: usize, + /// height in pixels + height: usize, + /// table address + glyph_table: usize, + /// number of glyphs + glyph_nr: u32, + /// size of each glyph + bytes_per_glyph: u32, +} + +static FONT_INFO: Once = Once::new(); + +#[derive(Debug, Clone, Copy)] +pub struct FramebufferInfo { + /// x in char + cursor_x: usize, + /// y in char + cursor_y: usize, + max_char_nr_x: usize, + max_char_nr_y: usize, + pub addr: usize, + pub width: usize, + pub height: usize, +} + +static FRAMEBUFFER_INFO: Once> = Once::new(); + +pub fn font_init(psf: &'static [u8]) { + let psf_header = unsafe { *(psf.as_ptr() as *const Psf2Header) }; + // only support psf2 + assert!(psf_header.magic == PSF2_MAGIC); + + let font_width_bytes = (psf_header.width + 7) / 8; // up align to 8bit + let font_width = font_width_bytes * 8; + + // println!("{:#x?}", psf_header); + + FONT_INFO.call_once(|| FontInfo { + width: font_width as _, + height: psf_header.height as _, + glyph_table: (psf.as_ptr() as usize + psf_header.header_size as usize), + glyph_nr: psf_header.glyph_nr, + bytes_per_glyph: psf_header.bytes_per_glyph, + width_bytes: font_width_bytes as _, + }); + + let framebuffer = &get_multiboot_tags().framebuffer; + FRAMEBUFFER_INFO.call_once(|| { + Mutex::new(FramebufferInfo { + cursor_x: 0, + cursor_y: 0, + max_char_nr_x: (framebuffer.width / font_width) as _, + max_char_nr_y: (framebuffer.height / psf_header.height) as _, + addr: framebuffer.addr as _, + width: framebuffer.width as _, + height: framebuffer.height as _, + }) + }); + + fb_clear_screen(); +} + +fn fb_clear_screen() { + let mut fb_info = FRAMEBUFFER_INFO.get().unwrap().lock(); + let mut ptr = fb_info.addr as *mut u32; + for 
height in 0..fb_info.height { + for width in 0..fb_info.width { + unsafe { + core::ptr::write_volatile(ptr, 0); + ptr = ptr.wrapping_add(1); + } + } + } +} + +fn fb_putchar_internal(ch: u16, fg: u32, bg: u32) { + let font_info = FONT_INFO.get().unwrap(); + let mut glyph = font_info.glyph_table as *const u8; + + if (ch as u32) < font_info.glyph_nr { + glyph = glyph.wrapping_add((ch as usize) * (font_info.bytes_per_glyph as usize)); + } + + { + let mut fb_info = FRAMEBUFFER_INFO.get().unwrap().lock(); + // current pixel + let cur = fb_info.cursor_y * font_info.height * fb_info.width + + fb_info.cursor_x * font_info.width; + let base = fb_info.addr as *mut u32; + + for y in 0..font_info.height { + let mut mask: u8 = 1 << 7; + for x in 0..font_info.width { + if x % 8 == 0 { + mask = 1 << 7; + } + + let color = match unsafe { *glyph.wrapping_add(x / 8) } & mask != 0 { + true => fg, + false => bg, + }; + + let ptr = base.wrapping_add(cur + y * fb_info.width + x); + unsafe { core::ptr::write_volatile(ptr, color) }; + + mask = mask >> 1; + } + + glyph = glyph.wrapping_add(font_info.width_bytes); + } + + fb_info.cursor_x += 1; + if fb_info.cursor_x < fb_info.max_char_nr_x { + return; + } + } + + fb_putchar_new_line(bg); +} + +fn fb_putchar_new_line(bg: u32) { + let font_info = FONT_INFO.get().unwrap(); + let mut fb_info = FRAMEBUFFER_INFO.get().unwrap().lock(); + let base = fb_info.addr as *mut u32; + + fb_info.cursor_x = 0; + fb_info.cursor_y += 1; + + if fb_info.cursor_y >= fb_info.max_char_nr_y { + fb_info.cursor_y = 0; + } + + for y in 0..font_info.height { + let y1 = (y + fb_info.cursor_y * font_info.height) * fb_info.width; + for x in 0..fb_info.width { + unsafe { core::ptr::write_volatile(base.wrapping_add(x + y1), bg) }; + } + } + + // may need to scroll up + /*if fb_info.cursor_y >= fb_info.max_char_nr_y { + for y in 0..((fb_info.max_char_nr_y - 1) * font_info.height) { + let y1 = y * fb_info.width; + let y2 = (y + font_info.height) * fb_info.width; + for x in 
0..fb_info.width { + unsafe { + core::ptr::write_volatile( + base.wrapping_add(x + y1), + core::ptr::read_volatile(base.wrapping_add(x + y2)), + ) + }; + } + } + + for y in 0..font_info.height { + let y1 = (y + (fb_info.max_char_nr_y - 1) * font_info.height) * fb_info.width; + for x in 0..fb_info.width { + unsafe { core::ptr::write_volatile(base.wrapping_add(x + y1), bg) }; + } + } + + fb_info.cursor_y -= 1; + }*/ +} + +pub fn fb_putchar(ch: u8, fg: u32, bg: u32) { + match ch as char { + '\r' => {} + '\n' => fb_putchar_new_line(bg), + _ => fb_putchar_internal(ch as _, fg, bg), + } +} + +pub fn fb_putstr(s: &str, fg: u32) { + for c in s.chars() { + match c { + '\n' => { + fb_putchar_new_line(0x0); + } + _ => fb_putchar_internal(c as _, fg, 0x0), + } + } +} diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index edadcc7f..9c117df3 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -1,4 +1,5 @@ use crate::{ + arch::{cpu::this_cpu_id, idt::IdtVector}, device::irqchip::inject_vector, error::HvResult, event, @@ -9,8 +10,6 @@ use alloc::{collections::vec_deque::VecDeque, vec::Vec}; use bit_field::BitField; use spin::{Mutex, Once}; -use super::{cpu::this_cpu_id, idt::IdtVector}; - #[allow(non_snake_case)] pub mod IpiDeliveryMode { pub const FIXED: u8 = 0; diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index aa15b3c7..3b6a6773 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -4,6 +4,7 @@ pub mod boot; pub mod cpu; pub mod cpuid; pub mod entry; +pub mod graphics; pub mod hpet; pub mod idt; pub mod ipi; diff --git a/src/arch/x86_64/multiboot.S b/src/arch/x86_64/multiboot.S index 91af4acf..52c82cf7 100644 --- a/src/arch/x86_64/multiboot.S +++ b/src/arch/x86_64/multiboot.S @@ -1,9 +1,12 @@ -.equ boot_stack_top, __core_end + {per_cpu_size} +.equ bsp_boot_stack_top, __core_end + {per_cpu_size} +.equ multiboot2_header_len, multiboot2_header_end - multiboot2_header -.section .text.entry +.equ multiboot2_header_tag_end, 0 
+.equ multiboot2_header_tag_address, 2 +.equ multiboot2_header_tag_entry_address, 3 +.equ multiboot2_header_tag_framebuffer, 5 -.section .text.entry32 -.code32 +.section .text.header .balign 4 .type multiboot_header, STT_OBJECT @@ -17,23 +20,73 @@ multiboot_header: .int ebss - {offset} // bss_end_addr .int arch_entry - {offset} // entry_addrs -.macro ENTRY32_COMMON - // set data segment selectors - mov ax, 0x18 - mov ss, ax - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax +.align 8 +.type multiboot2_header STT_OBJECT +multiboot2_header: + .int {multiboot2_header_magic} + .int {multiboot2_arch_i386} + .int multiboot2_header_len + .int -({multiboot2_header_magic} + {multiboot2_arch_i386} + multiboot2_header_len) + +.align 8 +.type tag_address STT_OBJECT +tag_address: + .short multiboot2_header_tag_address + .short 0 + .int 24 + .int multiboot2_header - {offset} // header_addr + .int skernel - {offset} // load_addr + .int edata - {offset} // load_end_addr + .int bsp_boot_stack_top - {offset} // bss_end_addr + +.align 8 +.type tag_entry_address STT_OBJECT +tag_entry_address: + .short multiboot2_header_tag_entry_address + .short 0 + .int 12 + .int arch_entry - {offset} // entry_addr + +.align 8 +.type tag_framebuffer STT_OBJECT +tag_framebuffer: + .short multiboot2_header_tag_framebuffer + .short 0 + .int 20 + .int 1024 // width + .int 768 // height + .int 32 // depth + +.align 8 +.type tag_end STT_OBJECT +tag_end: + .short multiboot2_header_tag_end + .short 0 + .int 8 + +multiboot2_header_end: - // set PAE, PGE bit in CR4 - mov eax, {cr4} - mov cr4, eax +.section .text.entry + +.section .text.entry32 +.code32 + +.macro ENTRY32_COMMON_1 + // disable paging (UEFI may turn it on) + mov eax, cr0 + mov ebx, (1 << 31) + not ebx + and eax, ebx + mov eax, cr0 // load the temporary page table lea eax, [.Ltmp_pml4 - {offset}] mov cr3, eax + // set PAE, PGE bit in CR4 + mov eax, {cr4} + mov cr4, eax + // set LME, NXE bit in IA32_EFER mov ecx, {efer_msr} mov edx, 0 @@ -45,6 
+98,16 @@ multiboot_header: mov cr0, eax .endm +.macro ENTRY32_COMMON_2 + // set data segment selectors + mov ax, 0x18 + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax +.endm + .macro ENTRY64_COMMON // clear segment selectors xor ax, ax @@ -56,9 +119,12 @@ multiboot_header: .endm bsp_entry32: - // load the temporary GDT + ENTRY32_COMMON_1 + + // set up GDT lgdt [.Ltmp_gdt_desc_phys - {offset}] - ENTRY32_COMMON + + ENTRY32_COMMON_2 // long return to the 64-bit entry push 0x10 // code64 segment selector @@ -68,7 +134,8 @@ bsp_entry32: .global ap_entry32 ap_entry32: - ENTRY32_COMMON + ENTRY32_COMMON_1 + ENTRY32_COMMON_2 // long return to the 64-bit entry push 0x10 // code64 segment selector @@ -83,20 +150,21 @@ bsp_entry64: // reload GDT by high address movabs rax, offset .Ltmp_gdt_desc lgdt [rax] + + // load task register mov ax, 0x20 ltr ax - + ENTRY64_COMMON // set stack and jump to rust_entry - movabs rsp, offset boot_stack_top + movabs rsp, offset bsp_boot_stack_top movabs rax, offset {rust_entry} call rax jmp .Lhlt ap_entry64: ENTRY64_COMMON - // set rsp to high address mov rax, {offset} add rsp, rax @@ -134,20 +202,18 @@ ap_entry64: .balign 4096 .Ltmp_pml4: - // 0x0000_0000 ~ 0x1_0000_0000 - .quad .Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) - .zero 8 * 510 - // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_8000_0000 - .quad .Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) - -.Ltmp_pdpt_low: - .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) - .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) - .quad 0x80000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) - .quad 0xc0000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) - .zero 8 * 508 - -.Ltmp_pdpt_high: - .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) - .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) + // 0x0000_0000 ~ 
0x4000_0000 + .quad Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) .zero 8 * 510 + // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_4000_0000 + .quad Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) + +.global Ltmp_pdpt_low +Ltmp_pdpt_low: + .quad 0x00000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .zero 8 * 511 + +.global Ltmp_pdpt_high +Ltmp_pdpt_high: + .quad 0x00000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .zero 8 * 511 diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index 994f56ed..8a624eb8 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -23,11 +23,16 @@ use super::{ impl Zone { pub fn pci_config_space_mmio_init(&mut self, arch: &HvArchZoneConfig) { - let bytes = acpi::root_get_table(&Signature::MCFG) + /*let bytes = acpi::root_get_table(&Signature::MCFG) .unwrap() .get_bytes() .clone(); - let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) }; + let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) };*/ + + let bytes = acpi::root_get_table(&Signature::MCFG) + .unwrap() + .get_unpatched_src(); + let mcfg = unsafe { &*(bytes as *const Mcfg) }; for entry in mcfg.entries() { let start = entry.base_address as usize; @@ -77,7 +82,7 @@ pub fn probe_root_pci_devices( let device_id = unsafe { *((bdf_config_hpa + 0x2) as *const u16) }; let header_type = unsafe { *((bdf_config_hpa + 0xe) as *const u8) }; - info!( + println!( "bdf: {:x}, bus: {:x}, dev_func: {:x}, vendor id: {:x}, device id: {:x}, header type: {:x}", bdf, bus, dev_func, vendor_id, device_id, header_type ); @@ -86,7 +91,7 @@ pub fn probe_root_pci_devices( bus_empty = false; // pci bridge - if header_type == 0x1 { + if header_type.get_bits(0..7) == 0x1 { let secondary_bus = unsafe { *((bdf_config_hpa + 0x19) as *const u8) }; buses.push_back(secondary_bus); } @@ -107,7 +112,7 @@ pub fn probe_root_pci_devices( false => cap_hpa + 0x8, }; msi_data_reg_map.insert(data_reg_hpa, bdf as _); - info!("msi data reg 
hpa: {:x?}", data_reg_hpa); + // println!("msi data reg hpa: {:x?}", data_reg_hpa); } else if cap_id == 0x11 { // msi-x capability let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; @@ -135,20 +140,20 @@ pub fn probe_root_pci_devices( } } - info!( + /*println!( "table size: {:x}, table bir: {:x}, bar: {:x}", table_size, table_bir, bar - ); + );*/ msix_bar_map.insert(bar, bdf as _); for i in 0..=table_size { let data_reg_hpa = bar + i * size_of::() + 2 * size_of::(); msi_data_reg_map.insert(data_reg_hpa, bdf as _); - info!("msi-x data reg hpa: {:x?}", data_reg_hpa); + // println!("msi-x data reg hpa: {:x?}", data_reg_hpa); } } - info!("cap id: {:x}, hpa: {:x}", cap_id, cap_hpa); + // println!("cap id: {:x}, hpa: {:x}", cap_id, cap_hpa); cap_pointer = unsafe { *((cap_hpa + 1) as *const u8) } as usize; } } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 3d67f8e8..198dc08b 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -7,6 +7,7 @@ use crate::{ }; pub const UART_COM1_BASE_PORT: u16 = 0x3f8; +pub const UART_COM1_PORT: Range = 0x3f8..0x400; pub const PCI_CONFIG_ADDR_PORT: Range = 0xcf8..0xcfc; pub const PCI_CONFIG_DATA_PORT: Range = 0xcfc..0xd00; @@ -42,6 +43,7 @@ impl PortIoBitmap { // FIXME: uart & i8254 if zone_id == 0 { bitmap.set_range_intercept(0x60..0x65, false); + #[cfg(not(feature = "graphics"))] bitmap.set_range_intercept(0x3f8..0x400, false); } // bitmap.set_range_intercept(0x3f8..0x400, false); diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 2178a34b..a6bdc87b 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -17,7 +17,7 @@ use crate::{ inject_vector, pic::{ioapic::irqs, lapic::VirtLocalApic}, }, - uart::UartReg, + uart::{virt_console_io_read, virt_console_io_write, UartReg}, }, error::HvResult, hypercall::HyperCall, @@ -30,7 +30,7 @@ use x86_64::registers::control::Cr4Flags; use super::{ pci::{handle_pci_config_port_read, handle_pci_config_port_write}, - 
pio::{PCI_CONFIG_ADDR_PORT, PCI_CONFIG_DATA_PORT}, + pio::{PCI_CONFIG_ADDR_PORT, PCI_CONFIG_DATA_PORT, UART_COM1_PORT}, }; core::arch::global_asm!( @@ -99,6 +99,9 @@ fn handle_irq(vector: u8) { ), _ => match get_guest_vector(vector, this_zone_id()) { Some(gv) => { + if gv < 0x20 { + info!("inject {:x} {:x}", vector, gv); + } inject_vector(this_cpu_id(), gv as _, None, false); } None => { @@ -266,12 +269,16 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR || PCI_CONFIG_DATA_PORT.contains(&io_info.port) { handle_pci_config_port_write(&io_info, value); + } else if UART_COM1_PORT.contains(&io_info.port) { + virt_console_io_write(io_info.port, value); } } else { if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) || PCI_CONFIG_DATA_PORT.contains(&io_info.port) { value = handle_pci_config_port_read(&io_info); + } else if UART_COM1_PORT.contains(&io_info.port) { + value = virt_console_io_read(io_info.port); } let rax = &mut arch_cpu.regs_mut().rax; // SDM Vol. 1, Section 3.4.1.1: diff --git a/src/arch/x86_64/vtd.rs b/src/arch/x86_64/vtd.rs index e168b2f6..895e9d3f 100644 --- a/src/arch/x86_64/vtd.rs +++ b/src/arch/x86_64/vtd.rs @@ -25,6 +25,8 @@ const CONTEXT_TABLE_ENTRY_SIZE: usize = 16; // DMA-remapping registers mod dma_remap_reg { + /// Capability Register + pub const DMAR_CAP_REG: usize = 0x8; /// Extended Capability Register pub const DMAR_ECAP_REG: usize = 0x10; /// Global Command Register @@ -220,9 +222,11 @@ impl Vtd { } fn check_capability(&mut self) { - let ecap = EcapFlags::from_bits_truncate(self.mmio_read_u64(DMAR_ECAP_REG)); - info!("ecap: {:x?}", ecap); - assert!(ecap.contains(EcapFlags::EIM | EcapFlags::IR | EcapFlags::QI)); + let cap = self.mmio_read_u64(DMAR_CAP_REG); + let ecap = self.mmio_read_u64(DMAR_ECAP_REG); + info!("cap: {:x?} ecap: {:x?}", cap, ecap); + assert!(EcapFlags::from_bits_truncate(ecap) + .contains(EcapFlags::EIM | EcapFlags::IR | EcapFlags::QI)); } fn init(&mut self) { @@ -346,6 +350,7 @@ pub fn init() 
{ } pub fn add_device(zone_id: usize, bdf: u64) { + // info!("vtd add device: {:x}, zone: {:x}", bdf, zone_id); VTD.get().unwrap().lock().add_device(zone_id, bdf); } diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index b5604460..064ad0e8 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -20,6 +20,8 @@ pub struct HvArchZoneConfig { pub rsdp_memory_region_id: usize, pub acpi_memory_region_id: usize, pub initrd_memory_region_id: usize, + /// not longer than 32 bits + pub screen_base: usize, } impl Zone { @@ -36,7 +38,7 @@ impl Zone { mem_region.physical_start as HostPhysAddr, mem_region.size as _, flags, - ))? + )); } MEM_TYPE_VIRTIO => { self.mmio_region_register( diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 0fe1b47c..afdd94d2 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -116,24 +116,23 @@ impl VirtIoApic { if reg % 2 == 0 { entry.set_bits(0..=31, value.get_bits(0..=31)); // use host vector instead of guest vector - entry.set_bits( - 0..=7, - idt::get_host_vector(entry.get_bits(0..=7) as u32, zone_id).unwrap() - as _, - ); - /*info!( - "write {:x} is edge: {:x?}, {:x}", - index, - value.get_bit(15), - value - );*/ + let gv = entry.get_bits(0..=7) as u32; + if gv >= 0x20 { + let hv = idt::get_host_vector(gv, zone_id).unwrap(); + entry.set_bits(0..=7, hv as _); + } } else { entry.set_bits(32..=63, value.get_bits(0..=31)); - if zone_id == 0 { + /*if zone_id == 0 { + // info!("1 write {:x} entry: {:x?}", index, *entry); // only root zone modify the real I/O APIC - unsafe { configure_gsi_from_raw(index as _, *entry) }; - } + // unsafe { configure_gsi_from_raw(index as _, *entry) }; + }*/ + } + if zone_id == 0 { + // only root zone modify the real I/O APIC + unsafe { configure_gsi_from_raw(index as _, *entry) }; } } } @@ -203,6 +202,7 @@ fn mmio_ioapic_handler(mmio: &mut MMIOAccess, _: usize) -> HvResult { } unsafe fn configure_gsi_from_raw(irq: u8, raw: 
u64) { + // info!("irq={:x} {:x}", irq, raw); let mut io_apic = IO_APIC.lock(); io_apic.set_table_entry(irq, core::mem::transmute(raw)); } diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index bb2ccc35..7e6a093e 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -111,7 +111,6 @@ pub fn primary_init_early() { PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); idt::init(MAX_ZONE_NUM); ioapic::init_virt_ioapic(MAX_ZONE_NUM); - acpi::root_init(); vtd::init(); } diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index 06c46cc5..4fd60cf5 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -46,4 +46,6 @@ pub use uart_16550::{console_getchar, console_putchar}; #[cfg(target_arch = "x86_64")] mod uart16550a; #[cfg(target_arch = "x86_64")] -pub use uart16550a::{console_getchar, console_putchar, UartReg}; +pub use uart16550a::{ + console_getchar, console_putchar, virt_console_io_read, virt_console_io_write, UartReg, +}; diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs index 259d8841..217a319d 100644 --- a/src/device/uart/uart16550a.rs +++ b/src/device/uart/uart16550a.rs @@ -1,4 +1,11 @@ -use crate::arch::pio::UART_COM1_BASE_PORT; +use core::ops::Range; + +use crate::{ + arch::{graphics::fb_putchar, pio::UART_COM1_BASE_PORT}, + device::irqchip::inject_irq, + error::HvResult, +}; +use alloc::vec::Vec; use spin::Mutex; use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; @@ -25,6 +32,11 @@ lazy_static::lazy_static! { uart.init(115200); Mutex::new(uart) }; + + static ref VIRT_COM1: VirtUart16550a = { + let uart = VirtUart16550a::new(UART_COM1_BASE_PORT); + uart + }; } bitflags::bitflags! 
{ @@ -164,10 +176,175 @@ impl Uart16550a { } } +pub struct VirtUart16550aUnlocked { + iir: u8, + ier: u8, + lcr: u8, + lsr: u8, + fifo: Fifo<64>, +} + +impl VirtUart16550aUnlocked { + fn new() -> Self { + Self { + iir: 0, + ier: 0, + lcr: 0, + lsr: (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(), + fifo: Fifo::new(), + } + } + + fn update_irq(&mut self) { + let mut iir: u8 = 0; + + if self.ier & InterruptEnableFlags::ENABLE_RCVR_DATA_AVAIL_INTR.bits() != 0 + && self.lsr & LineStatusFlags::RCVR_DATA_READY.bits() != 0 + { + iir |= InterruptIdentFlags::RCVR_DATA_AVAIL.bits(); + } + + if self.ier & InterruptEnableFlags::ENABLE_XMIT_HOLD_REG_EMPTY_INTR.bits() != 0 + && self.lsr & LineStatusFlags::XMIT_HOLD_REG_EMPTY.bits() != 0 + { + iir |= InterruptIdentFlags::XMIT_HOLD_REG_EMPTY.bits(); + } + + if iir == 0 { + self.iir = InterruptIdentFlags::NO_INTR_IS_PENDING.bits(); + } else { + self.iir = iir; + // FIXME: + inject_irq(0x4, false); + } + } +} + +pub struct VirtUart16550a { + base_port: u16, + port_range: Vec>, + uart: Mutex, +} + +impl VirtUart16550a { + pub fn new(base_port: u16) -> Self { + Self { + base_port, + port_range: vec![base_port..base_port + 8], + uart: Mutex::new(VirtUart16550aUnlocked::new()), + } + } + + fn port_range(&self) -> &Vec> { + &self.port_range + } + + fn read(&self, port: u16) -> HvResult { + let mut uart = self.uart.lock(); + + let ret = match port - self.base_port { + UartReg::RCVR_BUFFER => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + 1 // dll + } else { + // read a byte from FIFO + if uart.fifo.is_empty() { + 0 + } else { + uart.fifo.pop() + } + } + } + UartReg::INTR_ENABLE => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + 0 // dlm + } else { + uart.ier + } + } + UartReg::INTR_IDENT => { + // info!("IIR read, {:x}", uart.iir); + uart.iir | InterruptIdentFlags::FIFO_ENABLED_16550_MODE.bits() + } + UartReg::LINE_CTRL => uart.lcr, + 
UartReg::LINE_STATUS => { + // check if the physical serial port has an available byte, and push it to FIFO. + if !uart.fifo.is_full() { + if let Some(c) = console_getchar() { + uart.fifo.push(c); + } + } + if !uart.fifo.is_empty() { + uart.lsr |= LineStatusFlags::RCVR_DATA_READY.bits(); + } else { + uart.lsr &= (!LineStatusFlags::RCVR_DATA_READY).bits(); + } + uart.lsr + } + UartReg::MODEM_CTRL | UartReg::MODEM_STATUS | UartReg::SCRATCH => { + debug!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented + 0 + } + _ => unreachable!(), + }; + + uart.update_irq(); + Ok(ret as u32) + } + + fn write(&self, port: u16, value: u32) -> HvResult { + let mut uart = self.uart.lock(); + let value: u8 = value as u8; + + match port - self.base_port { + UartReg::XMIT_BUFFER => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + // dll + } else { + uart.lsr |= + (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(); + if value != 0xff { + console_putchar(value as u8); + } + } + } + UartReg::INTR_ENABLE => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + // dlm + } else { + // info!("ier: {:x}", uart.ier); + uart.ier = value & 0x0f; + } + } + UartReg::LINE_CTRL => { + uart.lcr = value; + } + UartReg::FIFO_CTRL | UartReg::MODEM_CTRL | UartReg::SCRATCH => { + debug!("Unimplemented serial port I/O write: {:#x}", port); + } + UartReg::LINE_STATUS => {} // ignore + _ => unreachable!(), + } + + uart.update_irq(); + Ok(()) + } +} + pub fn console_putchar(c: u8) { COM1.lock().putchar(c); + #[cfg(all(feature = "graphics", target_arch = "x86_64"))] + fb_putchar(c, 0xffffffff, 0); } pub fn console_getchar() -> Option { COM1.lock().getchar() } + +pub fn virt_console_io_read(port: u16) -> u32 { + VIRT_COM1.read(port).unwrap() +} + +pub fn virt_console_io_write(port: u16, value: u32) { + VIRT_COM1.write(port, value).unwrap() +} diff --git a/src/logging.rs b/src/logging.rs index f153ffcc..3e0ce640 100644 --- 
a/src/logging.rs +++ b/src/logging.rs @@ -20,6 +20,9 @@ use spin::Mutex; use crate::device::uart; +#[cfg(all(feature = "graphics", target_arch = "x86_64"))] +use crate::arch::graphics::fb_putstr; + static PRINT_LOCK: Mutex<()> = Mutex::new(()); struct Stdout; @@ -94,6 +97,28 @@ enum ColorCode { BrightWhite = 97, } +fn color_code_to_bgra(code: &ColorCode) -> u32 { + match code { + ColorCode::Black => 0, + ColorCode::Red => 0x0000aaff, + ColorCode::Green => 0x00aa00ff, + ColorCode::Yellow => 0x0055aaff, + ColorCode::Blue => 0xaa0000ff, + ColorCode::Magenta => 0xaa00aaff, + ColorCode::Cyan => 0xaaaa00ff, + ColorCode::White => 0xaaaaaaff, + ColorCode::BrightBlack => 0x555555ff, + ColorCode::BrightRed => 0x5555ffff, + ColorCode::BrightGreen => 0x55ff55ff, + ColorCode::BrightYellow => 0x55ffffff, + ColorCode::BrightBlue => 0xff5555ff, + ColorCode::BrightMagenta => 0xff55ffff, + ColorCode::BrightCyan => 0xffff55ff, + ColorCode::BrightWhite => 0xffffffff, + _ => 0, + } +} + pub fn init() { static LOGGER: SimpleLogger = SimpleLogger; log::set_logger(&LOGGER).unwrap(); @@ -137,6 +162,32 @@ impl Log for SimpleLogger { Level::Debug => ColorCode::Cyan, Level::Trace => ColorCode::BrightBlack, }; + + #[cfg(all(feature = "graphics", target_arch = "x86_64"))] + { + /*fb_putstr("[", color_code_to_bgra(&ColorCode::White)); + fb_putstr( + format!("{:<5} ", level).as_str(), + color_code_to_bgra(&ColorCode::Yellow), + ); + fb_putstr( + format!("{}] ({}:{}) ", cpu_id, target, line).as_str(), + color_code_to_bgra(&ColorCode::White), + ); + fb_putstr( + format!("{}\n", record.args()).as_str(), + color_code_to_bgra(&args_color), + );*/ + println!( + "[{:<5} {}] ({}:{}) {}", + level, + cpu_id, + target, + line, + record.args() + ); + } + #[cfg(not(all(feature = "graphics", target_arch = "x86_64")))] print(with_color!( ColorCode::White, "[{} {}] {} {}\n", diff --git a/src/main.rs b/src/main.rs index 4ae51141..054abdde 100644 --- a/src/main.rs +++ b/src/main.rs @@ -71,8 +71,6 @@ mod pci; 
#[cfg(test)] mod tests; -#[cfg(target_arch = "x86_64")] -use crate::arch::boot::MultibootInfo; #[cfg(target_arch = "aarch64")] use crate::arch::mm::setup_parange; use crate::consts::MAX_CPU_NUM; @@ -164,15 +162,29 @@ fn per_cpu_init(cpu: &mut PerCpu) { // memory::hv_page_table().read().activate(); // }; info!("CPU {} hv_pt_install OK.", cpu.id); + info!( + "cpuid: {} ArchCpu::id:{}", + crate::arch::cpu::this_cpu_id(), + percpu::this_cpu_data().arch_cpu.cpuid + ); } fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize, ncpu: usize) { + #[cfg(target_arch = "x86_64")] + for (&apic_id, _) in crate::arch::acpi::get_lapic_map() { + if apic_id == this_id { + continue; + } + cpu_start(apic_id, arch_entry as _, host_dtb); + } + #[cfg(not(target_arch = "x86_64"))] for cpu_id in 0..ncpu { if cpu_id == this_id { continue; } cpu_start(cpu_id, arch_entry as _, host_dtb); } + println!("wakeup secondary cpus done"); } fn rust_main(cpuid: usize, host_dtb: usize) { @@ -189,8 +201,9 @@ fn rust_main(cpuid: usize, host_dtb: usize) { memory::heap::test(); #[cfg(target_arch = "x86_64")] { - MultibootInfo::init(host_dtb); + crate::arch::boot::module_init(host_dtb); device::irqchip::pic::ioapic::init_ioapic(); + crate::arch::acpi::root_init(); } } @@ -217,7 +230,7 @@ fn rust_main(cpuid: usize, host_dtb: usize) { // FIXME: #[cfg(target_arch = "x86_64")] { - ncpu = 4; + ncpu = crate::arch::acpi::get_lapic_map().len(); } // If we failed to detect, just use default value. 
diff --git a/src/percpu.rs b/src/percpu.rs index bb6f015d..30bbb9a2 100644 --- a/src/percpu.rs +++ b/src/percpu.rs @@ -41,6 +41,10 @@ pub struct PerCpu { impl PerCpu { pub fn new<'a>(cpu_id: usize) -> &'static mut PerCpu { + #[cfg(target_arch = "x86_64")] + let vaddr = PER_CPU_ARRAY_PTR as VirtAddr + + *crate::arch::acpi::get_lapic_map().get(&cpu_id).unwrap() as usize * PER_CPU_SIZE; + #[cfg(not(target_arch = "x86_64"))] let vaddr = PER_CPU_ARRAY_PTR as VirtAddr + cpu_id as usize * PER_CPU_SIZE; let ret = vaddr as *mut Self; unsafe { @@ -86,6 +90,10 @@ impl PerCpu { } pub fn get_cpu_data<'a>(cpu_id: usize) -> &'a mut PerCpu { + #[cfg(target_arch = "x86_64")] + let cpu_data: usize = PER_CPU_ARRAY_PTR as VirtAddr + + *crate::arch::acpi::get_lapic_map().get(&cpu_id).unwrap() as usize * PER_CPU_SIZE; + #[cfg(not(target_arch = "x86_64"))] let cpu_data: usize = PER_CPU_ARRAY_PTR as VirtAddr + cpu_id as usize * PER_CPU_SIZE; unsafe { &mut *(cpu_data as *mut PerCpu) } } diff --git a/src/zone.rs b/src/zone.rs index 832f554f..5bc03c28 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -271,7 +271,7 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { &config.alloc_pci_devs, ); info!("{:#x?}", config.pci_config); - crate::arch::boot::BootParams::fill(&config, &zone.gpm); + crate::arch::boot::BootParams::fill(&config, &mut zone.gpm); crate::arch::acpi::copy_to_guest_memory_region(&config, &cpu_set); } From 0fee5c8834a3468bbdb78307124d34c655be26ed Mon Sep 17 00:00:00 2001 From: Solicey Date: Sat, 26 Jul 2025 11:15:52 +0800 Subject: [PATCH 19/29] x86 NUC14 hacking to root Linux shell --- platform/x86_64/nuc14/board.rs | 71 ++++-- platform/x86_64/qemu/board.rs | 18 +- platform/x86_64/qemu/cargo/features | 3 +- .../x86_64/qemu/image/iso/boot/grub/grub.cfg | 1 + platform/x86_64/qemu/platform.mk | 8 +- src/arch/x86_64/acpi.rs | 106 ++++++-- src/arch/x86_64/boot.rs | 13 +- src/arch/x86_64/cpu.rs | 2 +- src/arch/x86_64/idt.rs | 15 +- src/arch/x86_64/mmio.rs | 227 ++++++++++++------ 
src/arch/x86_64/pci.rs | 28 ++- src/arch/x86_64/pio.rs | 14 +- src/arch/x86_64/trap.rs | 9 +- src/arch/x86_64/vmx.rs | 2 +- src/arch/x86_64/zone.rs | 4 +- src/device/irqchip/pic/ioapic.rs | 20 +- src/main.rs | 5 - src/memory/mmio.rs | 31 +-- src/pci/pci.rs | 8 + src/platform/mod.rs | 6 +- src/zone.rs | 2 +- 21 files changed, 408 insertions(+), 185 deletions(-) diff --git a/platform/x86_64/nuc14/board.rs b/platform/x86_64/nuc14/board.rs index 2335d1f6..640e5f97 100644 --- a/platform/x86_64/nuc14/board.rs +++ b/platform/x86_64/nuc14/board.rs @@ -15,7 +15,9 @@ // use crate::{arch::zone::HvArchZoneConfig, config::*, memory::GuestPhysAddr}; -pub const MEM_TYPE_OTHER_ZONES: u32 = 5; +pub const MEM_TYPE_RESERVED: u32 = 5; + +pub const BOARD_NCPUS: usize = 4; pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; @@ -32,18 +34,18 @@ const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x3a20_0000, // hpa - virtual_start: 0x3520_0000, // gpa + physical_start: 0x3a30_0000, // hpa + virtual_start: 0x3530_0000, // gpa size: 0x10_0000, // modify size accordingly }; pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_CMDLINE: &str = - "video=vesafb console=ttyS0 console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; +pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/ram0 rw init=/init\0"; +// pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ +pub const 
ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 14] = [ HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, physical_start: 0x500_0000, @@ -61,12 +63,12 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ mem_type: MEM_TYPE_RAM, physical_start: 0x1a00_0000, virtual_start: 0x1500_0000, - size: 0x20_0000, + size: 0x30_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x1a20_0000, - virtual_start: 0x1520_0000, + physical_start: 0x1a30_0000, + virtual_start: 0x1530_0000, size: 0x2000_0000, }, // ram ROOT_ZONE_ACPI_REGION, // acpi @@ -78,11 +80,47 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ }, // hpet // TODO: e820 mem space probe HvConfigMemoryRegion { - mem_type: MEM_TYPE_OTHER_ZONES, + mem_type: MEM_TYPE_RESERVED, physical_start: 0x4030_0000, virtual_start: 0x4030_0000, size: 0x2000_0000, }, // zone 1 + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0x6ed7_f000, + virtual_start: 0x6ed7_f000, + size: 0x10_e000, + }, // FIXME: ACPI non-volatile storage + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfeda_0000, + virtual_start: 0xfeda_0000, + size: 0x2_8000, + }, // FIXME: pnp 00:05 + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfe01_1000, + virtual_start: 0xfe01_1000, + size: 0x40_0000, + }, // FIXME: reserved + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0x677a_b000, + virtual_start: 0x677a_b000, + size: 0x74d_3000, + }, // FIXME: reserved + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfd69_0000, + virtual_start: 0xfd69_0000, + size: 0x6_0000, + }, // FIXME: INTC1057:00 + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfb00_0000, + virtual_start: 0xfb00_0000, + size: 0x100_0000, + }, // FIXME: reserved ]; const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0x9000; @@ -98,17 +136,20 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = 
HvArchZoneConfig { kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, setup_load_gpa: ROOT_ZONE_SETUP_ADDR, - initrd_load_gpa: 0x0, - initrd_size: 0x0, + // FIXME: + initrd_load_gpa: 0x1500_0000, + initrd_size: 0x26_b000, rsdp_memory_region_id: 0x1, acpi_memory_region_id: 0x5, - initrd_memory_region_id: 0x0, + // FIXME: + initrd_memory_region_id: 0x3, screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; -pub const ROOT_PCI_DEVS: [u64; 16] = [ +pub const ROOT_PCI_DEVS: [u64; 19] = [ 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xa3, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, -]; // 0xfd, + 0xfd, 0x100, 0x200, +]; #[cfg(all(feature = "graphics", target_arch = "x86_64"))] pub const GRAPHICS_FONT: &[u8] = diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index 05a97341..916edb8b 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -15,7 +15,7 @@ // use crate::{arch::zone::HvArchZoneConfig, config::*, memory::GuestPhysAddr}; -pub const MEM_TYPE_OTHER_ZONES: u32 = 5; +pub const MEM_TYPE_RESERVED: u32 = 5; pub const BOARD_NCPUS: usize = 4; @@ -41,7 +41,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "video=vesafb console=ttyS0 console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; + "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/ram0 rw rdinit=/bin/sh\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb @@ -63,12 +63,12 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ mem_type: MEM_TYPE_RAM, physical_start: 0x1a00_0000, virtual_start: 0x1500_0000, - size: 0x20_0000, + size: 0x30_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x1a20_0000, - 
virtual_start: 0x1520_0000, + physical_start: 0x1a30_0000, + virtual_start: 0x1530_0000, size: 0x2000_0000, }, // ram ROOT_ZONE_ACPI_REGION, // acpi @@ -80,7 +80,7 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ }, // hpet // TODO: e820 mem space probe HvConfigMemoryRegion { - mem_type: MEM_TYPE_OTHER_ZONES, + mem_type: MEM_TYPE_RESERVED, physical_start: 0x4030_0000, virtual_start: 0x4030_0000, size: 0x2000_0000, @@ -100,11 +100,11 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, setup_load_gpa: ROOT_ZONE_SETUP_ADDR, - initrd_load_gpa: 0x0, - initrd_size: 0x0, + initrd_load_gpa: 0x1500_0000, + initrd_size: 0x26_b000, rsdp_memory_region_id: 0x1, acpi_memory_region_id: 0x5, - initrd_memory_region_id: 0x0, + initrd_memory_region_id: 0x3, screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; diff --git a/platform/x86_64/qemu/cargo/features b/platform/x86_64/qemu/cargo/features index e525097a..71878594 100644 --- a/platform/x86_64/qemu/cargo/features +++ b/platform/x86_64/qemu/cargo/features @@ -1,2 +1 @@ -pci -graphics \ No newline at end of file +pci \ No newline at end of file diff --git a/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg b/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg index dea4571a..c2a54ae3 100644 --- a/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg +++ b/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg @@ -9,6 +9,7 @@ menuentry "Hvisor" { module2 /boot/kernel/boot.bin 5008000 module2 /boot/kernel/setup.bin 500a000 module2 /boot/kernel/vmlinux.bin 5100000 + module2 /boot/kernel/initramfs.cpio.gz 1a000000 boot } diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index 02385a37..7be3dd69 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -18,8 +18,10 @@ QEMU_ARGS += -vga std QEMU_ARGS += -device 
intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb=on,aw-bits=48 QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 -QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw -QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw +# QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10009000,format=raw +QEMU_ARGS += -device nvme,serial=deadbeef,drive=X10009000 # QEMU_ARGS += -drive if=none,file="$(zone1_rootfs)",id=X10009000,format=raw # QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10009000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on @@ -35,7 +37,7 @@ QEMU_ARGS += -drive file=$(image_dir)/virtdisk/hvisor.iso,format=raw,index=0,med # QEMU_ARGS += -device loader,file="$(zone0_boot)",addr=0x5008000,force-raw=on # QEMU_ARGS += -device loader,file="$(zone0_setup)",addr=0x500a000,force-raw=on # QEMU_ARGS += -device loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on -# QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x1a000000,force-raw=on # QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" $(hvisor_bin): elf boot diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 621cbbff..ae76b41e 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -255,6 +255,7 @@ pub struct RootAcpi { rsdp_copy: Vec, rsdp: AcpiTable, tables: BTreeMap, + ssdts: BTreeMap, pointers: Vec, devices: Vec, config_space_base: usize, @@ -289,6 +290,12 @@ impl RootAcpi { self.tables.insert(sig, table); } + fn add_ssdt(&mut self, ptr: *const u8, len: usize, rsdt_offset: usize) { + let mut table = AcpiTable::default(); + 
table.fill(Some(Signature::SSDT), ptr, len, ACPI_CHECKSUM_OFFSET); + self.ssdts.insert(rsdt_offset, table); + } + fn get_mut_table(&mut self, sig: Signature) -> Option<&mut AcpiTable> { self.tables.get_mut(&sig) } @@ -310,6 +317,7 @@ impl RootAcpi { ) { let mut rsdp = self.rsdp.clone(); let mut tables = self.tables.clone(); + let mut ssdts = self.ssdts.clone(); // set rsdp addr rsdp.set_addr( @@ -389,6 +397,26 @@ impl RootAcpi { } } + let ban_ssdt = banned_tables.contains(&Signature::SSDT); + let from = tables.get_mut(&Signature::RSDT).unwrap(); + for (&offset, ssdt) in ssdts.iter_mut() { + info!( + "sig: {:x?}, hpa: {:x?}, gpa: {:x?}, size: {:x?}", + Signature::SSDT, + hpa_start + cur, + gpa_start + cur, + ssdt.get_len() + ); + ssdt.set_addr(hpa_start + cur, gpa_start + cur); + cur += ssdt.get_len(); + + let to_gpa = match ban_ssdt { + true => 0, + false => ssdt.gpa, + }; + from.set_u32(to_gpa as _, offset); + } + // update checksums rsdp.update_checksum(RSDP_CHECKSUM_OFFSET); for (sig, table) in tables.iter_mut() { @@ -403,6 +431,11 @@ impl RootAcpi { unsafe { table.copy_to_mem() }; } } + if !ban_ssdt { + for (&offset, ssdt) in ssdts.iter() { + unsafe { ssdt.copy_to_mem() }; + } + } } // let zone 0 bsp cpu does the work @@ -414,7 +447,6 @@ impl RootAcpi { slice::from_raw_parts(rsdp_addr as *const u8, core::mem::size_of::()).to_vec() }; let rsdp_copy_addr = root_acpi.rsdp_copy.as_ptr() as usize; - println!("rsdp: {:x}", rsdp_copy_addr); let handler = HvAcpiHandler {}; let rsdp_mapping = unsafe { @@ -439,16 +471,28 @@ impl RootAcpi { ); // get rsdt - root_acpi.add_new_table( - Signature::RSDT, - rsdp_mapping.rsdt_address() as usize as *const u8, - SDT_HEADER_SIZE, - ); + let rsdt_addr = rsdp_mapping.rsdt_address() as usize; + root_acpi.add_new_table(Signature::RSDT, rsdt_addr as *const u8, SDT_HEADER_SIZE); let mut rsdt_offset = root_acpi.get_mut_table(Signature::RSDT).unwrap().get_len(); let tables = unsafe { AcpiTables::from_validated_rsdp(HvAcpiHandler {}, 
rsdp_mapping) }.unwrap(); + // FIXME: temp + let mut rsdt_entry = rsdt_addr + 36; + let size = (unsafe { *((rsdt_addr + 4) as *const u32) } as usize - 36) / 4; + for i in 0..size { + let addr = unsafe { *(rsdt_entry as *const u32) } as usize; + let sig_ptr = addr as *const u8; + let sig = + unsafe { core::str::from_utf8_unchecked(core::slice::from_raw_parts(sig_ptr, 4)) }; + + println!("sig: {:#x?} ptr: {:x} len: {:x}", sig, addr, unsafe { + *((addr + 4) as *const u32) + }); + rsdt_entry += 4; + } + // mcfg if let Ok(mcfg) = tables.find_table::() { root_acpi.add_new_table( @@ -493,7 +537,7 @@ impl RootAcpi { rsdt_offset += RSDT_PTR_SIZE; } - // println!("fadt"); + // fadt if let Ok(fadt) = tables.find_table::() { root_acpi.add_new_table( Signature::FADT, @@ -501,30 +545,47 @@ impl RootAcpi { fadt.region_length(), ); + println!("---------- FADT ----------"); + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::FADT, RSDT_PTR_SIZE); rsdt_offset += RSDT_PTR_SIZE; + // acpi + let sci_int = fadt.sci_interrupt; + let smi_port = fadt.smi_cmd_port; + let acpi_enable = fadt.acpi_enable; + let acpi_disable = fadt.acpi_disable; + let pm1a_con = fadt.pm1a_control_block(); + let pm1a_evt = fadt.pm1a_event_block(); + + /*println!( + "sci_interrupt: {:x}, smi_cmd_port: {:x}, acpi_enable: {:x}, acpi_disable: {:x}, pm1a_con: {:#x?}, pm1a_evt: {:#x?}", + sci_int, smi_port, acpi_enable, acpi_disable, pm1a_con, pm1a_evt, + );*/ + // println!("{:#x?}", fadt.get()); + // loop {} + // dsdt - // println!("dsdt"); if let Ok(dsdt) = tables.dsdt() { - println!("dsdt ptr: {:x}, len: {:x}", dsdt.address, dsdt.length); root_acpi.add_new_table( Signature::DSDT, (dsdt.address - SDT_HEADER_SIZE) as *const u8, (dsdt.length as usize + SDT_HEADER_SIZE), ); - // println!("dsdt add_new_table"); + println!( + "sig: \"DSDT\" ptr: {:x}, len: {:x}", + dsdt.address, dsdt.length + ); root_acpi.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_32, Signature::DSDT, 4); 
root_acpi.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_64, Signature::DSDT, 8); } // facs - println!("facs"); if let Ok(facs_addr) = fadt.facs_address() { - root_acpi.add_new_table(Signature::FACS, facs_addr as *const u8, unsafe { - *((facs_addr + 4) as *const u32) as usize - }); + let len = unsafe { *((facs_addr + 4) as *const u32) as usize }; + root_acpi.add_new_table(Signature::FACS, facs_addr as *const u8, len); + println!("sig: \"FACS\" ptr: {:x}, len: {:x}", facs_addr, len); root_acpi.add_pointer(Signature::FADT, FADT_FACS_OFFSET_32, Signature::FACS, 4); root_acpi.add_pointer(Signature::FADT, FADT_FACS_OFFSET_64, Signature::FACS, 8); @@ -559,7 +620,6 @@ impl RootAcpi { } // dmar - println!("dmar"); acpi_table!(Dmar, DMAR); if let Ok(dmar) = tables.find_table::() { root_acpi.add_new_table( @@ -568,18 +628,27 @@ impl RootAcpi { dmar.region_length(), ); - println!("dmar: {:x?}", unsafe { + /*println!("DMAR: {:x?}", unsafe { *((dmar.physical_start() + 56) as *const [u8; 8]) - }); + });*/ // self.add_pointer(Signature::RSDT, rsdt_offset, Signature::DMAR, RSDT_PTR_SIZE); // rsdt_offset += RSDT_PTR_SIZE; } + // ssdt + for ssdt in tables.ssdts() { + root_acpi.add_ssdt( + (ssdt.address - SDT_HEADER_SIZE) as *const u8, + (ssdt.length as usize + SDT_HEADER_SIZE), + rsdt_offset, + ); + rsdt_offset += RSDT_PTR_SIZE; + } + if let Some(rsdt) = root_acpi.get_mut_table(Signature::RSDT) { rsdt.set_new_len(rsdt_offset); } - println!("acpi init end"); root_acpi } } @@ -591,6 +660,7 @@ pub fn root_init() { pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { let mut banned: BTreeSet = BTreeSet::new(); + // banned.insert(Signature::SSDT); // FIXME: temp // if config.zone_id != 0 { // banned.insert(Signature::FADT); diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index 021a98b7..bf589c2e 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -4,7 +4,7 @@ use crate::{ error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, 
MemFlags, MemoryRegion, MemorySet}, percpu::this_zone, - platform::MEM_TYPE_OTHER_ZONES, + platform::MEM_TYPE_RESERVED, }; use alloc::string::{String, ToString}; use core::{ @@ -210,12 +210,13 @@ impl BootParams { || i == config.arch_config.acpi_memory_region_id { e820_type = E820Type::E820_ACPI; - } else if config.arch_config.initrd_load_gpa != 0 - && i == config.arch_config.initrd_memory_region_id - { } else if mem_region.mem_type == MEM_TYPE_RAM { e820_type = E820Type::E820_RAM; - } + } /* + else if config.arch_config.initrd_load_gpa != 0 + && i == config.arch_config.initrd_memory_region_id + { + } */ if e820_type != E820Type::E820_DEFAULT { self.e820_table[index] = BootE820Entry { @@ -374,7 +375,7 @@ pub fn print_memory_map() { let cnt = ((mem_map.size as usize) - mem_map_size) / (mem_map.entry_size as usize); let mut entry_addr = map_addr + mem_map_size; - println!("===== MEMORY MAP ====="); + println!("---------- MEMORY MAP ----------"); for i in 0..cnt { let entry = unsafe { *(entry_addr as *const multiboot_tag::MemoryMapEntry) }; println!( diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 0fdf0c0a..5e2dab94 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -279,7 +279,7 @@ impl ArchCpu { self.guest_regs = self.vm_launch_guest_regs.clone(); } - while VMXON_DONE.load(Ordering::Acquire) < unsafe { consts::NCPU } as u32 - 1 { + while VMXON_DONE.load(Ordering::Acquire) < unsafe { consts::MAX_CPU_NUM } as u32 - 1 { core::hint::spin_loop(); } diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 51dc7438..3125e732 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -11,7 +11,7 @@ pub mod IdtVector { pub const ALLOC_START: u8 = 0x20; pub const ALLOC_END: u8 = 0xdf; - pub const VIRT_IPI_VECTOR: u8 = 0xe0; + pub const VIRT_IPI_VECTOR: u8 = 0x1e; pub const APIC_TIMER_VECTOR: u8 = 0xf0; pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; pub const APIC_ERROR_VECTOR: u8 = 0xf2; @@ -40,6 +40,12 @@ impl RemapVectors { 
} fn get_host_vector(&self, gv: u32, zone_id: usize) -> Option { + if gv < 0x20 { + return None; + } + // FIXME: + return Some(gv as _); + let mut vectors = self.inner.get(zone_id).unwrap().lock(); if let Some(&hv) = vectors.gv_to_hv.get(&gv) { @@ -51,7 +57,6 @@ impl RemapVectors { vectors.hv_to_gv.insert(hv, gv); vectors.gv_to_hv.insert(gv, hv); - // info!("gv: {:x}, hv: {:x}", gv, hv); return Some(hv); } } @@ -60,6 +65,12 @@ impl RemapVectors { } fn get_guest_vector(&self, hv: u8, zone_id: usize) -> Option { + if hv < 0x20 { + return None; + } + // FIXME: + return Some(hv as _); + let mut vectors = self.inner.get(zone_id).unwrap().lock(); if let Some(&gv) = vectors.hv_to_gv.get(&hv) { diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs index 7f372c97..de0e1063 100644 --- a/src/arch/x86_64/mmio.rs +++ b/src/arch/x86_64/mmio.rs @@ -4,12 +4,15 @@ use crate::{ vmcs::{VmcsGuest16, VmcsGuestNW}, }, error::HvResult, - memory::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr, MMIOAccess, MMIOHandler}, + memory::{ + addr::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr}, + MMIOAccess, MMIOHandler, + }, percpu::{this_cpu_data, this_zone}, }; use alloc::{sync::Arc, vec::Vec}; use bit_field::BitField; -use core::{mem::size_of, ops::Range, slice::from_raw_parts}; +use core::{mem::size_of, ops::Range, ptr::write_volatile, slice::from_raw_parts}; use spin::Mutex; use x86::controlregs::{Cr0, Cr4}; @@ -126,13 +129,14 @@ impl RmReg { /* G: general registers E: registers / memory -B: byte -V: word / dword / qword +b: byte +w: word +v: word / dword / qword */ numeric_enum_macro::numeric_enum! { #[repr(u8)] #[derive(Debug)] -pub enum OpCode { +pub enum OneByteOpCode { // move r to r/m MovEbGb = 0x88, MovEvGv = 0x89, @@ -141,6 +145,14 @@ pub enum OpCode { MovGvEv = 0x8b, } } +numeric_enum_macro::numeric_enum! { +#[repr(u8)] +#[derive(Debug)] +pub enum TwoByteOpCode { + MovZxGvEb = 0xb6, + MovZxGvEw = 0xb7, +} +} bitflags::bitflags! 
{ #[derive(Debug, PartialEq)] @@ -155,6 +167,8 @@ const REX_PREFIX_HIGH: u8 = 0x4; const OPERAND_SIZE_OVERRIDE_PREFIX: u8 = 0x66; +const TWO_BYTE_ESCAPE: u8 = 0xf; + // len stands for instruction len enum OprandType { Reg { reg: RmReg, len: usize }, @@ -258,7 +272,7 @@ fn gva_to_gpa(gva: GuestVirtAddr) -> HvResult { // lookup guest page table in long mode - let p4_gpa = VmcsGuestNW::CR3.read()?; + let p4_gpa = (VmcsGuestNW::CR3.read()?) & !(0xfff); let p4_hpa = gpa_to_hpa(p4_gpa)?; let p4_entry_id = (gva >> 39) & 0x1ff; let p4_entry = get_page_entry(p4_hpa, p4_entry_id); @@ -349,83 +363,153 @@ fn emulate_inst( let mut rex = RexPrefixLow::from_bits_truncate(0); if inst[cur_id].get_bits(4..=7) == REX_PREFIX_HIGH { rex = RexPrefixLow::from_bits_truncate(inst[cur_id].get_bits(0..=3)); + // we haven't implemented other situations yet assert!(rex == RexPrefixLow::REGISTERS); cur_id += 1; } - let opcode: OpCode = inst[cur_id].try_into().unwrap(); - cur_id += 1; - - if !size_override { - size = match opcode { - OpCode::MovEbGb | OpCode::MovGbEb => size_of::(), - _ => size, - }; + let mut two_byte = false; + if inst[cur_id] == TWO_BYTE_ESCAPE { + two_byte = true; + cur_id += 1; } - match opcode { - OpCode::MovEbGb | OpCode::MovEvGv => { - let mod_rm = ModRM::new(inst[cur_id], &rex); - cur_id += 1; + if !two_byte { + if OneByteOpCode::try_from(inst[cur_id]).is_err() { + error!("inst: {:#x?}", inst); + } + let opcode: OneByteOpCode = inst[cur_id].try_into().unwrap(); + cur_id += 1; - let src = mod_rm.get_reg(); - let src_val = src.read().unwrap(); + if !size_override { + size = match opcode { + OneByteOpCode::MovEbGb | OneByteOpCode::MovGbEb => size_of::(), + _ => size, + }; + } - let dst = mod_rm.get_modrm(inst, cur_id).unwrap(); - match dst { - OprandType::Reg { reg, len } => { - cur_id += len; - reg.write(src_val, size).unwrap(); + match opcode { + OneByteOpCode::MovEbGb | OneByteOpCode::MovEvGv => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let src 
= mod_rm.get_reg(); + let src_val = src.read().unwrap(); + + let dst = mod_rm.get_modrm(inst, cur_id).unwrap(); + match dst { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.write(src_val, size).unwrap(); + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + + mmio.address = gpa - base; + mmio.is_write = true; + mmio.size = size; + mmio.value = src_val as _; + + handler(mmio, base); + } + _ => {} } - OprandType::Gpa { gpa, len } => { - cur_id += len; - - mmio.address = gpa - base; - mmio.is_write = true; - mmio.size = size; - mmio.value = src_val as _; - handler(mmio, base); - } - _ => {} + Ok(cur_id) + } + OneByteOpCode::MovGbEb | OneByteOpCode::MovGvEv => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let dst = mod_rm.get_reg(); + + let src = mod_rm.get_modrm(inst, cur_id).unwrap(); + let src_val = match src { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.read().unwrap() + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + + mmio.address = gpa - base; + mmio.is_write = false; + mmio.size = size; + mmio.value = 0; + // info!("src_val: {:x}", gpa); + + handler(mmio, base); + mmio.value as u64 + } + }; + + dst.write(src_val, size).unwrap(); + Ok(cur_id) + } + _ => { + hv_result_err!( + ENOSYS, + format!("Unimplemented opcode: 0x{:x}", opcode as u8) + ) } - - Ok(cur_id) } - OpCode::MovGbEb | OpCode::MovGvEv => { - let mod_rm = ModRM::new(inst[cur_id], &rex); - cur_id += 1; - - let dst = mod_rm.get_reg(); - - let src = mod_rm.get_modrm(inst, cur_id).unwrap(); - let src_val = match src { - OprandType::Reg { reg, len } => { - cur_id += len; - reg.read().unwrap() - } - OprandType::Gpa { gpa, len } => { - cur_id += len; - - mmio.address = gpa - base; - mmio.is_write = false; - mmio.size = size; - mmio.value = 0; - // info!("src_val: {:x}", gpa); + } else { + if TwoByteOpCode::try_from(inst[cur_id]).is_err() { + error!("inst: {:#x?}", inst); + } + let opcode: TwoByteOpCode = inst[cur_id].try_into().unwrap(); + cur_id 
+= 1; - handler(mmio, base); - mmio.value as u64 - } + if !size_override { + size = match opcode { + TwoByteOpCode::MovZxGvEb => size_of::(), + TwoByteOpCode::MovZxGvEw => size_of::(), + _ => size, }; - - dst.write(src_val, size).unwrap(); - Ok(cur_id) } - _ => { - hv_result_err!( - ENOSYS, - format!("Unimplemented opcode: 0x{:x}", opcode as u8) - ) + + match opcode { + TwoByteOpCode::MovZxGvEb | TwoByteOpCode::MovZxGvEw => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let dst = mod_rm.get_reg(); + + let src = mod_rm.get_modrm(inst, cur_id).unwrap(); + let src_val = match src { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.read().unwrap() + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + + mmio.address = gpa - base; + mmio.is_write = false; + mmio.size = size; + mmio.value = 0; + // info!("src_val: {:x}", gpa); + + handler(mmio, base); + mmio.value as u64 + } + }; + let src_val_zero_extend = match size { + 1 => src_val.get_bits(0..8), + 2 => src_val.get_bits(0..16), + 4 => src_val.get_bits(0..32), + _ => src_val, + }; + + dst.write(src_val_zero_extend, 8).unwrap(); + Ok(cur_id) + } + _ => { + hv_result_err!( + ENOSYS, + format!("Unimplemented opcode: 0x{:x}", opcode as u8) + ) + } } } } @@ -441,3 +525,10 @@ pub fn instruction_emulator(handler: &MMIOHandler, mmio: &mut MMIOAccess, base: Ok(()) } + +pub fn mmio_empty_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { + if !mmio.is_write { + mmio.value = 0; + } + Ok(()) +} diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index 8a624eb8..ed95c989 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -106,6 +106,7 @@ pub fn probe_root_pci_devices( // msi capablility let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; let is_64b = msg_ctrl_reg.get_bit(7); + let per_vector_masking = msg_ctrl_reg.get_bit(8); let data_reg_hpa = match is_64b { true => cap_hpa + 0xc, @@ -113,6 +114,7 @@ pub fn probe_root_pci_devices( }; 
msi_data_reg_map.insert(data_reg_hpa, bdf as _); // println!("msi data reg hpa: {:x?}", data_reg_hpa); + println!("msi per vector masking: {:#x?}", per_vector_masking); } else if cap_id == 0x11 { // msi-x capability let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; @@ -200,27 +202,29 @@ pub fn mmio_msi_data_reg_handler( let host_vector = unsafe { core::ptr::read_volatile(hpa as *mut u32) } as u8; if mmio.is_write { - let alloc_host_vector = idt::get_host_vector(mmio.value as _, zone_id).unwrap(); - if host_vector != alloc_host_vector { - idt::clear_vectors(host_vector, zone_id); + info!( + "MSI write, bdf: {:x} hpa: {:x} gv: {:x}", + bdf, hpa, mmio.value + ); + if let Some(alloc_host_vector) = idt::get_host_vector(mmio.value as _, zone_id) { + if host_vector != alloc_host_vector { + idt::clear_vectors(host_vector, zone_id); + } mmio.value = alloc_host_vector as _; - mmio_perform_access(base, mmio); + info!( + "MSI write, old_hv: {:x} alloc_hv: {:x}", + host_vector, alloc_host_vector + ); } + mmio_perform_access(base, mmio); } else { if let Some(guest_vector) = idt::get_guest_vector(host_vector, zone_id) { mmio.value = guest_vector as _; } else { + warn!("msi can't get hv with gv"); mmio.value = host_vector as _; } } - trace!( - "mmio_msi_data_reg_handler! 
hpa: {:x}, bdf: {:x}, is write: {:x?}, read value: {:x}, write value: {:x}", - base + mmio.address, - bdf, - mmio.is_write, - host_vector, - mmio.value - ); Ok(()) } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 198dc08b..631530d7 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -26,8 +26,9 @@ impl PortIoBitmap { pci_config_addr: 0, }; - bitmap.a.fill(0xff); - bitmap.b.fill(0xff); + // FIXME: zone0 + bitmap.a.fill(0); + bitmap.b.fill(0); // ban i8259a ports bitmap.set_intercept(0x20, true); @@ -43,14 +44,15 @@ impl PortIoBitmap { // FIXME: uart & i8254 if zone_id == 0 { bitmap.set_range_intercept(0x60..0x65, false); - #[cfg(not(feature = "graphics"))] - bitmap.set_range_intercept(0x3f8..0x400, false); + #[cfg(feature = "graphics")] + bitmap.set_range_intercept(UART_COM1_PORT, true); } // bitmap.set_range_intercept(0x3f8..0x400, false); // FIXME: get port info from ACPI FACP table - bitmap.set_intercept(0xb2, false); - bitmap.set_range_intercept(0x600..0x630, false); + // bitmap.set_intercept(0xb2, false); + // bitmap.set_range_intercept(0x600..0x630, false); + // bitmap.set_range_intercept(0x1800..0x1900, false); bitmap } diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index a6bdc87b..eeb7acd1 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -99,9 +99,7 @@ fn handle_irq(vector: u8) { ), _ => match get_guest_vector(vector, this_zone_id()) { Some(gv) => { - if gv < 0x20 { - info!("inject {:x} {:x}", vector, gv); - } + // info!("inject: {:x}", vector); inject_vector(this_cpu_id(), gv as _, None, false); } None => { @@ -265,12 +263,15 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR _ => unreachable!(), } as _; + // FIXME: reconstruct if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) || PCI_CONFIG_DATA_PORT.contains(&io_info.port) { handle_pci_config_port_write(&io_info, value); } else if UART_COM1_PORT.contains(&io_info.port) { 
virt_console_io_write(io_info.port, value); + } else { + // info!("io write {:x} value: {:x}", io_info.port, value); } } else { if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) @@ -279,6 +280,8 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR value = handle_pci_config_port_read(&io_info); } else if UART_COM1_PORT.contains(&io_info.port) { value = virt_console_io_read(io_info.port); + } else { + // info!("io read {:x}", io_info.port); } let rax = &mut arch_cpu.regs_mut().rax; // SDM Vol. 1, Section 3.4.1.1: diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs index 9bd1ccc3..baa5eaf7 100644 --- a/src/arch/x86_64/vmx.rs +++ b/src/arch/x86_64/vmx.rs @@ -7,7 +7,7 @@ use crate::{ }, consts::PAGE_SIZE, error::{HvError, HvResult}, - memory::{Frame, GuestPhysAddr, HostPhysAddr, HostVirtAddr, MemFlags, PhysAddr}, + memory::{Frame, GuestPhysAddr, HostPhysAddr, MemFlags, PhysAddr}, }; use bit_field::BitField; use bitflags::{bitflags, Flags}; diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index 064ad0e8..b8a338f5 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -3,7 +3,7 @@ use crate::{ device::virtio_trampoline::mmio_virtio_handler, error::HvResult, memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, - platform::MEM_TYPE_OTHER_ZONES, + platform::MEM_TYPE_RESERVED, zone::Zone, }; @@ -32,7 +32,7 @@ impl Zone { flags |= MemFlags::IO; } match mem_region.mem_type { - MEM_TYPE_RAM | MEM_TYPE_IO | MEM_TYPE_OTHER_ZONES => { + MEM_TYPE_RAM | MEM_TYPE_IO | MEM_TYPE_RESERVED => { self.gpm.insert(MemoryRegion::new_with_offset_mapper( mem_region.virtual_start as GuestPhysAddr, mem_region.physical_start as HostPhysAddr, diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index afdd94d2..533bb95e 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -117,8 +117,7 @@ impl VirtIoApic { entry.set_bits(0..=31, value.get_bits(0..=31)); // use 
host vector instead of guest vector let gv = entry.get_bits(0..=7) as u32; - if gv >= 0x20 { - let hv = idt::get_host_vector(gv, zone_id).unwrap(); + if let Some(hv) = idt::get_host_vector(gv, zone_id) { entry.set_bits(0..=7, hv as _); } } else { @@ -157,18 +156,11 @@ impl VirtIoApic { let dest = entry.get_bits(56..=63) as usize; let masked = entry.get_bit(16); let vector = entry.get_bits(0..=7) as u8; - /*info!( - "trigger gv: {:x} zone: {:x}", - idt::get_guest_vector(vector as _, zone_id).unwrap(), - zone_id - );*/ - if !masked { - inject_vector( - dest, - idt::get_guest_vector(vector as _, zone_id).unwrap() as _, - None, - allow_repeat, - ); + // info!("trigger hv: {:x} zone: {:x}", vector, zone_id); + if let Some(gv) = idt::get_guest_vector(vector as _, zone_id) { + if !masked { + inject_vector(dest, gv as _, None, allow_repeat); + } } } Ok(()) diff --git a/src/main.rs b/src/main.rs index f794b69a..c8b30c8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -159,11 +159,6 @@ fn per_cpu_init(cpu: &mut PerCpu) { warn!("zone is not created for cpu {}", cpu.id); } info!("CPU {} hv_pt_install OK.", cpu.id); - info!( - "cpuid: {} ArchCpu::id:{}", - crate::arch::cpu::this_cpu_id(), - percpu::this_cpu_data().arch_cpu.cpuid - ); } fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { diff --git a/src/memory/mmio.rs b/src/memory/mmio.rs index 2005beec..3a8524b5 100644 --- a/src/memory/mmio.rs +++ b/src/memory/mmio.rs @@ -90,24 +90,27 @@ pub fn mmio_handle_access(mmio: &mut MMIOAccess) -> HvResult { match res { Some((region, handler, arg)) => { mmio.address -= region.start; - if cfg!(target_arch = "x86_64") { - if mmio.size == 0 { - #[cfg(target_arch = "x86_64")] - crate::arch::mmio::instruction_emulator(&handler, mmio, arg) - } else { - handler(mmio, arg) - } - } else { - match handler(mmio, arg) { - Ok(_) => Ok(()), - Err(e) => { - error!("mmio handler returned error: {:#x?}", e); - Err(e) - } + + #[cfg(target_arch = "x86_64")] + if mmio.size == 0 { + return 
crate::arch::mmio::instruction_emulator(&handler, mmio, arg); + } + + match handler(mmio, arg) { + Ok(_) => Ok(()), + Err(e) => { + error!("mmio handler returned error: {:#x?}", e); + Err(e) } } } None => { + /*#[cfg(target_arch = "x86_64")] + if mmio.size == 0 { + let handler: MMIOHandler = mmio_generic_handler; //crate::arch::mmio::mmio_empty_handler; + return crate::arch::mmio::instruction_emulator(&handler, mmio, 0); + }*/ + warn!("Zone {} unhandled mmio fault {:#x?}", zone_id, mmio); hv_result_err!(EINVAL) } diff --git a/src/pci/pci.rs b/src/pci/pci.rs index d4238ff1..73711148 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -217,6 +217,7 @@ impl Zone { self.virtual_pci_mmio_init(pci_config, hv_addr_prefix, loong_ht_prefix); self.virtual_pci_device_init(pci_config); self.pciroot.alloc_devs = root_zone_alloc_devs; + return; } #[cfg(not(target_arch = "x86_64"))] { @@ -356,6 +357,9 @@ impl Zone { trace!("pciroot = {:?}", self.pciroot); self.pciroot.bars_register(); + #[cfg(target_arch = "x86_64")] + self.pci_bars_register(pci_config); + #[cfg(not(target_arch = "x86_64"))] if self.id != 0 { self.pci_bars_register(pci_config); } @@ -489,7 +493,11 @@ pub fn mmio_pci_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { mmio.value = header_val as _; return Ok(()); } else { + #[cfg(not(target_arch = "x86_64"))] panic!("invalid access to empty device {:x}:{:x}.{:x}, addr: {:#x}, reg_addr: {:#x}!", bdf >> 8, (bdf >> 3) & 0b11111, bdf & 0b111, mmio.address, reg_addr); + // in x86, linux will probe for pci devices automatically + #[cfg(target_arch = "x86_64")] + return Ok(()); } } else { // device exists, so we try to get the phantom device diff --git a/src/platform/mod.rs b/src/platform/mod.rs index 735e8a04..733d61ad 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -75,7 +75,7 @@ pub fn platform_root_zone_config() -> HvZoneConfig { let mut _root_pci_cfg = HvPciConfig::new_empty(); let mut _num_pci_devs: u64 = 0; - #[cfg(feature = "pci")] + 
#[cfg(all(feature = "pci", not(target_arch = "x86_64")))] { check!(ROOT_PCI_DEVS.len(), CONFIG_MAX_PCI_DEV, "ROOT_PCI_DEVS"); pci_devs[..ROOT_PCI_DEVS.len()].copy_from_slice(&ROOT_PCI_DEVS); @@ -86,9 +86,9 @@ pub fn platform_root_zone_config() -> HvZoneConfig { { pci_devs[..ROOT_PCI_DEVS.len()].copy_from_slice(&ROOT_PCI_DEVS); let config_space_info = crate::arch::acpi::root_get_config_space_info().unwrap(); - (root_pci_cfg.ecam_base, root_pci_cfg.ecam_size) = + (_root_pci_cfg.ecam_base, _root_pci_cfg.ecam_size) = (config_space_info.0 as _, config_space_info.1 as _); - num_pci_devs = ROOT_PCI_DEVS.len() as _; + _num_pci_devs = ROOT_PCI_DEVS.len() as _; } HvZoneConfig::new( diff --git a/src/zone.rs b/src/zone.rs index a3c4a33b..54370c9e 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -236,7 +236,7 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { config.pci_config.ecam_size as _, )?; - #[cfg(all(feature = "pci"))] + #[cfg(all(feature = "pci", not(target_arch = "x86_64")))] zone.pci_init( &config.pci_config, config.num_pci_devs as _, From fe7fc08e9b0d9d3643d4a75d2ddd7ac9939025ea Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 27 Jul 2025 15:25:58 +0800 Subject: [PATCH 20/29] x86 remove vector remapping, run rootfs on sda --- platform/x86_64/nuc14/board.rs | 8 +-- platform/x86_64/qemu/board.rs | 2 +- src/arch/x86_64/idt.rs | 118 ++----------------------------- src/arch/x86_64/pci.rs | 51 ------------- src/arch/x86_64/trap.rs | 21 ++---- src/device/irqchip/pic/ioapic.rs | 19 +---- src/device/irqchip/pic/lapic.rs | 36 +++++++--- src/device/irqchip/pic/mod.rs | 1 - src/pci/pci.rs | 11 +-- 9 files changed, 44 insertions(+), 223 deletions(-) diff --git a/platform/x86_64/nuc14/board.rs b/platform/x86_64/nuc14/board.rs index 640e5f97..b94757cf 100644 --- a/platform/x86_64/nuc14/board.rs +++ b/platform/x86_64/nuc14/board.rs @@ -40,7 +40,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { }; pub const ROOT_ZONE_NAME: &str = "root-linux"; 
-pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/ram0 rw init=/init\0"; +pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/sda2 rw init=/bin/sh rootwait\0"; // pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb @@ -137,12 +137,12 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, setup_load_gpa: ROOT_ZONE_SETUP_ADDR, // FIXME: - initrd_load_gpa: 0x1500_0000, - initrd_size: 0x26_b000, + initrd_load_gpa: 0, //0x1500_0000, + initrd_size: 0, //0x26_b000, rsdp_memory_region_id: 0x1, acpi_memory_region_id: 0x5, // FIXME: - initrd_memory_region_id: 0x3, + initrd_memory_region_id: 0, //0x3, screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index 916edb8b..f103dd89 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -41,7 +41,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/ram0 rw rdinit=/bin/sh\0"; + "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/ram0 rw rdinit=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 3125e732..caa24798 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -8,104 +8,10 @@ const VECTOR_CNT: 
usize = 256; #[allow(non_snake_case)] pub mod IdtVector { - pub const ALLOC_START: u8 = 0x20; - pub const ALLOC_END: u8 = 0xdf; - - pub const VIRT_IPI_VECTOR: u8 = 0x1e; - pub const APIC_TIMER_VECTOR: u8 = 0xf0; - pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; - pub const APIC_ERROR_VECTOR: u8 = 0xf2; -} - -lazy_static::lazy_static! { - static ref ALLOC_VECTORS: Mutex = { - Mutex::new(RemapVectorsUnlocked::new()) - }; -} - -static REMAP_VECTORS: Once = Once::new(); - -struct RemapVectors { - inner: Vec>, -} - -impl RemapVectors { - fn new(max_zones: usize) -> Self { - let mut vs = vec![]; - for _ in 0..max_zones { - let v = Mutex::new(RemapVectorsUnlocked::new()); - vs.push(v) - } - Self { inner: vs } - } - - fn get_host_vector(&self, gv: u32, zone_id: usize) -> Option { - if gv < 0x20 { - return None; - } - // FIXME: - return Some(gv as _); - - let mut vectors = self.inner.get(zone_id).unwrap().lock(); - - if let Some(&hv) = vectors.gv_to_hv.get(&gv) { - return Some(hv); - } - - for hv in IdtVector::ALLOC_START..=IdtVector::ALLOC_END { - if !vectors.hv_to_gv.contains_key(&hv) { - vectors.hv_to_gv.insert(hv, gv); - vectors.gv_to_hv.insert(gv, hv); - - return Some(hv); - } - } - - None - } - - fn get_guest_vector(&self, hv: u8, zone_id: usize) -> Option { - if hv < 0x20 { - return None; - } - // FIXME: - return Some(hv as _); - - let mut vectors = self.inner.get(zone_id).unwrap().lock(); - - if let Some(&gv) = vectors.hv_to_gv.get(&hv) { - if gv != u32::MAX { - return Some(gv); - } - } - - None - } - - fn clear_vectors(&self, hv: u8, zone_id: usize) { - let mut vectors = self.inner.get(zone_id).unwrap().lock(); - - if let Some(&gv) = vectors.hv_to_gv.get(&hv) { - vectors.hv_to_gv.remove_entry(&hv); - vectors.gv_to_hv.remove_entry(&gv); - } - } -} - -struct RemapVectorsUnlocked { - // key: host vector value: guest vector - hv_to_gv: BTreeMap, - // key: guest vector value: host vector - gv_to_hv: BTreeMap, -} - -impl RemapVectorsUnlocked { - fn new() -> Self { - Self { - 
hv_to_gv: BTreeMap::new(), - gv_to_hv: BTreeMap::new(), - } - } + pub const VIRT_IPI_VECTOR: u8 = 0x1c; + pub const APIC_ERROR_VECTOR: u8 = 0xfc; + pub const APIC_SPURIOUS_VECTOR: u8 = 0xfd; + pub const APIC_TIMER_VECTOR: u8 = 0xfe; } pub struct IdtStruct { @@ -137,19 +43,3 @@ impl IdtStruct { self.table.load(); } } - -pub fn get_host_vector(gv: u32, zone_id: usize) -> Option { - REMAP_VECTORS.get().unwrap().get_host_vector(gv, zone_id) -} - -pub fn get_guest_vector(hv: u8, zone_id: usize) -> Option { - REMAP_VECTORS.get().unwrap().get_guest_vector(hv, zone_id) -} - -pub fn clear_vectors(hv: u8, zone_id: usize) { - REMAP_VECTORS.get().unwrap().clear_vectors(hv, zone_id); -} - -pub fn init(max_zones: usize) { - REMAP_VECTORS.call_once(|| RemapVectors::new(max_zones)); -} diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index ed95c989..5a2c3410 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -177,57 +177,6 @@ pub fn probe_root_pci_devices( ) } -pub fn mmio_msix_table_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { - let hpa = base + mmio.address; - - let zone = this_zone(); - let zone_id = zone.read().id; - - let bdf = acpi::is_msi_data_reg(hpa); - if bdf.is_some() && zone.write().pciroot.is_assigned_device(bdf.unwrap()) { - mmio_msi_data_reg_handler(mmio, base, bdf.unwrap(), zone_id) - } else { - mmio_perform_access(base, mmio); - Ok(()) - } -} - -pub fn mmio_msi_data_reg_handler( - mmio: &mut MMIOAccess, - base: usize, - bdf: usize, - zone_id: usize, -) -> HvResult { - let hpa = base + mmio.address; - - let host_vector = unsafe { core::ptr::read_volatile(hpa as *mut u32) } as u8; - if mmio.is_write { - info!( - "MSI write, bdf: {:x} hpa: {:x} gv: {:x}", - bdf, hpa, mmio.value - ); - if let Some(alloc_host_vector) = idt::get_host_vector(mmio.value as _, zone_id) { - if host_vector != alloc_host_vector { - idt::clear_vectors(host_vector, zone_id); - } - mmio.value = alloc_host_vector as _; - info!( - "MSI write, old_hv: {:x} 
alloc_hv: {:x}", - host_vector, alloc_host_vector - ); - } - mmio_perform_access(base, mmio); - } else { - if let Some(guest_vector) = idt::get_guest_vector(host_vector, zone_id) { - mmio.value = guest_vector as _; - } else { - warn!("msi can't get hv with gv"); - mmio.value = host_vector as _; - } - } - Ok(()) -} - fn get_pci_mmio_addr() -> Option { let addr = this_zone().read().pio_bitmap.pci_config_addr as usize; let (base, _) = crate::arch::acpi::root_get_config_space_info().unwrap(); diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index eeb7acd1..3d4691f0 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -5,7 +5,7 @@ use crate::{ cpu::{this_cpu_id, ArchCpu}, cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, hpet, - idt::{get_guest_vector, get_host_vector, IdtStruct, IdtVector}, + idt::{IdtStruct, IdtVector}, ipi, msr::Msr::{self, *}, s2pt::Stage2PageFaultInfo, @@ -91,21 +91,12 @@ fn handle_irq(vector: u8) { IdtVector::VIRT_IPI_VECTOR => { ipi::handle_virt_ipi(); } - IdtVector::APIC_TIMER_VECTOR => inject_vector( - this_cpu_id(), - this_cpu_data().arch_cpu.virt_lapic.virt_timer_vector, - None, - false, - ), - _ => match get_guest_vector(vector, this_zone_id()) { - Some(gv) => { - // info!("inject: {:x}", vector); - inject_vector(this_cpu_id(), gv as _, None, false); - } - None => { - warn!("can't find guest vector with host vector {:x}", vector); + IdtVector::APIC_SPURIOUS_VECTOR | IdtVector::APIC_ERROR_VECTOR => {} + _ => { + if vector >= 0x20 { + inject_vector(this_cpu_id(), vector, None, false); } - }, + } } unsafe { VirtLocalApic::phys_local_apic().end_of_interrupt() }; } diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 533bb95e..6144baa5 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -75,13 +75,7 @@ impl VirtIoApic { let index = (reg >> 1) as usize; if let Some(entry) = inner.rte.get(index) { if reg % 2 == 0 { - let mut lower = 
(*entry).get_bits(0..=31); - if let Some(gv) = - idt::get_guest_vector(lower.get_bits(0..=7) as u8, zone_id) - { - lower.set_bits(0..=7, gv as _); - } - Ok(lower.get_bits(0..=31)) + Ok((*entry).get_bits(0..=31)) } else { Ok((*entry).get_bits(32..=63)) } @@ -115,11 +109,6 @@ impl VirtIoApic { if let Some(entry) = inner.rte.get_mut(index) { if reg % 2 == 0 { entry.set_bits(0..=31, value.get_bits(0..=31)); - // use host vector instead of guest vector - let gv = entry.get_bits(0..=7) as u32; - if let Some(hv) = idt::get_host_vector(gv, zone_id) { - entry.set_bits(0..=7, hv as _); - } } else { entry.set_bits(32..=63, value.get_bits(0..=31)); @@ -157,10 +146,8 @@ impl VirtIoApic { let masked = entry.get_bit(16); let vector = entry.get_bits(0..=7) as u8; // info!("trigger hv: {:x} zone: {:x}", vector, zone_id); - if let Some(gv) = idt::get_guest_vector(vector as _, zone_id) { - if !masked { - inject_vector(dest, gv as _, None, allow_repeat); - } + if !masked && vector >= 0x20 { + inject_vector(dest, vector, None, allow_repeat); } } Ok(()) diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index fc0673c8..4c65b716 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -22,23 +22,29 @@ pub struct VirtLocalApic { impl VirtLocalApic { pub fn new() -> Self { + Self { + phys_lapic: Self::new_phys_lapic( + IdtVector::APIC_TIMER_VECTOR as _, + IdtVector::APIC_ERROR_VECTOR as _, + IdtVector::APIC_SPURIOUS_VECTOR as _, + ), + virt_timer_vector: IdtVector::APIC_TIMER_VECTOR as _, + virt_lvt_timer_bits: (1 << 16) as _, // masked + } + } + + fn new_phys_lapic(timer: usize, error: usize, spurious: usize) -> LocalApic { let mut lapic = LocalApicBuilder::new() - .timer_vector(IdtVector::APIC_TIMER_VECTOR as _) - .error_vector(IdtVector::APIC_ERROR_VECTOR as _) - .spurious_vector(IdtVector::APIC_SPURIOUS_VECTOR as _) + .timer_vector(timer) + .error_vector(error) + .spurious_vector(spurious) .build() .unwrap(); - unsafe { 
lapic.enable(); lapic.disable_timer(); } - - Self { - phys_lapic: lapic, - virt_timer_vector: 0, - virt_lvt_timer_bits: (1 << 16) as _, // masked - } + lapic } pub const fn msr_range() -> Range { @@ -85,7 +91,15 @@ impl VirtLocalApic { } IA32_X2APIC_LVT_TIMER => { self.virt_lvt_timer_bits = value as u32; - self.virt_timer_vector = value.get_bits(0..=7) as _; + let timer = value.get_bits(0..=7) as u8; + if timer != self.virt_timer_vector { + self.virt_timer_vector = timer; + self.phys_lapic = Self::new_phys_lapic( + timer as _, + IdtVector::APIC_ERROR_VECTOR as _, + IdtVector::APIC_SPURIOUS_VECTOR as _, + ) + } unsafe { self.phys_lapic .set_timer_mode(match value.get_bits(17..19) { diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 7e6a093e..bc4173f4 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -109,7 +109,6 @@ pub fn percpu_init() {} pub fn primary_init_early() { ipi::init(MAX_CPU_NUM); PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); - idt::init(MAX_ZONE_NUM); ioapic::init_virt_ioapic(MAX_ZONE_NUM); vtd::init(); } diff --git a/src/pci/pci.rs b/src/pci/pci.rs index 73711148..c540e8fc 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -432,7 +432,7 @@ impl Zone { self.mmio_region_register( region.start, region.size, - crate::arch::x86_64::pci::mmio_msix_table_handler, + crate::memory::mmio_generic_handler, region.start, ); } @@ -459,15 +459,6 @@ pub fn mmio_pci_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { match is_assigned { true => { - #[cfg(target_arch = "x86_64")] - { - if let Some(bdf) = crate::arch::acpi::is_msi_data_reg(base + mmio.address) { - crate::arch::pci::mmio_msi_data_reg_handler(mmio, base, bdf, zone_id); - } else { - mmio_perform_access(base, mmio); - } - } - #[cfg(not(target_arch = "x86_64"))] mmio_perform_access(base, mmio); if bus == 6 && reg_addr == 0x150 && !mmio.is_write { // assume pcie network card is in bus 6(X4 slot in 3A6000 board), this will 
skip it's sriov From bd7acc288c0c21809b9d184de5da9de16fb872b0 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 27 Jul 2025 21:44:18 +0800 Subject: [PATCH 21/29] x86 NUC14 run root linux on SMP --- platform/x86_64/nuc14/board.rs | 10 +++--- src/arch/x86_64/acpi.rs | 54 ++++++++++++++++++++++++++------ src/arch/x86_64/cpu.rs | 22 ++++++++----- src/arch/x86_64/entry.rs | 6 ++-- src/arch/x86_64/ipi.rs | 10 ++++-- src/device/irqchip/pic/ioapic.rs | 12 +++++-- src/device/irqchip/pic/lapic.rs | 6 ++-- src/main.rs | 12 +++---- src/percpu.rs | 8 ----- 9 files changed, 89 insertions(+), 51 deletions(-) diff --git a/platform/x86_64/nuc14/board.rs b/platform/x86_64/nuc14/board.rs index b94757cf..01a8bc30 100644 --- a/platform/x86_64/nuc14/board.rs +++ b/platform/x86_64/nuc14/board.rs @@ -23,7 +23,7 @@ pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; pub const ROOT_ZONE_ENTRY: u64 = 0x8000; pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // hpa -pub const ROOT_ZONE_CPUS: u64 = (1 << 0); +pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1); const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, @@ -40,7 +40,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { }; pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/sda2 rw init=/bin/sh rootwait\0"; +pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/sda2 rw init=/init rootwait\0"; // pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb @@ -137,12 +137,12 @@ pub const ROOT_ARCH_ZONE_CONFIG: 
HvArchZoneConfig = HvArchZoneConfig { cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, setup_load_gpa: ROOT_ZONE_SETUP_ADDR, // FIXME: - initrd_load_gpa: 0, //0x1500_0000, - initrd_size: 0, //0x26_b000, + initrd_load_gpa: 0, // 0x1500_0000, + initrd_size: 0, // 0x26_b000, rsdp_memory_region_id: 0x1, acpi_memory_region_id: 0x5, // FIXME: - initrd_memory_region_id: 0, //0x3, + initrd_memory_region_id: 0, // 0x3, screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index ae76b41e..440e4985 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -264,8 +264,10 @@ pub struct RootAcpi { msi_data_reg_map: BTreeMap, /// key: msi-x table bar, value: bdf msix_bar_map: BTreeMap, - /// key: cpuid, value: cpu nr (continuous) - lapic_map: BTreeMap, + /// key: apic id, value: cpu id (continuous) + apic_id_to_cpu_id: BTreeMap, + /// key: cpu id (continuous), value: apic id + cpu_id_to_apic_id: BTreeMap, } impl RootAcpi { @@ -335,13 +337,19 @@ impl RootAcpi { let mut entry_len = madt.get_u8(madt_cur + 1) as usize; match entry { MadtEntry::LocalApic(entry) => { - if !cpu_set.contains_cpu(entry.processor_id as _) { - // madt.remove(madt_cur, entry_len); + let mut disable_lapic = true; + if contains_apic_id(entry.apic_id as _) { + let cpuid = get_cpu_id(entry.apic_id as _); + if cpu_set.contains_cpu(cpuid) { + disable_lapic = false; + } + // reset processor id + madt.set_u8(cpuid as _, madt_cur + 2); + } + if disable_lapic { // set flag to disable lapic madt.set_u32(0x0, madt_cur + 4); } - // let apic id equals processor id - // madt.set_u8(entry.processor_id, madt_cur + 3); } MadtEntry::LocalX2Apic(entry) => { if !cpu_set.contains_cpu(entry.processor_uid as _) {} @@ -606,9 +614,13 @@ impl RootAcpi { MadtEntry::LocalApic(entry) => { if entry.flags != 0 { println!("{:x?}", entry); + let cpu_id = root_acpi.apic_id_to_cpu_id.len(); root_acpi - .lapic_map - .insert(entry.apic_id as _, root_acpi.lapic_map.len()); + 
.apic_id_to_cpu_id + .insert(entry.apic_id as _, cpu_id); + root_acpi + .cpu_id_to_apic_id + .insert(cpu_id, entry.apic_id as _); } } _ => {} @@ -698,6 +710,28 @@ pub fn is_msix_bar(hpa: usize) -> Option { } } -pub fn get_lapic_map() -> &'static BTreeMap { - &ROOT_ACPI.get().unwrap().lapic_map +fn contains_apic_id(apic_id: usize) -> bool { + ROOT_ACPI + .get() + .unwrap() + .apic_id_to_cpu_id + .contains_key(&apic_id) +} + +pub fn get_cpu_id(apic_id: usize) -> usize { + *ROOT_ACPI + .get() + .unwrap() + .apic_id_to_cpu_id + .get(&apic_id) + .unwrap() +} + +pub fn get_apic_id(cpu_id: usize) -> usize { + *ROOT_ACPI + .get() + .unwrap() + .cpu_id_to_apic_id + .get(&cpu_id) + .unwrap() } diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 5e2dab94..1e779061 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,6 +1,6 @@ use crate::{ arch::{ - acpi, + acpi::{self, *}, boot::BootParams, hpet, ipi, mm::new_s2_memory_set, @@ -45,8 +45,6 @@ use x86::{ }; use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}; -use super::acpi::RootAcpi; - const AP_START_PAGE_IDX: u8 = 6; const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; @@ -124,13 +122,14 @@ pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { unsafe { setup_ap_start_page(cpuid) }; let lapic = VirtLocalApic::phys_local_apic(); + let apic_id = acpi::get_apic_id(cpuid); // Intel SDM Vol 3C, Section 8.4.4, MP Initialization Example - unsafe { lapic.send_init_ipi(cpuid as u32) }; + unsafe { lapic.send_init_ipi(apic_id as u32) }; hpet::busy_wait(Duration::from_millis(50)); // 10ms - unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; + unsafe { lapic.send_sipi(AP_START_PAGE_IDX, apic_id as u32) }; hpet::busy_wait(Duration::from_micros(2000)); // 200us - unsafe { lapic.send_sipi(AP_START_PAGE_IDX, cpuid as u32) }; + unsafe { lapic.send_sipi(AP_START_PAGE_IDX, apic_id as u32) }; } /// General-Purpose Registers for 64-bit x86 
architecture. @@ -554,7 +553,7 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); - check_pending_vectors(this_cpu_id()); + check_pending_vectors(self.cpuid); } unsafe fn vmx_entry_failed() -> ! { @@ -596,9 +595,16 @@ impl ArchCpu { } pub fn this_cpu_id() -> usize { + crate::arch::acpi::get_cpu_id(this_apic_id()) +} + +pub fn this_apic_id() -> usize { match CpuId::new().get_feature_info() { Some(info) => info.initial_local_apic_id() as usize, - None => 0, + None => { + panic!("can not find apic id!"); + 0 + } } } diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index f44c9439..e3284bac 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,5 +1,5 @@ use crate::{ - arch::{boot, cpu::this_cpu_id, graphics::font_init}, + arch::{boot, cpu::this_apic_id, graphics::font_init}, consts::PER_CPU_SIZE, memory::addr::PHYS_VIRT_OFFSET, platform::__board, @@ -67,12 +67,12 @@ extern "C" fn rust_entry(magic: u32, info_addr: usize) { #[cfg(all(feature = "graphics", target_arch = "x86_64"))] font_init(__board::GRAPHICS_FONT); boot::print_memory_map(); - rust_main(this_cpu_id(), info_addr); + rust_main(this_apic_id(), info_addr); } fn rust_entry_secondary() { // println!("CPUID: {}", this_cpu_id()); - rust_main(this_cpu_id(), 0); + rust_main(this_apic_id(), 0); } extern "C" { diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index 9c117df3..c51db55f 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -1,5 +1,9 @@ use crate::{ - arch::{cpu::this_cpu_id, idt::IdtVector}, + arch::{ + acpi::{get_apic_id, get_cpu_id}, + cpu::this_cpu_id, + idt::IdtVector, + }, device::irqchip::inject_vector, error::HvResult, event, @@ -68,7 +72,7 @@ pub fn send_ipi(value: u64) -> HvResult { let vector = value.get_bits(0..=7) as u8; let delivery_mode: u8 = value.get_bits(8..=10) as u8; let dest_shorthand = value.get_bits(18..=19) as u8; - let dest = value.get_bits(32..=39) as usize; + let dest = 
get_cpu_id(value.get_bits(32..=39) as usize); let cnt = value.get_bits(40..=63) as u32; let mut cpu_set = this_zone().read().cpu_set; @@ -121,7 +125,7 @@ pub fn arch_send_event(dest: u64, _: u64) { .arch_cpu .virt_lapic .phys_lapic - .send_ipi(IdtVector::VIRT_IPI_VECTOR, dest as _) + .send_ipi(IdtVector::VIRT_IPI_VECTOR, get_apic_id(dest as _) as _) }; } diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 6144baa5..078bae59 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -1,5 +1,11 @@ use crate::{ - arch::{cpu::this_cpu_id, idt, ipi, mmio::MMIoDevice, zone::HvArchZoneConfig}, + arch::{ + acpi::{get_apic_id, get_cpu_id}, + cpu::this_cpu_id, + idt, ipi, + mmio::MMIoDevice, + zone::HvArchZoneConfig, + }, device::irqchip::pic::inject_vector, error::HvResult, memory::{GuestPhysAddr, MMIOAccess}, @@ -131,7 +137,7 @@ impl VirtIoApic { fn get_irq_cpu(&self, irq: usize, zone_id: usize) -> Option { let ioapic = self.inner.get(zone_id).unwrap(); if let Some(entry) = ioapic.lock().rte.get(irq) { - let dest = entry.get_bits(56..=63) as usize; + let dest = get_cpu_id(entry.get_bits(56..=63) as usize); return Some(dest); } None @@ -142,7 +148,7 @@ impl VirtIoApic { let ioapic = self.inner.get(zone_id).unwrap(); if let Some(entry) = ioapic.lock().rte.get(irq) { // TODO: physical & logical mode - let dest = entry.get_bits(56..=63) as usize; + let dest = get_cpu_id(entry.get_bits(56..=63) as usize); let masked = entry.get_bit(16); let vector = entry.get_bits(0..=7) as u8; // info!("trigger hv: {:x} zone: {:x}", vector, zone_id); diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 4c65b716..12b0f101 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -1,6 +1,6 @@ use crate::{ arch::{ - cpu::this_cpu_id, + cpu::{this_apic_id, this_cpu_id}, idt::IdtVector, ipi, msr::Msr::{self, *}, @@ -59,9 +59,9 @@ impl VirtLocalApic { match msr { 
IA32_X2APIC_APICID => { // info!("apicid: {:x}", this_cpu_id()); - Ok(this_cpu_id() as u64) + Ok(this_apic_id() as u64) } - IA32_X2APIC_LDR => Ok(this_cpu_id() as u64), // logical apic id + IA32_X2APIC_LDR => Ok(this_apic_id() as u64), // logical apic id IA32_X2APIC_ISR0 | IA32_X2APIC_ISR1 | IA32_X2APIC_ISR2 | IA32_X2APIC_ISR3 | IA32_X2APIC_ISR4 | IA32_X2APIC_ISR5 | IA32_X2APIC_ISR6 | IA32_X2APIC_ISR7 => { // info!("isr!"); diff --git a/src/main.rs b/src/main.rs index c8b30c8b..2d82599a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -162,14 +162,6 @@ fn per_cpu_init(cpu: &mut PerCpu) { } fn wakeup_secondary_cpus(this_id: usize, host_dtb: usize) { - #[cfg(target_arch = "x86_64")] - for (&apic_id, _) in crate::arch::acpi::get_lapic_map() { - if apic_id == this_id { - continue; - } - cpu_start(apic_id, arch_entry as _, host_dtb); - } - #[cfg(not(target_arch = "x86_64"))] for cpu_id in 0..MAX_CPU_NUM { if cpu_id == this_id { continue; @@ -201,6 +193,10 @@ fn rust_main(cpuid: usize, host_dtb: usize) { } } + #[cfg(target_arch = "x86_64")] + // get the real cpuid, we are using apic id before + let cpuid = crate::arch::cpu::this_cpu_id(); + let cpu = PerCpu::new(cpuid); println!( diff --git a/src/percpu.rs b/src/percpu.rs index 5473574c..2e7c9069 100644 --- a/src/percpu.rs +++ b/src/percpu.rs @@ -40,10 +40,6 @@ pub struct PerCpu { impl PerCpu { pub fn new<'a>(cpu_id: usize) -> &'static mut PerCpu { - #[cfg(target_arch = "x86_64")] - let vaddr = PER_CPU_ARRAY_PTR as VirtAddr - + *crate::arch::acpi::get_lapic_map().get(&cpu_id).unwrap() as usize * PER_CPU_SIZE; - #[cfg(not(target_arch = "x86_64"))] let vaddr = PER_CPU_ARRAY_PTR as VirtAddr + cpu_id as usize * PER_CPU_SIZE; let ret = vaddr as *mut Self; unsafe { @@ -89,10 +85,6 @@ impl PerCpu { } pub fn get_cpu_data<'a>(cpu_id: usize) -> &'a mut PerCpu { - #[cfg(target_arch = "x86_64")] - let cpu_data: usize = PER_CPU_ARRAY_PTR as VirtAddr - + *crate::arch::acpi::get_lapic_map().get(&cpu_id).unwrap() as usize * PER_CPU_SIZE; - 
#[cfg(not(target_arch = "x86_64"))] let cpu_data: usize = PER_CPU_ARRAY_PTR as VirtAddr + cpu_id as usize * PER_CPU_SIZE; unsafe { &mut *(cpu_data as *mut PerCpu) } } From bc1bc9e3d04de7a9e1f1a4a3edd7a343b44f3154 Mon Sep 17 00:00:00 2001 From: Solicey Date: Fri, 15 Aug 2025 20:49:07 +0800 Subject: [PATCH 22/29] x86 NUC14 run non-root, enable virtio blk, console, net --- platform/x86_64/nuc14/board.rs | 11 +++++------ platform/x86_64/nuc14/cargo/features | 3 +-- platform/x86_64/qemu/board.rs | 12 ++++++------ platform/x86_64/qemu/platform.mk | 8 ++++---- src/arch/x86_64/acpi.rs | 9 ++++----- src/arch/x86_64/idt.rs | 2 +- src/arch/x86_64/pio.rs | 29 ++++++++++++++++------------ src/arch/x86_64/trap.rs | 8 ++++++-- src/arch/x86_64/zone.rs | 1 - src/event.rs | 1 - src/hypercall/mod.rs | 2 +- 11 files changed, 45 insertions(+), 41 deletions(-) diff --git a/platform/x86_64/nuc14/board.rs b/platform/x86_64/nuc14/board.rs index 01a8bc30..95c55934 100644 --- a/platform/x86_64/nuc14/board.rs +++ b/platform/x86_64/nuc14/board.rs @@ -40,7 +40,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { }; pub const ROOT_ZONE_NAME: &str = "root-linux"; -pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/sda2 rw init=/init rootwait\0"; +pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 nointremap no_timer_check pci=pcie_scan_all root=/dev/sda2 rw init=/init rootwait\0"; // pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb @@ -81,8 +81,8 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 14] = [ // TODO: e820 mem space probe HvConfigMemoryRegion { mem_type: MEM_TYPE_RESERVED, - physical_start: 0x4030_0000, - virtual_start: 0x4030_0000, + 
physical_start: 0x1_0000_0000, + virtual_start: 0x1_0000_0000, size: 0x2000_0000, }, // zone 1 HvConfigMemoryRegion { @@ -126,7 +126,7 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 14] = [ const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0x9000; const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xa000; const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; -const ROOT_ZONE_SCREEN_BASE_ADDR: GuestPhysAddr = 0x7000_0000; +const ROOT_ZONE_SCREEN_BASE_ADDR: GuestPhysAddr = 0x8000_0000; pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; @@ -141,8 +141,7 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { initrd_size: 0, // 0x26_b000, rsdp_memory_region_id: 0x1, acpi_memory_region_id: 0x5, - // FIXME: - initrd_memory_region_id: 0, // 0x3, + // not longer than 32 bits screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; diff --git a/platform/x86_64/nuc14/cargo/features b/platform/x86_64/nuc14/cargo/features index e525097a..71878594 100644 --- a/platform/x86_64/nuc14/cargo/features +++ b/platform/x86_64/nuc14/cargo/features @@ -1,2 +1 @@ -pci -graphics \ No newline at end of file +pci \ No newline at end of file diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index f103dd89..2c58a454 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -34,14 +34,14 @@ const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x3a20_0000, // hpa - virtual_start: 0x3520_0000, // gpa + physical_start: 0x3a30_0000, // hpa + virtual_start: 0x3530_0000, // gpa size: 0xf000, // modify size accordingly }; pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/ram0 rw rdinit=/init\0"; + 
"video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb @@ -100,11 +100,11 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, setup_load_gpa: ROOT_ZONE_SETUP_ADDR, - initrd_load_gpa: 0x1500_0000, - initrd_size: 0x26_b000, + initrd_load_gpa: 0, // 0x1500_0000, + initrd_size: 0, //0x26_b000, rsdp_memory_region_id: 0x1, acpi_memory_region_id: 0x5, - initrd_memory_region_id: 0x3, + // not longer than 32 bits screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index 7be3dd69..33b79a19 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -18,10 +18,10 @@ QEMU_ARGS += -vga std QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb=on,aw-bits=48 QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 -# QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw -# QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on -QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10009000,format=raw -QEMU_ARGS += -device nvme,serial=deadbeef,drive=X10009000 +QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw +QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10009000,format=raw +# QEMU_ARGS += -device nvme,serial=deadbeef,drive=X10009000 # QEMU_ARGS += -drive if=none,file="$(zone1_rootfs)",id=X10009000,format=raw # QEMU_ARGS += -device 
virtio-blk-pci,bus=pcie.1,drive=X10009000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 440e4985..6185ac45 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -672,11 +672,10 @@ pub fn root_init() { pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { let mut banned: BTreeSet = BTreeSet::new(); - // banned.insert(Signature::SSDT); - // FIXME: temp - // if config.zone_id != 0 { - // banned.insert(Signature::FADT); - // } + if config.zone_id != 0 { + banned.insert(Signature::FADT); + banned.insert(Signature::SSDT); + } ROOT_ACPI.get().unwrap().copy_to_zone_region( &config.memory_regions()[config.arch_config.rsdp_memory_region_id], &config.memory_regions()[config.arch_config.acpi_memory_region_id], diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index caa24798..3be90002 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -8,7 +8,7 @@ const VECTOR_CNT: usize = 256; #[allow(non_snake_case)] pub mod IdtVector { - pub const VIRT_IPI_VECTOR: u8 = 0x1c; + pub const VIRT_IPI_VECTOR: u8 = 0xfa; pub const APIC_ERROR_VECTOR: u8 = 0xfc; pub const APIC_SPURIOUS_VECTOR: u8 = 0xfd; pub const APIC_TIMER_VECTOR: u8 = 0xfe; diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 631530d7..c6ed8dec 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -26,9 +26,13 @@ impl PortIoBitmap { pci_config_addr: 0, }; - // FIXME: zone0 - bitmap.a.fill(0); - bitmap.b.fill(0); + if zone_id == 0 { + bitmap.a.fill(0); + bitmap.b.fill(0); + } else { + bitmap.a.fill(0xff); + bitmap.b.fill(0xff); + } // ban i8259a ports bitmap.set_intercept(0x20, true); @@ -36,23 +40,24 @@ impl PortIoBitmap { bitmap.set_intercept(0xa0, true); bitmap.set_intercept(0xa1, true); - // ban pci config ports - // TODO: handle config space operations from io ports + // pci config ports bitmap.set_range_intercept(PCI_CONFIG_ADDR_PORT, true); 
bitmap.set_range_intercept(PCI_CONFIG_DATA_PORT, true); - // FIXME: uart & i8254 if zone_id == 0 { - bitmap.set_range_intercept(0x60..0x65, false); #[cfg(feature = "graphics")] bitmap.set_range_intercept(UART_COM1_PORT, true); } - // bitmap.set_range_intercept(0x3f8..0x400, false); - // FIXME: get port info from ACPI FACP table - // bitmap.set_intercept(0xb2, false); - // bitmap.set_range_intercept(0x600..0x630, false); - // bitmap.set_range_intercept(0x1800..0x1900, false); + // i8042, we won't use it, but intercept its ports might block linux init + bitmap.set_range_intercept(0x60..0x65, false); + + // FIXME: for debug + if zone_id != 0 { + // #[cfg(feature = "graphics")] + // bitmap.set_range_intercept(UART_COM1_PORT, true); + // bitmap.set_range_intercept(UART_COM1_PORT, false); + } bitmap } diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 3d4691f0..9796fc41 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -262,7 +262,10 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR } else if UART_COM1_PORT.contains(&io_info.port) { virt_console_io_write(io_info.port, value); } else { - // info!("io write {:x} value: {:x}", io_info.port, value); + /* info!( + "unhandled port io write {:x} value: {:x}", + io_info.port, value + ); */ } } else { if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) @@ -272,7 +275,8 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR } else if UART_COM1_PORT.contains(&io_info.port) { value = virt_console_io_read(io_info.port); } else { - // info!("io read {:x}", io_info.port); + // info!("unhandled port io read {:x}", io_info.port); + value = 0x0; } let rax = &mut arch_cpu.regs_mut().rax; // SDM Vol. 
1, Section 3.4.1.1: diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index b8a338f5..bf2358ce 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -19,7 +19,6 @@ pub struct HvArchZoneConfig { pub initrd_size: usize, pub rsdp_memory_region_id: usize, pub acpi_memory_region_id: usize, - pub initrd_memory_region_id: usize, /// not longer than 32 bits pub screen_base: usize, } diff --git a/src/event.rs b/src/event.rs index d4d64c4c..98531338 100644 --- a/src/event.rs +++ b/src/event.rs @@ -116,7 +116,6 @@ pub fn clear_events(cpu: usize) { } pub fn check_events() -> bool { - trace!("check_events"); let cpu_data = this_cpu_data(); match fetch_event(cpu_data.id) { Some(IPI_EVENT_WAKEUP) => { diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index 56d914c6..52205bfd 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -289,7 +289,7 @@ impl<'a> HyperCall<'a> { .0 as *mut u64 }; - #[cfg(all(not(target_arch = "loongarch64"), not(target_arch = "x86_64")))] + #[cfg(not(target_arch = "loongarch64"))] { unsafe { *magic_version = CONFIG_MAGIC_VERSION as _; From 2fce4a61cc93c4424cbbf98e89ac4d2a12299348 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 24 Aug 2025 10:26:00 +0800 Subject: [PATCH 23/29] rename nuc14 to nuc14mnk --- Makefile | 2 +- platform/x86_64/{nuc14 => nuc14mnk}/board.rs | 13 +++++------ .../cargo/config.template.toml | 0 .../x86_64/{nuc14 => nuc14mnk}/cargo/features | 0 .../image/bootloader/boot.S | 0 .../image/bootloader/boot.ld | 0 .../image/bootloader/boot.mk | 0 .../image/font/solarize-12x29.psf | Bin .../image/font/spleen-6x12.psf | Bin .../image/iso/boot/grub/grub.cfg | 0 platform/x86_64/{nuc14 => nuc14mnk}/linker.ld | 0 .../x86_64/{nuc14 => nuc14mnk}/platform.mk | 2 ++ .../x86_64/{nuc14 => nuc14mnk}/test/runner.sh | 0 platform/x86_64/qemu/board.rs | 3 ++- platform/x86_64/qemu/platform.mk | 6 ++++- src/arch/x86_64/acpi.rs | 4 ++-- src/arch/x86_64/iommu.rs | 2 +- src/arch/x86_64/ipi.rs | 1 - 
src/arch/x86_64/pio.rs | 7 ------ src/arch/x86_64/trap.rs | 5 ++--- src/arch/x86_64/zone.rs | 21 +++++++++++++++++- src/device/uart/uart16550a.rs | 2 +- src/hypercall/mod.rs | 1 - 23 files changed, 42 insertions(+), 27 deletions(-) rename platform/x86_64/{nuc14 => nuc14mnk}/board.rs (96%) rename platform/x86_64/{nuc14 => nuc14mnk}/cargo/config.template.toml (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/cargo/features (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/image/bootloader/boot.S (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/image/bootloader/boot.ld (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/image/bootloader/boot.mk (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/image/font/solarize-12x29.psf (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/image/font/spleen-6x12.psf (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/image/iso/boot/grub/grub.cfg (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/linker.ld (100%) rename platform/x86_64/{nuc14 => nuc14mnk}/platform.mk (97%) rename platform/x86_64/{nuc14 => nuc14mnk}/test/runner.sh (100%) diff --git a/Makefile b/Makefile index 5727cbd0..61b7b9fe 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ LOG ?= info STATS ?= off PORT ?= 2333 MODE ?= debug -BOARD ?= nuc14 +BOARD ?= nuc14mnk FEATURES= BID ?= diff --git a/platform/x86_64/nuc14/board.rs b/platform/x86_64/nuc14mnk/board.rs similarity index 96% rename from platform/x86_64/nuc14/board.rs rename to platform/x86_64/nuc14mnk/board.rs index 62946d64..d5f001e9 100644 --- a/platform/x86_64/nuc14/board.rs +++ b/platform/x86_64/nuc14mnk/board.rs @@ -90,37 +90,37 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 14] = [ physical_start: 0x6ed7_f000, virtual_start: 0x6ed7_f000, size: 0x10_e000, - }, // FIXME: ACPI non-volatile storage + }, // ACPI non-volatile storage HvConfigMemoryRegion { mem_type: MEM_TYPE_RESERVED, physical_start: 0xfeda_0000, virtual_start: 0xfeda_0000, size: 0x2_8000, - }, // FIXME: pnp 00:05 + }, // pnp 00:05 
HvConfigMemoryRegion { mem_type: MEM_TYPE_RESERVED, physical_start: 0xfe01_1000, virtual_start: 0xfe01_1000, size: 0x40_0000, - }, // FIXME: reserved + }, // reserved HvConfigMemoryRegion { mem_type: MEM_TYPE_RESERVED, physical_start: 0x677a_b000, virtual_start: 0x677a_b000, size: 0x74d_3000, - }, // FIXME: reserved + }, // reserved HvConfigMemoryRegion { mem_type: MEM_TYPE_RESERVED, physical_start: 0xfd69_0000, virtual_start: 0xfd69_0000, size: 0x6_0000, - }, // FIXME: INTC1057:00 + }, // INTC1057:00 HvConfigMemoryRegion { mem_type: MEM_TYPE_RESERVED, physical_start: 0xfb00_0000, virtual_start: 0xfb00_0000, size: 0x100_0000, - }, // FIXME: reserved + }, // reserved ]; const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0x9000; @@ -136,7 +136,6 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, setup_load_gpa: ROOT_ZONE_SETUP_ADDR, - // FIXME: initrd_load_gpa: 0, // 0x1500_0000, initrd_size: 0, // 0x26_b000, rsdp_memory_region_id: 0x1, diff --git a/platform/x86_64/nuc14/cargo/config.template.toml b/platform/x86_64/nuc14mnk/cargo/config.template.toml similarity index 100% rename from platform/x86_64/nuc14/cargo/config.template.toml rename to platform/x86_64/nuc14mnk/cargo/config.template.toml diff --git a/platform/x86_64/nuc14/cargo/features b/platform/x86_64/nuc14mnk/cargo/features similarity index 100% rename from platform/x86_64/nuc14/cargo/features rename to platform/x86_64/nuc14mnk/cargo/features diff --git a/platform/x86_64/nuc14/image/bootloader/boot.S b/platform/x86_64/nuc14mnk/image/bootloader/boot.S similarity index 100% rename from platform/x86_64/nuc14/image/bootloader/boot.S rename to platform/x86_64/nuc14mnk/image/bootloader/boot.S diff --git a/platform/x86_64/nuc14/image/bootloader/boot.ld b/platform/x86_64/nuc14mnk/image/bootloader/boot.ld similarity index 100% rename from platform/x86_64/nuc14/image/bootloader/boot.ld rename to 
platform/x86_64/nuc14mnk/image/bootloader/boot.ld diff --git a/platform/x86_64/nuc14/image/bootloader/boot.mk b/platform/x86_64/nuc14mnk/image/bootloader/boot.mk similarity index 100% rename from platform/x86_64/nuc14/image/bootloader/boot.mk rename to platform/x86_64/nuc14mnk/image/bootloader/boot.mk diff --git a/platform/x86_64/nuc14/image/font/solarize-12x29.psf b/platform/x86_64/nuc14mnk/image/font/solarize-12x29.psf similarity index 100% rename from platform/x86_64/nuc14/image/font/solarize-12x29.psf rename to platform/x86_64/nuc14mnk/image/font/solarize-12x29.psf diff --git a/platform/x86_64/nuc14/image/font/spleen-6x12.psf b/platform/x86_64/nuc14mnk/image/font/spleen-6x12.psf similarity index 100% rename from platform/x86_64/nuc14/image/font/spleen-6x12.psf rename to platform/x86_64/nuc14mnk/image/font/spleen-6x12.psf diff --git a/platform/x86_64/nuc14/image/iso/boot/grub/grub.cfg b/platform/x86_64/nuc14mnk/image/iso/boot/grub/grub.cfg similarity index 100% rename from platform/x86_64/nuc14/image/iso/boot/grub/grub.cfg rename to platform/x86_64/nuc14mnk/image/iso/boot/grub/grub.cfg diff --git a/platform/x86_64/nuc14/linker.ld b/platform/x86_64/nuc14mnk/linker.ld similarity index 100% rename from platform/x86_64/nuc14/linker.ld rename to platform/x86_64/nuc14mnk/linker.ld diff --git a/platform/x86_64/nuc14/platform.mk b/platform/x86_64/nuc14mnk/platform.mk similarity index 97% rename from platform/x86_64/nuc14/platform.mk rename to platform/x86_64/nuc14mnk/platform.mk index 02385a37..db763ed5 100644 --- a/platform/x86_64/nuc14/platform.mk +++ b/platform/x86_64/nuc14mnk/platform.mk @@ -41,9 +41,11 @@ QEMU_ARGS += -drive file=$(image_dir)/virtdisk/hvisor.iso,format=raw,index=0,med $(hvisor_bin): elf boot $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ cp $(hvisor_elf) $(image_dir)/iso/boot + mkdir -p $(image_dir)/iso/boot/kernel cp $(zone0_boot) $(image_dir)/iso/boot/kernel cp $(zone0_setup) $(image_dir)/iso/boot/kernel cp $(zone0_vmlinux) 
$(image_dir)/iso/boot/kernel + mkdir -p $(image_dir)/virtdisk grub-mkrescue /usr/lib/grub/x86_64-efi -o $(image_dir)/virtdisk/hvisor.iso $(image_dir)/iso include $(image_dir)/bootloader/boot.mk \ No newline at end of file diff --git a/platform/x86_64/nuc14/test/runner.sh b/platform/x86_64/nuc14mnk/test/runner.sh similarity index 100% rename from platform/x86_64/nuc14/test/runner.sh rename to platform/x86_64/nuc14mnk/test/runner.sh diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index 8d44b9e9..d108c80b 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -41,7 +41,7 @@ const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { pub const ROOT_ZONE_NAME: &str = "root-linux"; pub const ROOT_ZONE_CMDLINE: &str = - "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; + "console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; //"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic // video=vesafb @@ -108,6 +108,7 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, }; +// only need to fill in ecam_base and ecam_size in x86_64 pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { ecam_base: 0xe0000000, ecam_size: 0x200000, diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index 33b79a19..62ac98f6 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -16,13 +16,15 @@ QEMU_ARGS += -bios /usr/share/ovmf/OVMF.fd QEMU_ARGS += -vga std # QEMU_ARGS += -nographic +QEMU_ARGS += -nodefaults +QEMU_ARGS += -net nic -net user + QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb=on,aw-bits=48 QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 QEMU_ARGS += -drive 
if=none,file="$(zone0_rootfs)",id=X10008000,format=raw QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on # QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10009000,format=raw # QEMU_ARGS += -device nvme,serial=deadbeef,drive=X10009000 - # QEMU_ARGS += -drive if=none,file="$(zone1_rootfs)",id=X10009000,format=raw # QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10009000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on # QEMU_ARGS += -netdev tap,id=net0,ifname=tap0,script=no,downscript=no @@ -43,9 +45,11 @@ QEMU_ARGS += -drive file=$(image_dir)/virtdisk/hvisor.iso,format=raw,index=0,med $(hvisor_bin): elf boot $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ cp $(hvisor_elf) $(image_dir)/iso/boot + mkdir -p $(image_dir)/iso/boot/kernel cp $(zone0_boot) $(image_dir)/iso/boot/kernel cp $(zone0_setup) $(image_dir)/iso/boot/kernel cp $(zone0_vmlinux) $(image_dir)/iso/boot/kernel + mkdir -p $(image_dir)/virtdisk grub-mkrescue /usr/lib/grub/x86_64-efi -o $(image_dir)/virtdisk/hvisor.iso $(image_dir)/iso include $(image_dir)/bootloader/boot.mk \ No newline at end of file diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index 6185ac45..b1c16f4d 100644 --- a/src/arch/x86_64/acpi.rs +++ b/src/arch/x86_64/acpi.rs @@ -462,7 +462,7 @@ impl RootAcpi { }; // let rsdp_mapping = unsafe { Rsdp::search_for_on_bios(HvAcpiHandler {}).unwrap() }; - // FIXME: temporarily suppose we use ACPI 1.0 + // TODO: temporarily suppose we use ACPI 1.0 assert!(rsdp_mapping.revision() == 0); root_acpi.rsdp.fill( @@ -486,7 +486,7 @@ impl RootAcpi { let tables = unsafe { AcpiTables::from_validated_rsdp(HvAcpiHandler {}, rsdp_mapping) }.unwrap(); - // FIXME: temp + // print rsdt entries let mut rsdt_entry = rsdt_addr + 36; let size = (unsafe { *((rsdt_addr + 4) as *const u32) } as usize - 36) / 4; for i in 0..size { diff --git a/src/arch/x86_64/iommu.rs b/src/arch/x86_64/iommu.rs index 
695d05a5..50f0815f 100644 --- a/src/arch/x86_64/iommu.rs +++ b/src/arch/x86_64/iommu.rs @@ -212,7 +212,7 @@ impl Vtd { irte.set_bit(15, false); // vector irte.set_bits(16..=23, irq as _); - // FIXME: dest id + // dest id irte.set_bits(32..=63, 0); unsafe { *irte_ptr = irte }; diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index 34bc010e..dc95233c 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -107,7 +107,6 @@ pub fn send_ipi(value: u64) -> HvResult { } IpiDeliveryMode::INIT => {} IpiDeliveryMode::START_UP => { - // FIXME: start up once? let mut ipi_info = get_ipi_info(dest).unwrap().lock(); ipi_info.start_up_addr = (vector as usize) << 12; event::send_event(dest, SGI_IPI_ID as _, event::IPI_EVENT_WAKEUP); diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index 232bcb4b..ba76d18b 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -80,13 +80,6 @@ impl PortIoBitmap { // i8042, we won't use it, but intercept its ports might block linux init bitmap.set_range_intercept(0x60..0x65, false); - // FIXME: for debug - if zone_id != 0 { - // #[cfg(feature = "graphics")] - // bitmap.set_range_intercept(UART_COM1_PORT, true); - // bitmap.set_range_intercept(UART_COM1_PORT, false); - } - bitmap } diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 9796fc41..4a317668 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -103,7 +103,7 @@ fn handle_irq(vector: u8) { fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { use raw_cpuid::{cpuid, CpuIdResult}; - // FIXME: temporary hypervisor hack + // TODO: temporary hypervisor hack let signature = unsafe { &*("ACRNACRNACRN".as_ptr() as *const [u32; 3]) }; let cr4_flags = Cr4Flags::from_bits_truncate(arch_cpu.cr(4) as _); let regs = arch_cpu.regs_mut(); @@ -254,7 +254,7 @@ fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvR _ => unreachable!(), } as _; - // FIXME: reconstruct + // TODO: reconstruct if 
PCI_CONFIG_ADDR_PORT.contains(&io_info.port) || PCI_CONFIG_DATA_PORT.contains(&io_info.port) { @@ -302,7 +302,6 @@ fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult { if let Ok(msr) = Msr::try_from(rcx) { let res = if msr == IA32_APIC_BASE { - // FIXME: non root linux let mut apic_base = unsafe { IA32_APIC_BASE.read() }; // info!("APIC BASE: {:x}", apic_base); apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index bb7bf3a7..583b6299 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -14,16 +14,35 @@ use alloc::vec::Vec; #[repr(C)] #[derive(Debug, Clone)] pub struct HvArchZoneConfig { + /// base address of ioapic mmio registers, usually 0xfec00000 pub ioapic_base: usize, + /// size of ioapic mmio registers, usually 0x1000 pub ioapic_size: usize, + /// start gpa of vmlinux.bin, usually 0x100000 pub kernel_entry_gpa: usize, + /// gpa of linux boot command line pub cmdline_load_gpa: usize, + /// start gpa of setup.bin, address length no bigger than 16 bits pub setup_load_gpa: usize, + /// If you want to use initrd, set initrd_load_gpa and initrd_size. + /// Otherwise, leave them as zero. The memory region type of + /// initrd should be set to MEM_TYPE_RESERVED. + /// initrd_load_gpa is the start gpa of initrd pub initrd_load_gpa: usize, + /// size of initrd pub initrd_size: usize, + /// RSDP table will be copied to the memory region with this id. + /// The start gpa of this memory region should be 0xe_0000 + /// and the size should be 0x2_0000. Set its type to MEM_TYPE_RAM. pub rsdp_memory_region_id: usize, + /// Other ACPI tables will be copied to the memory region with this id. + /// no restriction on start gpa and size, but its type should be MEM_TYPE_RAM as well. + /// Usually, the DSDT table is large, so the size of this region should be large enough. 
pub acpi_memory_region_id: usize, - /// not longer than 32 bits + /// If you want to use a graphical console, set screen_base to a preferred gpa + /// as the start of the framebuffer. Otherwise, leave it as zero. + /// No need to add a memory region for the framebuffer, + /// Hvisor will do the job. **IMPORTANT: screen_base should be no longer than 32 bits.** pub screen_base: usize, } diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs index e05f6a1e..a86b6c89 100644 --- a/src/device/uart/uart16550a.rs +++ b/src/device/uart/uart16550a.rs @@ -214,7 +214,7 @@ impl VirtUart16550aUnlocked { self.iir = InterruptIdentFlags::NO_INTR_IS_PENDING.bits(); } else { self.iir = iir; - // FIXME: + // use COM1 irq inject_irq(0x4, false); } } diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index e9b09452..5065b324 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -131,7 +131,6 @@ impl<'a> HyperCall<'a> { .set_base_addr(shared_region_addr_pa as _); info!("hvisor device region base is {:#x?}", shared_region_addr_pa); - // FIXME: HyperCallResult::Ok(0) } From 47666c29fe8b979bd9986a93fe6582b29fe8add8 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 24 Aug 2025 10:57:52 +0800 Subject: [PATCH 24/29] fix makefile diff --- Makefile | 4 ++-- src/memory/frame.rs | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 61b7b9fe..6f2f21a2 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ -ARCH ?= x86_64 +ARCH ?= aarch64 LOG ?= info STATS ?= off PORT ?= 2333 MODE ?= debug -BOARD ?= nuc14mnk +BOARD ?= qemu-gicv3 FEATURES= BID ?= diff --git a/src/memory/frame.rs b/src/memory/frame.rs index b508b536..eb49a10b 100644 --- a/src/memory/frame.rs +++ b/src/memory/frame.rs @@ -23,7 +23,6 @@ use spin::Mutex; use super::addr::{align_down, align_up, is_aligned, PhysAddr}; use crate::consts::PAGE_SIZE; use crate::error::HvResult; -use crate::memory::addr::virt_to_phys; // Support max 1M * 4096 = 1GB memory. 
type FrameAlloc = bitmap_allocator::BitAlloc1M; @@ -278,9 +277,7 @@ pub fn init() { let mem_pool_start = crate::consts::mem_pool_start(); let mem_pool_end = align_down(crate::consts::hv_end()); let mem_pool_size = mem_pool_end - mem_pool_start; - FRAME_ALLOCATOR - .lock() - .init(virt_to_phys(mem_pool_start), mem_pool_size); + FRAME_ALLOCATOR.lock().init(mem_pool_start, mem_pool_size); info!( "Frame allocator initialization finished: {:#x?}", From 02aa5de06a55a2dcfa3b2b0830650ce3a7a612bf Mon Sep 17 00:00:00 2001 From: Solicey Date: Tue, 16 Sep 2025 12:15:19 +0800 Subject: [PATCH 25/29] fix bug zone1 cannot restart --- platform/x86_64/nuc14mnk/board.rs | 6 +++--- src/arch/x86_64/msr.rs | 2 +- src/arch/x86_64/pio.rs | 2 +- src/memory/frame.rs | 5 ++++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/platform/x86_64/nuc14mnk/board.rs b/platform/x86_64/nuc14mnk/board.rs index d5f001e9..43df35d3 100644 --- a/platform/x86_64/nuc14mnk/board.rs +++ b/platform/x86_64/nuc14mnk/board.rs @@ -158,9 +158,9 @@ pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { pci_mem64_base: 0x0, }; -pub const ROOT_PCI_DEVS: [u64; 19] = [ - 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xa3, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, - 0xfd, 0x100, 0x200, +pub const ROOT_PCI_DEVS: [u64; 18] = [ + 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, 0xfd, + 0x100, 0x200, ]; #[cfg(all(feature = "graphics"))] diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index 50b92483..be9b23a3 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -150,7 +150,7 @@ pub fn set_msr_bitmap(zone_id: usize) { unsafe { if let Some(map) = &mut MSR_BITMAP_MAP { if map.contains_key(&zone_id) { - panic!("msr bitmap for Zone {} already exists!", zone_id); + map.remove(&zone_id); } map.insert(zone_id, MsrBitmap::new()); } diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index ba76d18b..f5d79986 100644 --- a/src/arch/x86_64/pio.rs +++ 
b/src/arch/x86_64/pio.rs @@ -22,7 +22,7 @@ pub fn set_pio_bitmap(zone_id: usize) { unsafe { if let Some(map) = &mut PIO_BITMAP_MAP { if map.contains_key(&zone_id) { - panic!("pio bitmap for Zone {} already exists!", zone_id); + map.remove(&zone_id); } map.insert(zone_id, PortIoBitmap::new(zone_id)); } diff --git a/src/memory/frame.rs b/src/memory/frame.rs index eb49a10b..b508b536 100644 --- a/src/memory/frame.rs +++ b/src/memory/frame.rs @@ -23,6 +23,7 @@ use spin::Mutex; use super::addr::{align_down, align_up, is_aligned, PhysAddr}; use crate::consts::PAGE_SIZE; use crate::error::HvResult; +use crate::memory::addr::virt_to_phys; // Support max 1M * 4096 = 1GB memory. type FrameAlloc = bitmap_allocator::BitAlloc1M; @@ -277,7 +278,9 @@ pub fn init() { let mem_pool_start = crate::consts::mem_pool_start(); let mem_pool_end = align_down(crate::consts::hv_end()); let mem_pool_size = mem_pool_end - mem_pool_start; - FRAME_ALLOCATOR.lock().init(mem_pool_start, mem_pool_size); + FRAME_ALLOCATOR + .lock() + .init(virt_to_phys(mem_pool_start), mem_pool_size); info!( "Frame allocator initialization finished: {:#x?}", From 0bc52c7195f033628fabffd3af348cb370223aa0 Mon Sep 17 00:00:00 2001 From: ZhongkaiXu <3605832858@qq.com> Date: Sat, 20 Sep 2025 08:38:09 +0800 Subject: [PATCH 26/29] =?UTF-8?q?=F0=9F=90=9E=20fix(iommu,=20its):=20fix?= =?UTF-8?q?=20its=20bugs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix its emulation and add iommu pt. 
--- .../qemu-gicv3/image/dts/zone1-linux.dts | 39 +++++++- src/arch/aarch64/iommu.rs | 79 ++-------------- src/arch/aarch64/zone.rs | 52 +++++++++++ src/device/irqchip/gicv3/gits.rs | 90 +++++++++++++------ src/device/irqchip/gicv3/vgic.rs | 77 ++++++++-------- src/pci/pci.rs | 7 +- src/zone.rs | 10 +++ 7 files changed, 207 insertions(+), 147 deletions(-) diff --git a/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts b/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts index e2d4a8af..9239584b 100644 --- a/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts +++ b/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts @@ -37,12 +37,43 @@ reg = <0x0 0x50000000 0x0 0x30000000>; }; - gic@8000000 { + intc@8000000 { + phandle = <0x01>; + interrupts = <0x01 0x09 0x04>; + reg = <0x00 0x8000000 0x00 0x10000 0x00 0x80a0000 0x00 0xf60000>; + #redistributor-regions = <0x01>; compatible = "arm,gic-v3"; - #interrupt-cells = <0x03>; + ranges; + #size-cells = <0x02>; + #address-cells = <0x02>; interrupt-controller; - reg = <0x00 0x8000000 0x00 0x10000 0x00 0x80a0000 0x00 0xf60000>; - phandle = <0x01>; + #interrupt-cells = <0x03>; + + its@8080000 { + phandle = <0x8006>; + reg = <0x00 0x8080000 0x00 0x20000>; + #msi-cells = <0x01>; + msi-controller; + compatible = "arm,gic-v3-its"; + }; + }; + + pcie@10000000 { + interrupt-map-mask = <0x1800 0x00 0x00 0x07>; + interrupt-map = <0x00 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x03 0x04 0x00 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x04 0x04 0x00 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x05 0x04 0x00 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x06 0x04 0x800 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x04 0x04 0x800 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x05 0x04 0x800 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x06 0x04 0x800 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x03 0x04 0x1000 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x05 0x04 0x1000 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x06 0x04 0x1000 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x03 0x04 0x1000 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x04 
0x04 0x1800 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x06 0x04 0x1800 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x03 0x04 0x1800 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x04 0x04 0x1800 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x05 0x04>; + #interrupt-cells = <0x01>; + ranges = <0x1000000 0x00 0x00 0x00 0x3eff0000 0x00 0x10000 + 0x2000000 0x00 0x10000000 0x00 0x10000000 0x00 0x2eff0000 + 0x3000000 0x80 0x00 0x80 0x00 0x80 0x00>; + reg = <0x40 0x10000000 0x00 0x10000000>; + msi-map = <0x00 0x8006 0x00 0x10000>; + dma-coherent; + bus-range = <0x00 0xff>; + linux,pci-domain = <0x00>; + #size-cells = <0x02>; + #address-cells = <0x03>; + device_type = "pci"; + compatible = "pci-host-ecam-generic"; }; apb-pclk { diff --git a/src/arch/aarch64/iommu.rs b/src/arch/aarch64/iommu.rs index 6773d2f7..1b7110d5 100644 --- a/src/arch/aarch64/iommu.rs +++ b/src/arch/aarch64/iommu.rs @@ -348,7 +348,6 @@ impl CmdQueue { pub struct Smmuv3 { rp: &'static RegisterPage, strtab: LinearStreamTable, - iommu_pt_list: Vec>, cmdq: CmdQueue, } @@ -358,20 +357,13 @@ impl Smmuv3 { let mut r = Self { rp: rp, strtab: LinearStreamTable::new(), - iommu_pt_list: vec![], cmdq: CmdQueue::new(), }; - for _ in 0..MAX_ZONE_NUM { - r.iommu_pt_list.push(new_s2_memory_set()); - } - - info!("pagetables for iommu, init done!"); - r.check_env(); - r.init_limited_pt(); r.init_structures(); r.device_reset(); + r } @@ -413,47 +405,6 @@ impl Smmuv3 { } } - fn init_limited_pt(&mut self) { - // its - for pt in self.iommu_pt_list.iter_mut() { - pt.insert(MemoryRegion::new_with_offset_mapper( - 0x8080000 as GuestPhysAddr, - 0x8080000, - 0x20000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - } - - // ram - self.iommu_pt_list[0] - .insert(MemoryRegion::new_with_offset_mapper( - 0x80000000 as GuestPhysAddr, - 0x80000000, - 0x50000000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - - self.iommu_pt_list[1] - .insert(MemoryRegion::new_with_offset_mapper( - 0x50000000 as GuestPhysAddr, - 0x50000000, - 0x30000000, - MemFlags::READ | 
MemFlags::WRITE, - )) - .ok(); - - self.iommu_pt_list[2] - .insert(MemoryRegion::new_with_offset_mapper( - 0x80000000 as GuestPhysAddr, - 0x80000000, - 0x10000000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - } - fn init_structures(&mut self) { self.init_strtab(); self.init_queues(); @@ -545,13 +496,12 @@ impl Smmuv3 { } // s1 bypass and s2 translate - fn write_ste(&mut self, sid: usize, vmid: usize) { + fn write_ste(&mut self, sid: usize, vmid: usize, root_pt: usize) { self.sync_ste(sid); assert!(vmid < MAX_ZONE_NUM, "Invalid zone id!"); - self.strtab - .write_ste(sid, vmid, self.iommu_pt_list[vmid].root_paddr()); + self.strtab.write_ste(sid, vmid, root_pt); } // invalidate the ste @@ -582,13 +532,8 @@ static SMMUV3: spin::Once> = spin::Once::new(); /// smmuv3 init pub fn iommu_init() { - #[cfg(feature = "iommu")] - { - info!("Smmuv3 init..."); - SMMUV3.call_once(|| Mutex::new(Smmuv3::new())); - } - #[cfg(not(feature = "iommu"))] - info!("Smmuv3 init: do nothing now"); + info!("Smmuv3 init..."); + SMMUV3.call_once(|| Mutex::new(Smmuv3::new())); } /// smmuv3_base @@ -604,15 +549,7 @@ pub fn smmuv3_size() -> usize { } /// write ste -pub fn iommu_add_device(vmid: usize, sid: usize) { - #[cfg(feature = "iommu")] - { - let mut smmu = SMMUV3.get().unwrap().lock(); - smmu.write_ste(sid as _, vmid as _); - } - #[cfg(not(feature = "iommu"))] - info!( - "aarch64: iommu_add_device: do nothing now, vmid: {}, sid: {}", - vmid, sid - ); +pub fn iommu_add_device(vmid: usize, sid: usize, root_pt: usize) { + let mut smmu = SMMUV3.get().unwrap().lock(); + smmu.write_ste(sid as _, vmid as _, root_pt as _); } diff --git a/src/arch/aarch64/zone.rs b/src/arch/aarch64/zone.rs index b7e8e90b..dd14fc0a 100644 --- a/src/arch/aarch64/zone.rs +++ b/src/arch/aarch64/zone.rs @@ -60,6 +60,58 @@ impl Zone { Ok(()) } + pub fn iommu_pt_init( + &mut self, + mem_regions: &[HvConfigMemoryRegion], + hv_config: &HvArchZoneConfig, + ) -> HvResult { + // Create a new stage 2 page table for iommu. 
+ // Only map the memory regions that are possible to be accessed by devices as DMA buffer. + + let pt = self.iommu_pt.as_mut().unwrap(); + let flags = MemFlags::READ | MemFlags::WRITE; + for mem_region in mem_regions.iter() { + match mem_region.mem_type { + MEM_TYPE_RAM => { + pt.insert(MemoryRegion::new_with_offset_mapper( + mem_region.virtual_start as GuestPhysAddr, + mem_region.physical_start as HostPhysAddr, + mem_region.size as _, + flags, + ))?; + info!( + "iommu map: vaddr:{} - paddr:{}", + mem_region.virtual_start, mem_region.physical_start + ); + } + _ => { + // pass + } + } + } + + match hv_config.gic_config { + GicConfig::Gicv3(ref gicv3_config) => { + if gicv3_config.gits_size != 0 { + // map gits + pt.insert(MemoryRegion::new_with_offset_mapper( + gicv3_config.gits_base as GuestPhysAddr, + gicv3_config.gits_base as HostPhysAddr, + gicv3_config.gits_size as _, + flags | MemFlags::IO, + ))?; + info!( + "iommu map: vaddr:{} - paddr:{}", + gicv3_config.gits_base, gicv3_config.gits_base + ); + } + } + _ => {} + } + + Ok(()) + } + pub fn arch_zone_configuration(&mut self, config: &HvZoneConfig) -> HvResult { self.ivc_init(config.ivc_config()); Ok(()) diff --git a/src/device/irqchip/gicv3/gits.rs b/src/device/irqchip/gicv3/gits.rs index b43c2849..e3ad3b75 100644 --- a/src/device/irqchip/gicv3/gits.rs +++ b/src/device/irqchip/gicv3/gits.rs @@ -37,7 +37,7 @@ pub const GITS_UMSIR: usize = 0x0048; // unmapped msi pub const GITS_CBASER: usize = 0x0080; // the addr of command queue pub const GITS_CWRITER: usize = 0x0088; // rw, write an command to the cmdq, write this reg to tell hw pub const GITS_CREADR: usize = 0x0090; // read-only, hardware changes it -pub const GITS_BASER: usize = 0x0100; // itt, desc +pub const GITS_BASER: usize = 0x0100; // device table, itt, desc pub const GITS_COLLECTION_BASER: usize = GITS_BASER + 0x8; pub const GITS_TRANSLATER: usize = 0x10000 + 0x0040; // to signal an interrupt, written by devices @@ -73,19 +73,25 @@ fn 
vicid_to_icid(vicid: u64, cpu_bitmap: u64) -> Option { // created by root linux, and make a virtual one to non root pub struct DeviceTable { baser: usize, + mask: usize, + fix_val: usize, } impl DeviceTable { fn new() -> Self { let dt_baser_reg = host_gits_base() + GITS_BASER; let dt_baser = unsafe { ptr::read_volatile(dt_baser_reg as *mut u64) }; + let mask = 0x71f000000000000; + let fix_val = dt_baser & mask; Self { baser: dt_baser as _, + mask: mask as _, + fix_val: fix_val as _, } } fn set_baser(&mut self, value: usize) { - self.baser = value; + self.baser = (value & !self.mask) | self.fix_val; } fn read_baser(&self) -> usize { @@ -95,19 +101,25 @@ impl DeviceTable { pub struct CollectionTable { baser: usize, + mask: usize, + fix_val: usize, } impl CollectionTable { fn new() -> Self { let ct_baser_reg = host_gits_base() + GITS_COLLECTION_BASER; let ct_baser = unsafe { ptr::read_volatile(ct_baser_reg as *mut u64) }; + let mask = 0x71f000000000000; + let fix_val = ct_baser & mask; Self { baser: ct_baser as _, + mask: mask as _, + fix_val: fix_val as _, } } fn set_baser(&mut self, value: usize) { - self.baser = value; + self.baser = (value & !self.mask) | self.fix_val; } fn read_baser(&self) -> usize { @@ -121,8 +133,8 @@ pub struct Cmdq { writer: usize, frame: Frame, - phy_base_list: [usize; MAX_ZONE_NUM], - cbaser_list: [usize; MAX_ZONE_NUM], + phy_base_list: [usize; MAX_ZONE_NUM], // the real phy addr for vm cmdq + cbaser_list: [usize; MAX_ZONE_NUM], // the v register for vm creadr_list: [usize; MAX_ZONE_NUM], cwriter_list: [usize; MAX_ZONE_NUM], cmdq_page_num: [usize; MAX_ZONE_NUM], @@ -131,7 +143,6 @@ pub struct Cmdq { impl Cmdq { fn new() -> Self { let f = Frame::new_contiguous_with_base(CMDQ_PAGES_NUM, 16).unwrap(); - info!("ITS cmdq base: 0x{:x}", f.start_paddr()); let r = Self { phy_addr: f.start_paddr(), readr: 0, @@ -154,8 +165,8 @@ impl Cmdq { val = val | (CMDQ_PAGES_NUM - 1); // 16 contigous 4KB pages let ctrl = host_gits_base() + GITS_CTRL; unsafe { 
- let origin_ctrl = ptr::read_volatile(ctrl as *mut u64); - ptr::write_volatile(ctrl as *mut u64, origin_ctrl & 0xfffffffffffffffeu64); // turn off, vm will turn on this ctrl + let origin_ctrl = ptr::read_volatile(ctrl as *mut u32); + ptr::write_volatile(ctrl as *mut u32, origin_ctrl & 0xfffffffeu32); // turn off, vm will turn on this ctrl ptr::write_volatile(reg as *mut u64, val as u64); ptr::write_volatile(writer as *mut u64, 0 as u64); // init cwriter } @@ -164,9 +175,15 @@ impl Cmdq { fn set_cbaser(&mut self, zone_id: usize, value: usize) { assert!(zone_id < MAX_ZONE_NUM, "Invalid zone id!"); self.cbaser_list[zone_id] = value; - self.phy_base_list[zone_id] = value & 0xffffffffff000; + let gpa_base = value & 0xffffffffff000; + unsafe { + let phy_base = match this_zone().read().gpm.page_table_query(gpa_base) { + Ok(p) => self.phy_base_list[zone_id] = p.0, + _ => {} + }; + } self.cmdq_page_num[zone_id] = (value & 0xff) + 1; // get the page num - info!( + debug!( "zone_id: {}, cmdq base: {:#x}, page num: {}", zone_id, self.phy_base_list[zone_id], self.cmdq_page_num[zone_id] ); @@ -182,7 +199,7 @@ impl Cmdq { if value == self.creadr_list[zone_id] { // if the off vmm gonna read is equal to the cwriter, it means that // the first write cmd is not sent to the hw, so we ignore it. 
- trace!("ignore first write"); + debug!("ignore first write"); } else { self.insert_cmd(zone_id, value); } @@ -205,7 +222,7 @@ impl Cmdq { self.creadr_list[zone_id] = writer; } - // it's ok to add qemu-args: -trace gicv3_gits_cmd_*, remember to remain `enable one lpi` + // it's ok to add qemu-args: -info gicv3_gits_cmd_*, remember to remain `enable one lpi` // we need changge vicid to icid here fn analyze_cmd(&self, value: [u64; 4]) -> [u64; 4] { let code = (value[0] & 0xff) as usize; @@ -223,7 +240,7 @@ impl Cmdq { new_cmd[2] &= !0xffffu64; new_cmd[2] |= icid & 0xffff; enable_one_lpi((event - 8192) as _); - info!( + debug!( "MAPI cmd, for device {:#x}, event = intid = {:#x} -> vicid {:#x} (icid {:#x})", id >> 32, event, @@ -233,10 +250,25 @@ impl Cmdq { } 0x08 => { let id = value[0] & 0xffffffff00000000; - let itt_base = (value[2] & 0x000fffffffffffff) >> 8; - trace!( + let itt_base = value[2] & 0x000fffffffffff00; // the lowest 8 bits are zeros + debug!( + "MAPD cmd, for device {:#x}, itt base {:#x}", + id >> 32, + itt_base + ); + let phys_itt_base = unsafe { + this_zone() + .read() + .gpm + .page_table_query(itt_base as _) + .unwrap() + .0 + }; + new_cmd[2] &= !0x000fffffffffff00u64; + new_cmd[2] |= phys_itt_base as u64; + debug!( "MAPD cmd, set ITT: {:#x} to device {:#x}", - itt_base, + phys_itt_base, id >> 32 ); } @@ -250,7 +282,7 @@ impl Cmdq { new_cmd[2] &= !0xffffu64; new_cmd[2] |= icid & 0xffff; enable_one_lpi((intid - 8192) as _); - info!( + debug!( "MAPTI cmd, for device {:#x}, event {:#x} -> vicid {:#x} (icid {:#x}) + intid {:#x}", id >> 32, event, @@ -266,33 +298,34 @@ impl Cmdq { new_cmd[2] &= !0xffffu64; new_cmd[2] |= icid & 0xffff; let rd_base = (value[2] >> 16) & 0x7ffffffff; - info!( + debug!( "MAPC cmd, vicid {:#x} (icid {:#x}) -> redist {:#x}", vicid, icid, rd_base ); } 0x05 => { - trace!("SYNC cmd"); + debug!("SYNC cmd"); } 0x04 => { - trace!("CLEAR cmd"); + debug!("CLEAR cmd"); } 0x0f => { - trace!("DISCARD cmd"); + debug!("DISCARD cmd"); } 
0x03 => { - trace!("INT cmd"); + debug!("INT cmd"); } 0x0c => { - trace!("INV cmd"); + debug!("INV cmd"); } 0x0d => { - trace!("INVALL cmd"); + debug!("INVALL cmd"); } _ => { - trace!("other cmd, code: 0x{:x}", code); + debug!("other cmd, code: 0x{:x}", code); } } + new_cmd } @@ -312,7 +345,7 @@ impl Cmdq { }; let cmd_num = cmd_size / PER_CMD_BYTES; - trace!("cmd size: {:#x}, cmd num: {:#x}", cmd_size, cmd_num); + debug!("cmd size: {:#x}, cmd num: {:#x}", cmd_size, cmd_num); let mut vm_cmdq_addr = zone_addr + origin_readr; let mut real_cmdq_addr = self.phy_addr + self.readr; @@ -342,10 +375,9 @@ impl Cmdq { loop { self.readr = (ptr::read_volatile(readr as *mut u64)) as usize; // hw readr if self.readr == self.writer { - trace!( + debug!( "readr={:#x}, writer={:#x}, its cmd end", - self.readr, - self.writer + self.readr, self.writer ); break; } else { diff --git a/src/device/irqchip/gicv3/vgic.rs b/src/device/irqchip/gicv3/vgic.rs index b5662b4d..7c52c26e 100644 --- a/src/device/irqchip/gicv3/vgic.rs +++ b/src/device/irqchip/gicv3/vgic.rs @@ -338,86 +338,79 @@ pub fn vgicv3_its_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { match reg { GITS_CTRL => { mmio_perform_access(gits_base, mmio); - if mmio.is_write { - trace!("write GITS_CTRL: {:#x}", mmio.value); - } else { - trace!("read GITS_CTRL: {:#x}", mmio.value); - } } GITS_CBASER => { if mmio.is_write { - if zone_id == 0 { - mmio_perform_access(gits_base, mmio); - } set_cbaser(mmio.value, zone_id); - trace!("write GITS_CBASER: {:#x}", mmio.value); } else { mmio.value = read_cbaser(zone_id); - trace!("read GITS_CBASER: {:#x}", mmio.value); } } + // v_dt_addr + 0x10000000; GITS_BASER => { - if zone_id == 0 { - mmio_perform_access(gits_base, mmio); - } else { - if mmio.is_write { - set_dt_baser(mmio.value, zone_id); - } else { - mmio.value = read_dt_baser(zone_id); - } - } if mmio.is_write { - trace!("write GITS_BASER: 0x{:016x}", mmio.value); + set_dt_baser(mmio.value, zone_id); + if zone_id == 0 { + let 
v_dt_addr = mmio.value & 0xfff_fff_fff_000usize; + let phys_dt_trans = + unsafe { this_zone().read().gpm.page_table_query(v_dt_addr) }; + match phys_dt_trans { + Ok(p) => { + mmio.value &= !0xfff_fff_fff_000usize; + mmio.value |= p.0 as usize; + } + _ => {} + } + mmio_perform_access(gits_base, mmio); + } } else { - trace!("read GITS_BASER: 0x{:016x}", mmio.value); + mmio.value = read_dt_baser(zone_id); } } GITS_COLLECTION_BASER => { - if zone_id == 0 { - mmio_perform_access(gits_base, mmio); - } else { - if mmio.is_write { - set_ct_baser(mmio.value, zone_id); - } else { - mmio.value = read_ct_baser(zone_id); - } - } if mmio.is_write { - trace!("write GITS_COLL_BASER: 0x{:016x}", mmio.value); + set_ct_baser(mmio.value, zone_id); + if zone_id == 0 { + let v_ct_addr = mmio.value & 0xfff_fff_fff_000usize; + let phys_ct_trans = + unsafe { this_zone().read().gpm.page_table_query(v_ct_addr) }; + match phys_ct_trans { + Ok(p) => { + mmio.value &= !0xfff_fff_fff_000usize; + mmio.value |= p.0 as usize; + } + _ => {} + } + mmio_perform_access(gits_base, mmio); + } } else { - trace!("read GITS_COLL_BASER: 0x{:016x}", mmio.value); + mmio.value = read_ct_baser(zone_id); } } GITS_CWRITER => { if mmio.is_write { - trace!("write GITS_CWRITER: {:#x}", mmio.value); set_cwriter(mmio.value, zone_id); } else { mmio.value = read_cwriter(zone_id); - trace!("read GITS_CWRITER: {:#x}", mmio.value); } } GITS_CREADR => { mmio.value = read_creadr(zone_id); - trace!("read GITS_CREADER: {:#x}", mmio.value); } GITS_TYPER => { mmio_perform_access(gits_base, mmio); - trace!("GITS_TYPER: {:#x}", mmio.value); } _ => { mmio_perform_access(gits_base, mmio); if mmio.is_write { - trace!( + debug!( "write GITS offset: {:#x}, 0x{:016x}", - mmio.address, - mmio.value + mmio.address, mmio.value ); } else { - trace!( + debug!( "read GITS offset: {:#x}, 0x{:016x}", - mmio.address, - mmio.value + mmio.address, mmio.value ); } } diff --git a/src/pci/pci.rs b/src/pci/pci.rs index ac0d5ec7..93d50fe7 100644 --- 
a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -185,8 +185,13 @@ impl Zone { alloc_pci_devs[idx] & 0b111 ); self.pciroot.alloc_devs.push(alloc_pci_devs[idx] as _); + #[cfg(all(feature = "iommu", target_arch = "aarch64"))] if alloc_pci_devs[idx] != 0 { - iommu_add_device(self.id, alloc_pci_devs[idx] as _); + iommu_add_device( + self.id, + alloc_pci_devs[idx] as _, + self.iommu_pt.as_ref().unwrap().root_paddr(), + ); } } diff --git a/src/zone.rs b/src/zone.rs index 5e6483e1..0ede1517 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -39,6 +39,7 @@ pub struct Zone { pub irq_bitmap: [u32; 1024 / 32], pub gpm: MemorySet, pub pciroot: PciRoot, + pub iommu_pt: Option>, pub is_err: bool, } @@ -53,6 +54,11 @@ impl Zone { mmio: Vec::new(), irq_bitmap: [0; 1024 / 32], pciroot: PciRoot::new(), + iommu_pt: if cfg!(feature = "iommu") { + Some(new_s2_memory_set()) + } else { + None + }, is_err: false, } } @@ -208,6 +214,10 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { // #[cfg(target_arch = "aarch64")] // zone.ivc_init(config.ivc_config()); + #[cfg(all(feature = "iommu", target_arch = "aarch64"))] + zone.iommu_pt_init(config.memory_regions(), &config.arch_config) + .unwrap(); + /* loongarch page table emergency */ /* Kai: Maybe unnecessary but i can't boot vms on my 3A6000 PC without this function. 
*/ // #[cfg(target_arch = "loongarch64")] From 4b5b208ed9b792958ae9a5129134ae0a185a037a Mon Sep 17 00:00:00 2001 From: Solicey Date: Sat, 18 Oct 2025 13:39:53 +0800 Subject: [PATCH 27/29] bug fix: add invalidation queue to x86 iommu, so that zone1 can use PCI device after restart --- platform/x86_64/nuc14mnk/board.rs | 6 +- platform/x86_64/qemu/board.rs | 2 +- src/arch/x86_64/cpu.rs | 8 +- src/arch/x86_64/idt.rs | 2 +- src/arch/x86_64/iommu.rs | 166 +++++++++++++++++++++++++++--- src/arch/x86_64/s2pt.rs | 2 +- src/arch/x86_64/trap.rs | 2 +- src/device/irqchip/pic/mod.rs | 13 ++- src/device/virtio_trampoline.rs | 4 +- 9 files changed, 178 insertions(+), 27 deletions(-) diff --git a/platform/x86_64/nuc14mnk/board.rs b/platform/x86_64/nuc14mnk/board.rs index 43df35d3..d5f001e9 100644 --- a/platform/x86_64/nuc14mnk/board.rs +++ b/platform/x86_64/nuc14mnk/board.rs @@ -158,9 +158,9 @@ pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { pci_mem64_base: 0x0, }; -pub const ROOT_PCI_DEVS: [u64; 18] = [ - 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, 0xfd, - 0x100, 0x200, +pub const ROOT_PCI_DEVS: [u64; 19] = [ + 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xa3, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, + 0xfd, 0x100, 0x200, ]; #[cfg(all(feature = "graphics"))] diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index d108c80b..342bac03 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -123,7 +123,7 @@ pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { pci_mem64_base: 0x0, }; -pub const ROOT_PCI_DEVS: [u64; 8] = [0x0, 0x8, 0x10, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; // 0x10, +pub const ROOT_PCI_DEVS: [u64; 8] = [0x0, 0x8, 0x10, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; #[cfg(all(feature = "graphics"))] pub const GRAPHICS_FONT: &[u8] = diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index 23d8ab31..b9d69b9c 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ 
-14,7 +14,7 @@ use crate::{ vmx::*, }, consts::{self, core_end, PER_CPU_SIZE}, - device::irqchip::pic::{check_pending_vectors, ioapic, lapic::VirtLocalApic}, + device::irqchip::pic::{check_pending_vectors, clear_vectors, ioapic, lapic::VirtLocalApic}, error::{HvError, HvResult}, memory::{ addr::{phys_to_virt, PHYS_VIRT_OFFSET}, @@ -291,6 +291,8 @@ impl ArchCpu { self.host_stack_top = (core_end() + (self.cpuid + 1) * PER_CPU_SIZE) as _; + clear_vectors(self.cpuid); + unsafe { self.vmx_launch() }; loop {} @@ -562,7 +564,9 @@ impl ArchCpu { fn vmexit_handler(&mut self) { crate::arch::trap::handle_vmexit(self).unwrap(); - check_pending_vectors(self.cpuid); + if (self.power_on) { + check_pending_vectors(self.cpuid); + } } unsafe fn vmx_entry_failed() -> ! { diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 3be90002..93567f7c 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -8,7 +8,7 @@ const VECTOR_CNT: usize = 256; #[allow(non_snake_case)] pub mod IdtVector { - pub const VIRT_IPI_VECTOR: u8 = 0xfa; + pub const VIRT_IPI_VECTOR: u8 = 0xef; pub const APIC_ERROR_VECTOR: u8 = 0xfc; pub const APIC_SPURIOUS_VECTOR: u8 = 0xfd; pub const APIC_TIMER_VECTOR: u8 = 0xfe; diff --git a/src/arch/x86_64/iommu.rs b/src/arch/x86_64/iommu.rs index 50f0815f..6805ce25 100644 --- a/src/arch/x86_64/iommu.rs +++ b/src/arch/x86_64/iommu.rs @@ -1,9 +1,9 @@ use crate::{ - arch::acpi, - memory::{Frame, HostPhysAddr}, + arch::{acpi, hpet::current_time_nanos}, + memory::{addr::virt_to_phys, Frame, HostPhysAddr}, zone::this_zone_id, }; -use ::acpi::{mcfg::Mcfg, sdt::Signature}; +use ::acpi::sdt::Signature; use alloc::{collections::btree_map::BTreeMap, vec::Vec}; use bit_field::BitField; use core::{ @@ -22,6 +22,26 @@ const IR_ENTRY_CNT: usize = 256; const ROOT_TABLE_ENTRY_SIZE: usize = 16; const CONTEXT_TABLE_ENTRY_SIZE: usize = 16; +const INVALIDATION_QUEUE_SIZE: usize = 4096; +const QI_INV_ENTRY_SIZE: usize = 16; +const NUM_IR_ENTRIES_PER_PAGE: usize = 256; + 
+const INV_CONTEXT_CACHE_DESC: u64 = 0x01; +const INV_IOTLB_DESC: u64 = 0x02; +const INV_WAIT_DESC: u64 = 0x05; + +const INV_STATUS_WRITE: u64 = 1 << 5; +const INV_STATUS_INCOMPLETED: u64 = 0; +const INV_STATUS_COMPLETED: u64 = 1; +const INV_STATUS_DATA: u64 = INV_STATUS_COMPLETED << 32; +const INV_WAIT_DESC_LOWER: u64 = INV_WAIT_DESC | INV_STATUS_WRITE | INV_STATUS_DATA; + +const DMA_CONTEXT_DEVICE_INVL: u64 = (3 << 4); + +const DMA_IOTLB_DOMAIN_INVL: u64 = (2 << 4); +const DMA_IOTLB_DW: u64 = (1 << 6); +const DMA_IOTLB_DR: u64 = (1 << 7); + // DMA-remapping registers mod dma_remap_reg { @@ -107,6 +127,12 @@ struct VtdDevice { dev_func: u8, } +#[derive(Clone, Debug)] +struct DmarEntry { + lo_64: u64, + hi_64: u64, +} + #[derive(Debug)] struct Vtd { reg_base_hpa: usize, @@ -118,6 +144,8 @@ struct Vtd { ir_table: Frame, /// cache value of DMAR_GCMD_REG gcmd: GcmdFlags, + qi_queue_hpa: usize, + qi_tail: usize, } impl Vtd { @@ -144,8 +172,8 @@ impl Vtd { } fn activate_qi(&mut self) { - let qi_queue_hpa = self.qi_queue.start_paddr(); - self.mmio_write_u64(DMAR_IQA_REG, qi_queue_hpa as u64); + self.qi_queue_hpa = self.qi_queue.start_paddr(); + self.mmio_write_u64(DMAR_IQA_REG, self.qi_queue_hpa as u64); self.mmio_write_u32(DMAR_IQT_REG, 0); if !self.gcmd.contains(GcmdFlags::QIE) { @@ -157,9 +185,16 @@ impl Vtd { } } - fn add_context_entry(&mut self, bus: u8, dev_func: u8, zone_s2pt_hpa: HostPhysAddr) { + fn update_context_entry( + &mut self, + bus: u8, + dev_func: u8, + zone_s2pt_hpa: HostPhysAddr, + is_insert: bool, + ) { let root_entry_hpa = self.root_table.start_paddr() + (bus as usize) * ROOT_TABLE_ENTRY_SIZE; let root_entry_low = unsafe { &mut *(root_entry_hpa as *mut u64) }; + let zone_id = this_zone_id(); // context table not present if !root_entry_low.get_bit(0) { @@ -179,19 +214,22 @@ impl Vtd { let context_entry_hpa = context_table_hpa + (dev_func as usize) * CONTEXT_TABLE_ENTRY_SIZE; let context_entry = unsafe { &mut *(context_entry_hpa as *mut u128) }; - // 
s2pt not present - if !context_entry.get_bit(0) { + if is_insert { // address width: 010b (48bit 4-level page table) context_entry.set_bits(64..=66, 0b010); // domain identifier: zone id - context_entry.set_bits(72..=87, this_zone_id() as _); + context_entry.set_bits(72..=87, zone_id as _); // second stage page translation pointer context_entry.set_bits(12..=63, zone_s2pt_hpa.get_bits(12..=63) as _); // present context_entry.set_bit(0, true); - - flush_cache_range(context_entry_hpa, CONTEXT_TABLE_ENTRY_SIZE); + } else { + context_entry.set_bits(0..=127, 0); } + + flush_cache_range(context_entry_hpa, CONTEXT_TABLE_ENTRY_SIZE); + let bdf: u16 = (bus as u16) << 8 | (dev_func as u16); + self.invalidate_context_cache(zone_id as _, bdf as _, 0); } fn add_device(&mut self, zone_id: usize, bdf: u64) { @@ -229,6 +267,20 @@ impl Vtd { .contains(EcapFlags::EIM | EcapFlags::IR | EcapFlags::QI)); } + fn clear_devices(&mut self, zone_id: usize) { + let bdfs: Vec<(u8, u8)> = self + .devices + .iter() + .filter(|&(_, &dev_zone_id)| dev_zone_id == zone_id) + .map(|(&bdf, _)| (bdf.get_bits(8..=15) as u8, bdf.get_bits(0..=7) as u8)) + .collect(); + + for (bus, dev_func) in bdfs { + self.update_context_entry(bus, dev_func, 0, false); + } + self.invalid_iotlb(zone_id as _); + } + fn init(&mut self) { self.check_capability(); self.set_interrupt(); @@ -242,6 +294,67 @@ impl Vtd { self.activate_interrupt_remapping(); */ } + fn invalidate_context_cache(&mut self, domain_id: u16, source_id: u16, func_mask: u8) { + let entry: DmarEntry = DmarEntry { + lo_64: INV_CONTEXT_CACHE_DESC + | DMA_CONTEXT_DEVICE_INVL + | dma_ccmd_did(domain_id) + | dma_ccmd_sid(source_id) + | dma_ccmd_fm(func_mask), + hi_64: 0, + }; + if (entry.lo_64 != 0) { + self.issue_qi_request(entry); + } + } + + fn invalid_iotlb(&mut self, domain_id: u16) { + let entry: DmarEntry = DmarEntry { + // drain read & drain write + lo_64: INV_IOTLB_DESC + | DMA_IOTLB_DOMAIN_INVL + | DMA_IOTLB_DR + | DMA_IOTLB_DW + | 
dma_iotlb_did(domain_id), + hi_64: 0, + }; + if (entry.lo_64 != 0) { + self.issue_qi_request(entry); + } + } + + fn issue_qi_request(&mut self, entry: DmarEntry) { + let mut qi_status: u32 = 0; + let qi_status_ptr = &qi_status as *const u32; + + unsafe { + let mut invalidate_desc = &mut *((self.qi_queue_hpa + self.qi_tail) as *mut DmarEntry); + invalidate_desc.hi_64 = entry.hi_64; + invalidate_desc.lo_64 = entry.lo_64; + } + self.qi_tail = (self.qi_tail + QI_INV_ENTRY_SIZE) % INVALIDATION_QUEUE_SIZE; + unsafe { + let mut invalidate_desc = &mut *((self.qi_queue_hpa + self.qi_tail) as *mut DmarEntry); + invalidate_desc.hi_64 = virt_to_phys(qi_status_ptr as usize) as u64; + invalidate_desc.lo_64 = INV_WAIT_DESC_LOWER; + } + self.qi_tail = (self.qi_tail + QI_INV_ENTRY_SIZE) % INVALIDATION_QUEUE_SIZE; + + qi_status = INV_STATUS_INCOMPLETED as u32; + self.mmio_write_u32(DMAR_IQT_REG, self.qi_tail as _); + + let start_tick = current_time_nanos(); + while (qi_status != INV_STATUS_COMPLETED as _) { + if (current_time_nanos() - start_tick > 1000000) { + error!("issue qi request failed!"); + break; + } + unsafe { + asm!("pause", options(nostack, preserves_flags)); + } + } + } + fn set_interrupt(&mut self) { self.mmio_write_u32(DMAR_FECTL_REG, 0); } @@ -266,7 +379,7 @@ impl Vtd { self.wait(GstsFlags::RTPS, false); } - fn update_dma_translation_tables(&mut self, zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { + fn fill_dma_translation_tables(&mut self, zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { let bdfs: Vec<(u8, u8)> = self .devices .iter() @@ -275,8 +388,9 @@ impl Vtd { .collect(); for (bus, dev_func) in bdfs { - self.add_context_entry(bus, dev_func, zone_s2pt_hpa); + self.update_context_entry(bus, dev_func, zone_s2pt_hpa, true); } + self.invalid_iotlb(zone_id as _); } fn wait(&mut self, mask: GstsFlags, cond: bool) { @@ -307,6 +421,22 @@ impl Vtd { } } +const fn dma_ccmd_sid(sid: u16) -> u64 { + ((sid as u64) & 0xffff) << 32 +} + +const fn dma_ccmd_did(did: u16) -> u64 { 
+ ((did as u64) & 0xffff) << 16 +} + +const fn dma_ccmd_fm(fm: u8) -> u64 { + ((fm as u64) & 0x3) << 48 +} + +const fn dma_iotlb_did(did: u16) -> u64 { + ((did as u64) & 0xffff) << 16 +} + pub fn parse_root_dmar() -> Mutex { let dmar = acpi::root_get_table(&Signature::DMAR).unwrap(); let mut cur: usize = 48; // start offset of remapping structures @@ -339,6 +469,8 @@ pub fn parse_root_dmar() -> Mutex { qi_queue: Frame::new().unwrap(), ir_table: Frame::new().unwrap(), gcmd: GcmdFlags::empty(), + qi_queue_hpa: 0, + qi_tail: 0, }) } @@ -354,11 +486,15 @@ pub fn iommu_add_device(zone_id: usize, bdf: usize) { VTD.get().unwrap().lock().add_device(zone_id, bdf as _); } -pub fn update_dma_translation_tables(zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { +pub fn clear_dma_translation_tables(zone_id: usize) { + VTD.get().unwrap().lock().clear_devices(zone_id); +} + +pub fn fill_dma_translation_tables(zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { VTD.get() .unwrap() .lock() - .update_dma_translation_tables(zone_id, zone_s2pt_hpa); + .fill_dma_translation_tables(zone_id, zone_s2pt_hpa); } /// should be called after gpm is activated diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index 3dffc80c..b6e6a0c9 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -247,7 +247,7 @@ impl PagingInstr for S2PTInstr { // if this cpu is boot cpu and it is running if this_cpu_data().arch_cpu.power_on && this_cpu_data().boot_cpu { - iommu::update_dma_translation_tables(this_zone_id(), root_paddr); + iommu::fill_dma_translation_tables(this_zone_id(), root_paddr); } } diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 4a317668..94e8e790 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -93,7 +93,7 @@ fn handle_irq(vector: u8) { } IdtVector::APIC_SPURIOUS_VECTOR | IdtVector::APIC_ERROR_VECTOR => {} _ => { - if vector >= 0x20 { + if vector >= 0x20 && this_cpu_data().arch_cpu.power_on { inject_vector(this_cpu_id(), vector, None, 
false); } } diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index 3bbca1f1..c649397c 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -71,6 +71,11 @@ impl PendingVectors { let mut vectors = self.inner.get(cpu_id).unwrap().lock(); vectors.has_eoi = true; } + + fn clear_vectors(&self, cpu_id: usize) { + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + vectors.queue.clear(); + } } pub fn inject_vector(cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { @@ -92,6 +97,10 @@ pub fn pop_vector(cpu_id: usize) { PENDING_VECTORS.get().unwrap().pop_vector(cpu_id); } +pub fn clear_vectors(cpu_id: usize) { + PENDING_VECTORS.get().unwrap().clear_vectors(cpu_id); +} + pub fn enable_irq() { unsafe { asm!("sti") }; } @@ -118,5 +127,7 @@ pub fn primary_init_early() { pub fn primary_init_late() {} impl Zone { - pub fn arch_irqchip_reset(&self) {} + pub fn arch_irqchip_reset(&self) { + iommu::clear_dma_translation_tables(self.id); + } } diff --git a/src/device/virtio_trampoline.rs b/src/device/virtio_trampoline.rs index 251491ab..8a51da50 100644 --- a/src/device/virtio_trampoline.rs +++ b/src/device/virtio_trampoline.rs @@ -99,7 +99,7 @@ pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { // if it is cfg request, current cpu should be blocked until gets the result if need_interrupt == 0 { // when virtio backend finish the req, it will add 1 to cfg_flag. - while cfg_flags[cpu_id] == old_cfg_flag { + while unsafe { core::ptr::read_volatile(&cfg_flags[cpu_id]) } == old_cfg_flag { // fence(Ordering::Acquire); count += 1; if count == MAX_WAIT_TIMES { @@ -119,7 +119,7 @@ pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { } if !mmio.is_write { // ensure cfg value is right. 
- mmio.value = cfg_values[cpu_id] as _; + mmio.value = unsafe { core::ptr::read_volatile(&cfg_values[cpu_id]) as _ }; // debug!("non root receives value: {:#x?}", mmio.value); } } From fb042eca4bcac2d6b5896888367564e41d2583e3 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sat, 18 Oct 2025 15:36:18 +0800 Subject: [PATCH 28/29] x86_64 add license headers --- src/arch/x86_64/acpi.rs | 16 ++++++++++++++++ src/arch/x86_64/boot.rs | 16 ++++++++++++++++ src/arch/x86_64/consts.rs | 16 ++++++++++++++++ src/arch/x86_64/cpu.rs | 16 ++++++++++++++++ src/arch/x86_64/cpuid.rs | 16 ++++++++++++++++ src/arch/x86_64/entry.rs | 16 ++++++++++++++++ src/arch/x86_64/graphics.rs | 17 ++++++++++++++++- src/arch/x86_64/hpet.rs | 16 ++++++++++++++++ src/arch/x86_64/hypercall.rs | 16 ++++++++++++++++ src/arch/x86_64/idt.rs | 16 ++++++++++++++++ src/arch/x86_64/iommu.rs | 16 ++++++++++++++++ src/arch/x86_64/ipi.rs | 16 ++++++++++++++++ src/arch/x86_64/mm.rs | 16 ++++++++++++++++ src/arch/x86_64/mmio.rs | 16 ++++++++++++++++ src/arch/x86_64/mod.rs | 16 ++++++++++++++++ src/arch/x86_64/msr.rs | 16 ++++++++++++++++ src/arch/x86_64/paging.rs | 16 ++++++++++++++++ src/arch/x86_64/pci.rs | 16 ++++++++++++++++ src/arch/x86_64/pio.rs | 16 ++++++++++++++++ src/arch/x86_64/s1pt.rs | 16 ++++++++++++++++ src/arch/x86_64/s2pt.rs | 16 ++++++++++++++++ src/arch/x86_64/trap.rs | 17 ++++++++++++++++- src/arch/x86_64/vmcs.rs | 16 ++++++++++++++++ src/arch/x86_64/vmx.rs | 16 ++++++++++++++++ src/arch/x86_64/zone.rs | 16 ++++++++++++++++ src/device/irqchip/pic/ioapic.rs | 16 ++++++++++++++++ src/device/irqchip/pic/lapic.rs | 16 ++++++++++++++++ src/device/irqchip/pic/mod.rs | 16 ++++++++++++++++ src/device/uart/uart16550a.rs | 17 ++++++++++++++++- src/device/virtio_trampoline.rs | 3 +-- src/pci/pci.rs | 5 ++--- 31 files changed, 467 insertions(+), 8 deletions(-) diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs index b1c16f4d..b3c6636d 100644 --- a/src/arch/x86_64/acpi.rs +++ 
b/src/arch/x86_64/acpi.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{boot, pci::probe_root_pci_devices}, config::{HvConfigMemoryRegion, HvZoneConfig}, diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs index bf589c2e..334990f2 100644 --- a/src/arch/x86_64/boot.rs +++ b/src/arch/x86_64/boot.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{zone::HvArchZoneConfig, Stage2PageTable}, config::{root_zone_config, HvPciConfig, HvZoneConfig, MEM_TYPE_RAM}, diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs index 74b81c9e..260d8c2f 100644 --- a/src/arch/x86_64/consts.rs +++ b/src/arch/x86_64/consts.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + // PCI constants pub const HV_ADDR_PREFIX: u64 = 0; pub const LOONG_HT_PREFIX: u64 = 0; diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs index b9d69b9c..07190eb2 100644 --- a/src/arch/x86_64/cpu.rs +++ b/src/arch/x86_64/cpu.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ acpi::{self, *}, diff --git a/src/arch/x86_64/cpuid.rs b/src/arch/x86_64/cpuid.rs index 71783033..989666d5 100644 --- a/src/arch/x86_64/cpuid.rs +++ b/src/arch/x86_64/cpuid.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. 
+// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + numeric_enum_macro::numeric_enum! { #[repr(u32)] #[derive(Debug)] diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs index a45b1914..2a6db8bd 100644 --- a/src/arch/x86_64/entry.rs +++ b/src/arch/x86_64/entry.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{boot, cpu::this_apic_id, graphics::font_init}, consts::PER_CPU_SIZE, diff --git a/src/arch/x86_64/graphics.rs b/src/arch/x86_64/graphics.rs index 5315e326..af5453d8 100644 --- a/src/arch/x86_64/graphics.rs +++ b/src/arch/x86_64/graphics.rs @@ -1,6 +1,21 @@ -use spin::{Mutex, Once}; +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey use crate::arch::boot::get_multiboot_tags; +use spin::{Mutex, Once}; const PSF2_MAGIC: u32 = 0x864ab572; diff --git a/src/arch/x86_64/hpet.rs b/src/arch/x86_64/hpet.rs index 15cdb5f7..0cd8da9b 100644 --- a/src/arch/x86_64/hpet.rs +++ b/src/arch/x86_64/hpet.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::memory::VirtAddr; use bit_field::BitField; use core::{arch::x86_64::_rdtsc, time::Duration, u32}; diff --git a/src/arch/x86_64/hypercall.rs b/src/arch/x86_64/hypercall.rs index 8ddc83a0..9a84d84d 100644 --- a/src/arch/x86_64/hypercall.rs +++ b/src/arch/x86_64/hypercall.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::cpu::this_cpu_id, config::CONFIG_MAGIC_VERSION, diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 93567f7c..97f284e5 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{error::HvResult, zone::this_zone_id}; use alloc::{collections::btree_map::BTreeMap, vec::Vec}; use core::u32; diff --git a/src/arch/x86_64/iommu.rs b/src/arch/x86_64/iommu.rs index 0ce1853d..80eff95f 100644 --- a/src/arch/x86_64/iommu.rs +++ b/src/arch/x86_64/iommu.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{acpi, hpet::current_time_nanos}, memory::{addr::virt_to_phys, Frame, HostPhysAddr}, diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs index dc95233c..b8c20de5 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/x86_64/ipi.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ acpi::{get_apic_id, get_cpu_id}, diff --git a/src/arch/x86_64/mm.rs b/src/arch/x86_64/mm.rs index 8435d5e3..97680cf4 100644 --- a/src/arch/x86_64/mm.rs +++ b/src/arch/x86_64/mm.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{acpi, boot, s1pt::Stage1PageTable, s2pt::Stage2PageTable}, error::HvResult, diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs index de0e1063..2b7681c4 100644 --- a/src/arch/x86_64/mmio.rs +++ b/src/arch/x86_64/mmio.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ s2pt::DescriptorAttr, diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index b8f3f789..d75140ed 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + #![allow(unused)] pub mod acpi; pub mod boot; diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs index be9b23a3..80585c5b 100644 --- a/src/arch/x86_64/msr.rs +++ b/src/arch/x86_64/msr.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::msr::Msr::*, consts::MAX_ZONE_NUM, diff --git a/src/arch/x86_64/paging.rs b/src/arch/x86_64/paging.rs index d8b84a74..94aedb61 100644 --- a/src/arch/x86_64/paging.rs +++ b/src/arch/x86_64/paging.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ error::{HvError, HvResult}, memory::{addr::is_aligned, Frame, MemFlags, MemoryRegion, PhysAddr, VirtAddr}, diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs index f078d9ac..8383fccd 100644 --- a/src/arch/x86_64/pci.rs +++ b/src/arch/x86_64/pci.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{acpi, idt, mmio::MMIoDevice, pio::get_pio_bitmap, zone::HvArchZoneConfig}, error::HvResult, diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs index f5d79986..feee7537 100644 --- a/src/arch/x86_64/pio.rs +++ b/src/arch/x86_64/pio.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ consts::MAX_ZONE_NUM, error::HvResult, diff --git a/src/arch/x86_64/s1pt.rs b/src/arch/x86_64/s1pt.rs index 5498ae33..13ead238 100644 --- a/src/arch/x86_64/s1pt.rs +++ b/src/arch/x86_64/s1pt.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use super::paging::{GenericPTE, Level4PageTable, PagingInstr}; use crate::{ consts::PAGE_SIZE, diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs index b6e6a0c9..9a4e8e49 100644 --- a/src/arch/x86_64/s2pt.rs +++ b/src/arch/x86_64/s2pt.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ iommu, diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs index 94e8e790..edfbeee9 100644 --- a/src/arch/x86_64/trap.rs +++ b/src/arch/x86_64/trap.rs @@ -1,4 +1,18 @@ -use core::mem::size_of; +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey use crate::{ arch::{ @@ -26,6 +40,7 @@ use crate::{ zone::this_zone_id, }; use bit_field::BitField; +use core::mem::size_of; use x86_64::registers::control::Cr4Flags; use super::{ diff --git a/src/arch/x86_64/vmcs.rs b/src/arch/x86_64/vmcs.rs index a9718c45..858e2a20 100644 --- a/src/arch/x86_64/vmcs.rs +++ b/src/arch/x86_64/vmcs.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + #![allow(non_camel_case_types)] use crate::{ arch::{ diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs index baa5eaf7..a4cbe2bf 100644 --- a/src/arch/x86_64/vmx.rs +++ b/src/arch/x86_64/vmx.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ cpu::ArchCpu, diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs index f2eb04d8..75e7c43d 100644 --- a/src/arch/x86_64/zone.rs +++ b/src/arch/x86_64/zone.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{acpi, boot, msr::set_msr_bitmap, pio, pio::set_pio_bitmap, Stage2PageTable}, config::*, diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs index 0da331f1..84b6e5a2 100644 --- a/src/device/irqchip/pic/ioapic.rs +++ b/src/device/irqchip/pic/ioapic.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ acpi::{get_apic_id, get_cpu_id}, diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs index 12b0f101..f5697c3b 100644 --- a/src/device/irqchip/pic/lapic.rs +++ b/src/device/irqchip/pic/lapic.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + use crate::{ arch::{ cpu::{this_apic_id, this_cpu_id}, diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs index c649397c..a72b4e07 100644 --- a/src/device/irqchip/pic/mod.rs +++ b/src/device/irqchip/pic/mod.rs @@ -1,3 +1,19 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + pub mod ioapic; pub mod lapic; diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs index a86b6c89..144b788b 100644 --- a/src/device/uart/uart16550a.rs +++ b/src/device/uart/uart16550a.rs @@ -1,4 +1,18 @@ -use core::ops::Range; +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey use crate::{ arch::{graphics::fb_putchar, pio::UART_COM1_BASE_PORT}, @@ -6,6 +20,7 @@ use crate::{ error::HvResult, }; use alloc::vec::Vec; +use core::ops::Range; use spin::Mutex; use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; diff --git a/src/device/virtio_trampoline.rs b/src/device/virtio_trampoline.rs index 8a51da50..ebd851b7 100644 --- a/src/device/virtio_trampoline.rs +++ b/src/device/virtio_trampoline.rs @@ -1,4 +1,3 @@ -use crate::arch::cpu::get_target_cpu; // Copyright (c) 2025 Syswonder // hvisor is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. @@ -14,7 +13,7 @@ use crate::arch::cpu::get_target_cpu; // // Authors: // -use crate::arch::cpu::this_cpu_id; +use crate::arch::cpu::{get_target_cpu, this_cpu_id}; use crate::consts::MAX_CPU_NUM; use crate::consts::MAX_WAIT_TIMES; use crate::device::irqchip::inject_irq; diff --git a/src/pci/pci.rs b/src/pci/pci.rs index 3397a33f..5ab53bbc 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -1,4 +1,3 @@ -use core::ptr::{read_volatile, write_volatile}; // Copyright (c) 2025 Syswonder // hvisor is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. 
@@ -14,8 +13,6 @@ use core::ptr::{read_volatile, write_volatile}; // // Authors: // -use core::{panic, ptr, usize}; - use crate::config::{HvPciConfig, CONFIG_MAX_PCI_DEV}; use crate::memory::addr::align_down; use crate::memory::mmio_perform_access; @@ -30,6 +27,8 @@ use crate::{ zone::Zone, }; use alloc::vec::Vec; +use core::ptr::{read_volatile, write_volatile}; +use core::{panic, ptr, usize}; use super::bridge::BridgeConfig; use super::endpoint::EndpointConfig; From 3e3ef7be7055069791c03f99fdf58c7826008165 Mon Sep 17 00:00:00 2001 From: Solicey Date: Sun, 19 Oct 2025 19:12:26 +0800 Subject: [PATCH 29/29] add feature uart16550a, modify logging implement --- Cargo.toml | 1 + platform/x86_64/nuc14mnk/cargo/features | 3 +- platform/x86_64/qemu/cargo/features | 3 +- src/device/uart/mod.rs | 4 +- src/logging.rs | 65 +++++++++++++++++-------- 5 files changed, 52 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 11a7f0f0..1c737a52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,6 +61,7 @@ imx_uart = [] uart_16550 = [] sifive_ccache = [] eic7700_sysreg = [] +uart16550a = [] ############## riscv64 ############# # irqchip driver diff --git a/platform/x86_64/nuc14mnk/cargo/features b/platform/x86_64/nuc14mnk/cargo/features index 71878594..ac3b7f71 100644 --- a/platform/x86_64/nuc14mnk/cargo/features +++ b/platform/x86_64/nuc14mnk/cargo/features @@ -1 +1,2 @@ -pci \ No newline at end of file +pci +uart16550a \ No newline at end of file diff --git a/platform/x86_64/qemu/cargo/features b/platform/x86_64/qemu/cargo/features index 71878594..ac3b7f71 100644 --- a/platform/x86_64/qemu/cargo/features +++ b/platform/x86_64/qemu/cargo/features @@ -1 +1,2 @@ -pci \ No newline at end of file +pci +uart16550a \ No newline at end of file diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index 4a0f5983..f30f036d 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -47,9 +47,9 @@ mod uart_16550; #[cfg(all(feature = "uart_16550", target_arch = 
"aarch64"))] pub use uart_16550::{console_getchar, console_putchar}; -#[cfg(target_arch = "x86_64")] +#[cfg(all(feature = "uart16550a", target_arch = "x86_64"))] mod uart16550a; -#[cfg(target_arch = "x86_64")] +#[cfg(all(feature = "uart16550a", target_arch = "x86_64"))] pub use uart16550a::{ console_getchar, console_putchar, virt_console_io_read, virt_console_io_write, UartReg, }; diff --git a/src/logging.rs b/src/logging.rs index dd046899..65f2370d 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -131,6 +131,50 @@ pub fn init() { struct SimpleLogger; +impl SimpleLogger { + #[cfg(feature = "graphics")] + fn print( + &self, + level: Level, + line: u32, + target: &str, + cpu_id: usize, + level_color: ColorCode, + args_color: ColorCode, + record: &Record, + ) { + println!( + "[{:<5} {}] ({}:{}) {}", + level, + cpu_id, + target, + line, + record.args() + ); + } + + #[cfg(not(feature = "graphics"))] + fn print( + &self, + level: Level, + line: u32, + target: &str, + cpu_id: usize, + level_color: ColorCode, + args_color: ColorCode, + record: &Record, + ) { + print(with_color!( + ColorCode::White, + "[{} {}] {} {}\n", + with_color!(level_color, "{:<5}", level), + with_color!(ColorCode::White, "{}", cpu_id), + with_color!(ColorCode::White, "({}:{})", target, line), + with_color!(args_color, "{}", record.args()), + )); + } +} + impl Log for SimpleLogger { fn enabled(&self, _metadata: &Metadata) -> bool { true @@ -160,26 +204,7 @@ impl Log for SimpleLogger { Level::Trace => ColorCode::BrightBlack, }; - #[cfg(all(feature = "graphics"))] - { - println!( - "[{:<5} {}] ({}:{}) {}", - level, - cpu_id, - target, - line, - record.args() - ); - } - #[cfg(not(all(feature = "graphics")))] - print(with_color!( - ColorCode::White, - "[{} {}] {} {}\n", - with_color!(level_color, "{:<5}", level), - with_color!(ColorCode::White, "{}", cpu_id), - with_color!(ColorCode::White, "({}:{})", target, line), - with_color!(args_color, "{}", record.args()), - )); + self.print(level, line, target, 
cpu_id, level_color, args_color, record); } fn flush(&self) {}