diff --git a/.gitignore b/.gitignore index 4cb7754b..224a9573 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ /images/* /platform/*/*/image/kernel/* /platform/*/*/image/virtdisk/* +/platform/*/*/image/bootloader/out +/platform/*/*/image/iso/boot/kernel/* +/platform/*/*/image/iso/boot/hvisor /tools/hvisor /tmp *.mod.[co] diff --git a/Cargo.lock b/Cargo.lock index c59334ea..4b904038 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,17 @@ dependencies = [ "tock-registers", ] +[[package]] +name = "acpi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94476c7ef97af4c4d998b3f422c1b01d5211aad57c80ed200baf148d1f1efab6" +dependencies = [ + "bit_field 0.10.2", + "bitflags 2.9.1", + "log", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -22,9 +33,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bare-metal" @@ -157,6 +168,7 @@ name = "hvisor" version = "0.1.0" dependencies = [ "aarch64-cpu", + "acpi", "bit_field 0.10.2", "bitflags 2.9.1", "bitmap-allocator", @@ -188,18 +200,18 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin 0.5.2", + "spin 0.9.8", ] [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" 
dependencies = [ "autocfg", "scopeguard", @@ -207,9 +219,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "loongArch64" @@ -223,9 +235,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "numeric-enum-macro" @@ -286,9 +298,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -298,9 +310,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -309,9 +321,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "riscv" @@ -340,9 +352,9 @@ dependencies = [ [[package]] name = "riscv-decode" -version = "0.2.1" +version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec7a6dc0b0bb96a4d23271864a45c0d24dcd9dde2a1b630a35f79fa29c588bf" +checksum = "cf8b4cfb0da0528321d22daee4299a23a8c5ac8848623d716e898d2a9eec0694" [[package]] name = "riscv-macros" @@ -441,15 +453,15 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "spin" -version = "0.5.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +checksum = "13287b4da9d1207a4f4929ac390916d64eacfe236a487e9a9f5b3be392be5162" [[package]] name = "spin" -version = "0.7.1" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13287b4da9d1207a4f4929ac390916d64eacfe236a487e9a9f5b3be392be5162" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "spin" diff --git a/Cargo.toml b/Cargo.toml index f61cca26..1c737a52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ heapless = { version = "0.8.0 "} [target.'cfg(target_arch = "aarch64")'.dependencies] aarch64-cpu = "9.4.0" -psci = { version = "0.1.0", default-features = false, features = ["smc"]} +psci = { version = "0.1.0", default-features = false, features = ["smc"] } [target.'cfg(target_arch = "riscv64")'.dependencies] sbi-rt = { version = "0.0.3", features = ["legacy"] } @@ -38,10 +38,11 @@ riscv-pac = "0.2.0" loongArch64 = "0.2.5" [target.'cfg(target_arch = "x86_64")'.dependencies] -x86 = "0.52.0" +x86 = "=0.52.0" x86_64 = "=0.14.10" -x2apic = "0.4.3" -raw-cpuid = "10.7.0" +x2apic = "=0.4.3" +raw-cpuid = "=10.7.0" +acpi = "=5.2.0" [features] ############# general ############## @@ -60,6 +61,7 @@ imx_uart = [] uart_16550 = [] sifive_ccache = [] eic7700_sysreg = [] +uart16550a = [] ############## riscv64 ############# # irqchip driver @@ -80,6 +82,9 @@ loongson_uart = [] loongson_3a5000 = [] loongson_3a6000 = [] 
+############# x86_64 ############### +graphics = [] + [profile.dev] # panic = "abort" # avoid cargo test failure, this is a bug of cargo diff --git a/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts b/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts index e2d4a8af..9239584b 100644 --- a/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts +++ b/platform/aarch64/qemu-gicv3/image/dts/zone1-linux.dts @@ -37,12 +37,43 @@ reg = <0x0 0x50000000 0x0 0x30000000>; }; - gic@8000000 { + intc@8000000 { + phandle = <0x01>; + interrupts = <0x01 0x09 0x04>; + reg = <0x00 0x8000000 0x00 0x10000 0x00 0x80a0000 0x00 0xf60000>; + #redistributor-regions = <0x01>; compatible = "arm,gic-v3"; - #interrupt-cells = <0x03>; + ranges; + #size-cells = <0x02>; + #address-cells = <0x02>; interrupt-controller; - reg = <0x00 0x8000000 0x00 0x10000 0x00 0x80a0000 0x00 0xf60000>; - phandle = <0x01>; + #interrupt-cells = <0x03>; + + its@8080000 { + phandle = <0x8006>; + reg = <0x00 0x8080000 0x00 0x20000>; + #msi-cells = <0x01>; + msi-controller; + compatible = "arm,gic-v3-its"; + }; + }; + + pcie@10000000 { + interrupt-map-mask = <0x1800 0x00 0x00 0x07>; + interrupt-map = <0x00 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x03 0x04 0x00 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x04 0x04 0x00 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x05 0x04 0x00 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x06 0x04 0x800 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x04 0x04 0x800 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x05 0x04 0x800 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x06 0x04 0x800 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x03 0x04 0x1000 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x05 0x04 0x1000 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x06 0x04 0x1000 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x03 0x04 0x1000 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x04 0x04 0x1800 0x00 0x00 0x01 0x01 0x00 0x00 0x00 0x06 0x04 0x1800 0x00 0x00 0x02 0x01 0x00 0x00 0x00 0x03 0x04 0x1800 0x00 0x00 0x03 0x01 0x00 0x00 0x00 0x04 0x04 0x1800 0x00 0x00 0x04 0x01 0x00 0x00 0x00 0x05 0x04>; + 
#interrupt-cells = <0x01>; + ranges = <0x1000000 0x00 0x00 0x00 0x3eff0000 0x00 0x10000 + 0x2000000 0x00 0x10000000 0x00 0x10000000 0x00 0x2eff0000 + 0x3000000 0x80 0x00 0x80 0x00 0x80 0x00>; + reg = <0x40 0x10000000 0x00 0x10000000>; + msi-map = <0x00 0x8006 0x00 0x10000>; + dma-coherent; + bus-range = <0x00 0xff>; + linux,pci-domain = <0x00>; + #size-cells = <0x02>; + #address-cells = <0x03>; + device_type = "pci"; + compatible = "pci-host-ecam-generic"; }; apb-pclk { diff --git a/platform/x86_64/nuc14mnk/board.rs b/platform/x86_64/nuc14mnk/board.rs new file mode 100644 index 00000000..d5f001e9 --- /dev/null +++ b/platform/x86_64/nuc14mnk/board.rs @@ -0,0 +1,168 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// +use crate::{arch::zone::HvArchZoneConfig, config::*, memory::GuestPhysAddr}; + +pub const MEM_TYPE_RESERVED: u32 = 5; + +pub const BOARD_NCPUS: usize = 4; + +pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; +pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; +pub const ROOT_ZONE_ENTRY: u64 = 0x8000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // hpa +pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1); + +const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x50e_0000, + virtual_start: 0xe_0000, + size: 0x2_0000, +}; + +const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x3a30_0000, // hpa + virtual_start: 0x3530_0000, // gpa + size: 0x10_0000, // modify size accordingly +}; + +pub const ROOT_ZONE_NAME: &str = "root-linux"; +pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=tty0 nointremap no_timer_check pci=pcie_scan_all root=/dev/sda2 rw init=/init rootwait\0"; +// pub const ROOT_ZONE_CMDLINE: &str = "video=vesafb console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all root=/dev/vda rw init=/init\0"; +//"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic +// video=vesafb + +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 14] = [ + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x500_0000, + virtual_start: 0x0, + size: 0xe_0000, + }, // ram + ROOT_ZONE_RSDP_REGION, // rsdp + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x510_0000, + virtual_start: 0x10_0000, + size: 0x14f0_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x1a00_0000, + virtual_start: 0x1500_0000, + size: 0x30_0000, + }, // ram + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x1a30_0000, + virtual_start: 
0x1530_0000, + size: 0x2000_0000, + }, // ram + ROOT_ZONE_ACPI_REGION, // acpi + HvConfigMemoryRegion { + mem_type: MEM_TYPE_IO, + physical_start: 0xfed0_0000, + virtual_start: 0xfed0_0000, + size: 0x1000, + }, // hpet + // TODO: e820 mem space probe + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0x1_0000_0000, + virtual_start: 0x1_0000_0000, + size: 0x2000_0000, + }, // zone 1 + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0x6ed7_f000, + virtual_start: 0x6ed7_f000, + size: 0x10_e000, + }, // ACPI non-volatile storage + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfeda_0000, + virtual_start: 0xfeda_0000, + size: 0x2_8000, + }, // pnp 00:05 + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfe01_1000, + virtual_start: 0xfe01_1000, + size: 0x40_0000, + }, // reserved + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0x677a_b000, + virtual_start: 0x677a_b000, + size: 0x74d_3000, + }, // reserved + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfd69_0000, + virtual_start: 0xfd69_0000, + size: 0x6_0000, + }, // INTC1057:00 + HvConfigMemoryRegion { + mem_type: MEM_TYPE_RESERVED, + physical_start: 0xfb00_0000, + virtual_start: 0xfb00_0000, + size: 0x100_0000, + }, // reserved +]; + +const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0x9000; +const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xa000; +const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; +const ROOT_ZONE_SCREEN_BASE_ADDR: GuestPhysAddr = 0x8000_0000; + +pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; +pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; +pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { + ioapic_base: ROOT_ZONE_IOAPIC_BASE, + ioapic_size: 0x1000, + kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, + cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, + setup_load_gpa: ROOT_ZONE_SETUP_ADDR, + initrd_load_gpa: 0, // 0x1500_0000, 
+ initrd_size: 0, // 0x26_b000, + rsdp_memory_region_id: 0x1, + acpi_memory_region_id: 0x5, + // not longer than 32 bits + screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, +}; + +pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { + ecam_base: 0xc0000000, + ecam_size: 0x300000, + io_base: 0x0, + io_size: 0x0, + pci_io_base: 0x0, + mem32_base: 0x0, + mem32_size: 0x0, + pci_mem32_base: 0x0, + mem64_base: 0x0, + mem64_size: 0x0, + pci_mem64_base: 0x0, +}; + +pub const ROOT_PCI_DEVS: [u64; 19] = [ + 0x0, 0x10, 0x20, 0x40, 0x50, 0x68, 0x90, 0xa0, 0xa2, 0xa3, 0xb0, 0xe0, 0xe8, 0xf8, 0xfb, 0xfc, + 0xfd, 0x100, 0x200, +]; + +#[cfg(all(feature = "graphics"))] +pub const GRAPHICS_FONT: &[u8] = + include_bytes!("../../platform/x86_64/qemu/image/font/spleen-6x12.psf"); diff --git a/platform/x86_64/nuc14mnk/cargo/config.template.toml b/platform/x86_64/nuc14mnk/cargo/config.template.toml new file mode 100644 index 00000000..a454e986 --- /dev/null +++ b/platform/x86_64/nuc14mnk/cargo/config.template.toml @@ -0,0 +1,10 @@ +[target.x86_64-unknown-none] +linker = "rust-lld" +rustflags = [ + "-Clink-arg=-Tplatform/__ARCH__/__BOARD__/linker.ld", + "-Clink-arg=-no-pie", + "-Clinker-flavor=ld.lld", + "-Cforce-frame-pointers=yes", + "-Ctarget-feature=-mmx,-sse,+soft-float", + "-Cno-redzone=yes", +] \ No newline at end of file diff --git a/platform/x86_64/nuc14mnk/cargo/features b/platform/x86_64/nuc14mnk/cargo/features new file mode 100644 index 00000000..ac3b7f71 --- /dev/null +++ b/platform/x86_64/nuc14mnk/cargo/features @@ -0,0 +1,2 @@ +pci +uart16550a \ No newline at end of file diff --git a/platform/x86_64/nuc14mnk/image/bootloader/boot.S b/platform/x86_64/nuc14mnk/image/bootloader/boot.S new file mode 100644 index 00000000..9b979b63 --- /dev/null +++ b/platform/x86_64/nuc14mnk/image/bootloader/boot.S @@ -0,0 +1,41 @@ +.section .text +.code16 +.global entry16 +entry16: + cli + cld + + mov ecx, eax + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + + lgdt [prot_gdt_desc] + mov eax, cr0 
+ or eax, 0x1 + mov cr0, eax + + ljmp 0x8, entry32 + +.code32 +.global entry32 +entry32: + mov ax, 0x10 + mov ds, ax + mov es, ax + mov ss, ax + mov fs, ax + mov gs, ax + + jmp ecx + +.balign 16 +prot_gdt: + .quad 0x0000000000000000 # 0x00: null + .quad 0x00cf9b000000ffff # 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff # 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + +prot_gdt_desc: + .short prot_gdt_desc - prot_gdt - 1 # limit + .long prot_gdt # base diff --git a/platform/x86_64/nuc14mnk/image/bootloader/boot.ld b/platform/x86_64/nuc14mnk/image/bootloader/boot.ld new file mode 100644 index 00000000..3f96b209 --- /dev/null +++ b/platform/x86_64/nuc14mnk/image/bootloader/boot.ld @@ -0,0 +1,15 @@ +OUTPUT_ARCH(i386) +BASE_ADDRESS = 0x8000; + +ENTRY(entry16) +SECTIONS +{ + . = BASE_ADDRESS; + .text : { + *(.text .text.*) + } + + /DISCARD/ : { + *(.eh_frame) *(.eh_frame_hdr) + } +} diff --git a/platform/x86_64/nuc14mnk/image/bootloader/boot.mk b/platform/x86_64/nuc14mnk/image/bootloader/boot.mk new file mode 100644 index 00000000..e23e4540 --- /dev/null +++ b/platform/x86_64/nuc14mnk/image/bootloader/boot.mk @@ -0,0 +1,36 @@ +boot_dir := $(image_dir)/bootloader +boot_out_dir := $(image_dir)/bootloader/out + +boot_src := $(boot_dir)/boot.S +boot_lds := $(boot_dir)/boot.ld + +boot_o := $(boot_out_dir)/boot.o +boot_elf := $(boot_out_dir)/boot.elf +boot_bin := $(boot_out_dir)/boot.bin +boot_disa := $(boot_out_dir)/boot.asm + +AS ?= as +LD ?= ld +OBJCOPY ?= objcopy +OBJDUMP ?= objdump + +boot: mkout $(boot_bin) + +disasm: + $(OBJDUMP) -d -m i8086 -M intel $(boot_elf) | less + +mkout: + rm -rf $(boot_out_dir) + mkdir -p $(boot_out_dir) + +$(boot_o): $(boot_src) + $(AS) --32 -msyntax=intel -mnaked-reg $< -o $@ + +$(boot_elf): $(boot_o) $(boot_lds) + $(LD) -T$(boot_lds) $< -o $@ + $(OBJDUMP) -d -m i8086 -M intel $@ > $(boot_disa) + +$(boot_bin): $(boot_elf) + $(OBJCOPY) $< 
--strip-all -O binary $@ + +.PHONY: all disasm \ No newline at end of file diff --git a/platform/x86_64/nuc14mnk/image/font/solarize-12x29.psf b/platform/x86_64/nuc14mnk/image/font/solarize-12x29.psf new file mode 100644 index 00000000..071330e9 Binary files /dev/null and b/platform/x86_64/nuc14mnk/image/font/solarize-12x29.psf differ diff --git a/platform/x86_64/nuc14mnk/image/font/spleen-6x12.psf b/platform/x86_64/nuc14mnk/image/font/spleen-6x12.psf new file mode 100644 index 00000000..892d085c Binary files /dev/null and b/platform/x86_64/nuc14mnk/image/font/spleen-6x12.psf differ diff --git a/platform/x86_64/nuc14mnk/image/iso/boot/grub/grub.cfg b/platform/x86_64/nuc14mnk/image/iso/boot/grub/grub.cfg new file mode 100644 index 00000000..dea4571a --- /dev/null +++ b/platform/x86_64/nuc14mnk/image/iso/boot/grub/grub.cfg @@ -0,0 +1,27 @@ +set timeout=10 # waiting time befo automatic booting +set default=0 # default menu entry index + +insmod all_video + +menuentry "Hvisor" { + multiboot2 /boot/hvisor # use multiboot spec to boot + module2 /boot/kernel/boot.bin 0 + module2 /boot/kernel/boot.bin 5008000 + module2 /boot/kernel/setup.bin 500a000 + module2 /boot/kernel/vmlinux.bin 5100000 + boot +} + +if [ ${grub_platform} == "efi" ]; then + menuentry "UEFI Setting" { + fwsetup + } +fi + +menuentry "System Reboot" --class=reboot { + reboot +} + +menuentry "System Shutdown" --class=halt { + halt +} \ No newline at end of file diff --git a/platform/x86_64/nuc14mnk/linker.ld b/platform/x86_64/nuc14mnk/linker.ld new file mode 100644 index 00000000..a0096daa --- /dev/null +++ b/platform/x86_64/nuc14mnk/linker.ld @@ -0,0 +1,53 @@ +ENTRY(arch_entry) +BASE_ADDRESS = 0xffffff8000200000; + +SECTIONS +{ + . = BASE_ADDRESS; + skernel = .; + + stext = .; + .text : { + KEEP(*(.text.header)) + *(.text.entry) + *(.text.entry32) + *(.text.entry64) + *(.text .text.*) + } + + . = ALIGN(4K); + etext = .; + srodata = .; + .rodata : { + *(.rodata .rodata.*) + *(.srodata .srodata.*) + } + + . 
= ALIGN(4K); + erodata = .; + sdata = .; + .data : { + *(.data.entry_page_table) + *(.data .data.*) + *(.sdata .sdata.*) + } + + . = ALIGN(4K); + edata = .; + .bss : { + *(.bss.stack) + sbss = .; + *(.bss .bss.*) + *(.sbss .sbss.*) + } + + . = ALIGN(4K); + ebss = .; + ekernel = .; + + /DISCARD/ : { + *(.eh_frame) + } + . = ALIGN(4K); + __core_end = .; +} \ No newline at end of file diff --git a/platform/x86_64/nuc14mnk/platform.mk b/platform/x86_64/nuc14mnk/platform.mk new file mode 100644 index 00000000..db763ed5 --- /dev/null +++ b/platform/x86_64/nuc14mnk/platform.mk @@ -0,0 +1,51 @@ +QEMU := qemu-system-x86_64 + +zone0_boot := $(image_dir)/bootloader/out/boot.bin +zone0_setup := $(image_dir)/kernel/setup.bin +zone0_vmlinux := $(image_dir)/kernel/vmlinux.bin +zone0_initrd := $(image_dir)/virtdisk/initramfs.cpio.gz +zone0_rootfs := $(image_dir)/virtdisk/rootfs1.img +zone1_rootfs := $(image_dir)/virtdisk/rootfs2.img + +QEMU_ARGS := -machine q35,kernel-irqchip=split +QEMU_ARGS += -cpu host,+x2apic,+invtsc,+vmx -accel kvm +QEMU_ARGS += -smp 4 +QEMU_ARGS += -serial mon:stdio +QEMU_ARGS += -m 4G +QEMU_ARGS += -bios /usr/share/ovmf/OVMF.fd +QEMU_ARGS += -vga std +# QEMU_ARGS += -nographic + +QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb=on,aw-bits=48 +QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 +QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw +QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on + +# QEMU_ARGS += -drive if=none,file="$(zone1_rootfs)",id=X10009000,format=raw +# QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10009000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -netdev tap,id=net0,ifname=tap0,script=no,downscript=no +# QEMU_ARGS += -device virtio-net-pci,bus=pcie.1,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -netdev 
tap,id=net0,vhostforce=on +# QEMU_ARGS += -device virtio-net-pci,bus=pcie.1,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += --trace "virtio_*" --trace "virtqueue_*" --trace "vtd_dma*" --trace "iommu_*" + +# QEMU_ARGS += -kernel $(hvisor_elf) +QEMU_ARGS += -drive file=$(image_dir)/virtdisk/hvisor.iso,format=raw,index=0,media=disk + +# QEMU_ARGS += -device loader,file="$(zone0_boot)",addr=0x5008000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_setup)",addr=0x500a000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on +# QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" + +$(hvisor_bin): elf boot + $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ + cp $(hvisor_elf) $(image_dir)/iso/boot + mkdir -p $(image_dir)/iso/boot/kernel + cp $(zone0_boot) $(image_dir)/iso/boot/kernel + cp $(zone0_setup) $(image_dir)/iso/boot/kernel + cp $(zone0_vmlinux) $(image_dir)/iso/boot/kernel + mkdir -p $(image_dir)/virtdisk + grub-mkrescue /usr/lib/grub/x86_64-efi -o $(image_dir)/virtdisk/hvisor.iso $(image_dir)/iso + +include $(image_dir)/bootloader/boot.mk \ No newline at end of file diff --git a/platform/x86_64/nuc14mnk/test/runner.sh b/platform/x86_64/nuc14mnk/test/runner.sh new file mode 100644 index 00000000..e69de29b diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index 71147aea..342bac03 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -13,114 +13,118 @@ // // Authors: // -use crate::{ - arch::zone::HvArchZoneConfig, - config::*, - memory::{GuestPhysAddr, HostPhysAddr}, -}; +use crate::{arch::zone::HvArchZoneConfig, config::*, memory::GuestPhysAddr}; -pub const BOARD_NAME: &str = "qemu"; +pub const MEM_TYPE_RESERVED: u32 = 5; pub const BOARD_NCPUS: usize = 4; pub const ROOT_ZONE_DTB_ADDR: u64 = 0x00000000; -pub const 
ROOT_ZONE_ENTRY: u64 = 0x8000; // 0x10_0000; -pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // 0x500_0000; -pub const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xd000; pub const ROOT_ZONE_BOOT_STACK: GuestPhysAddr = 0x7000; -pub const ROOT_ZONE_INITRD_ADDR: GuestPhysAddr = 0x1500_0000; -pub const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0xc000; -pub const ROOT_ZONE_CPUS: u64 = (1 << 0); +pub const ROOT_ZONE_ENTRY: u64 = 0x8000; +pub const ROOT_ZONE_KERNEL_ADDR: u64 = 0x500_0000; // hpa +pub const ROOT_ZONE_CPUS: u64 = (1 << 0) | (1 << 1); + +const ROOT_ZONE_RSDP_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x50e_0000, + virtual_start: 0xe_0000, + size: 0x2_0000, +}; + +const ROOT_ZONE_ACPI_REGION: HvConfigMemoryRegion = HvConfigMemoryRegion { + mem_type: MEM_TYPE_RAM, + physical_start: 0x3a30_0000, // hpa + virtual_start: 0x3530_0000, // gpa + size: 0xf000, // modify size accordingly +}; pub const ROOT_ZONE_NAME: &str = "root-linux"; +pub const ROOT_ZONE_CMDLINE: &str = + "console=ttyS0 earlyprintk=serial nointremap no_timer_check pci=pcie_scan_all,lastbus=1 root=/dev/vda rw init=/init\0"; +//"console=ttyS0 earlyprintk=serial rdinit=/init nokaslr nointremap\0"; // noapic +// video=vesafb -pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 7] = [ +pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, physical_start: 0x500_0000, virtual_start: 0x0, - size: 0x1_0000, + size: 0xe_0000, }, // ram + ROOT_ZONE_RSDP_REGION, // rsdp HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x501_0000, - virtual_start: 0x1_0000, - size: 0x14ff_0000, + physical_start: 0x510_0000, + virtual_start: 0x10_0000, + size: 0x14f0_0000, }, // ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x2020_0000, - virtual_start: 0x1520_0000, - size: 0x4000_0000, + physical_start: 0x1a00_0000, + virtual_start: 0x1500_0000, + size: 0x30_0000, }, // 
ram HvConfigMemoryRegion { mem_type: MEM_TYPE_RAM, - physical_start: 0x2000_0000, - virtual_start: 0x1500_0000, - size: 0x20_0000, + physical_start: 0x1a30_0000, + virtual_start: 0x1530_0000, + size: 0x2000_0000, }, // ram - HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfec0_0000, - virtual_start: 0xfec0_0000, - size: 0x1000, - }, // io apic + ROOT_ZONE_ACPI_REGION, // acpi HvConfigMemoryRegion { mem_type: MEM_TYPE_IO, physical_start: 0xfed0_0000, virtual_start: 0xfed0_0000, size: 0x1000, }, // hpet + // TODO: e820 mem space probe HvConfigMemoryRegion { - mem_type: MEM_TYPE_IO, - physical_start: 0xfee0_0000, - virtual_start: 0xfee0_0000, - size: 0x1000, - }, // local apic + mem_type: MEM_TYPE_RESERVED, + physical_start: 0x4030_0000, + virtual_start: 0x4030_0000, + size: 0x2000_0000, + }, // zone 1 ]; -pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; -pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig {}; +const ROOT_ZONE_CMDLINE_ADDR: GuestPhysAddr = 0x9000; +const ROOT_ZONE_SETUP_ADDR: GuestPhysAddr = 0xa000; +const ROOT_ZONE_VMLINUX_ENTRY_ADDR: GuestPhysAddr = 0x10_0000; +const ROOT_ZONE_SCREEN_BASE_ADDR: GuestPhysAddr = 0x7000_0000; -// TODO: temp -pub const GUEST_PT1: GuestPhysAddr = 0x1000; -pub const GUEST_PT2: GuestPhysAddr = 0x2000; -pub const GUEST_ENTRY: GuestPhysAddr = 0x10_0000; -pub const GUEST_STACK_TOP: GuestPhysAddr = 0x7000; -pub const GUEST_PHYS_MEMORY_START: HostPhysAddr = 0x100_0000; +pub const ROOT_ZONE_IRQS: [u32; 32] = [0; 32]; +pub const ROOT_ZONE_IOAPIC_BASE: usize = 0xfec0_0000; +pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { + ioapic_base: ROOT_ZONE_IOAPIC_BASE, + ioapic_size: 0x1000, + kernel_entry_gpa: ROOT_ZONE_VMLINUX_ENTRY_ADDR, + cmdline_load_gpa: ROOT_ZONE_CMDLINE_ADDR, + setup_load_gpa: ROOT_ZONE_SETUP_ADDR, + initrd_load_gpa: 0, // 0x1500_0000, + initrd_size: 0, //0x26_b000, + rsdp_memory_region_id: 0x1, + acpi_memory_region_id: 0x5, + // not longer than 32 bits + 
screen_base: ROOT_ZONE_SCREEN_BASE_ADDR, +}; -pub fn gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { - let offset = ROOT_ZONE_KERNEL_ADDR as usize; - let host_vaddr = guest_paddr + offset; - host_vaddr as *mut u8 -} +// only need to fill in ecam_base and ecam_size in x86_64 +pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { + ecam_base: 0xe0000000, + ecam_size: 0x200000, + io_base: 0x0, + io_size: 0x0, + pci_io_base: 0x0, + mem32_base: 0x0, + mem32_size: 0x0, + pci_mem32_base: 0x0, + mem64_base: 0x0, + mem64_size: 0x0, + pci_mem64_base: 0x0, +}; -#[naked] -pub unsafe extern "C" fn test_guest() -> ! { - core::arch::asm!( - " - mov rax, 0 - mov rdi, 2 - mov rsi, 3 - mov rdx, 3 - mov rcx, 3 - 2: - vmcall - add rax, 1 - jmp 2b", - options(noreturn), - ); -} +pub const ROOT_PCI_DEVS: [u64; 8] = [0x0, 0x8, 0x10, 0x18, 0xf8, 0xfa, 0xfb, 0x100]; -pub unsafe extern "C" fn test_guest_2() -> ! { - core::arch::asm!( - "vmcall", - inout("rax") 0 => _, - in("rdi") 2, - in("rsi") 3, - in("rdx") 3, - in("rcx") 3, - ); - core::arch::asm!("mov qword ptr [$0xffff233], $2333"); // panic - loop {} -} \ No newline at end of file +#[cfg(all(feature = "graphics"))] +pub const GRAPHICS_FONT: &[u8] = + include_bytes!("../../platform/x86_64/qemu/image/font/spleen-6x12.psf"); diff --git a/platform/x86_64/qemu/cargo/features b/platform/x86_64/qemu/cargo/features index e69de29b..ac3b7f71 100644 --- a/platform/x86_64/qemu/cargo/features +++ b/platform/x86_64/qemu/cargo/features @@ -0,0 +1,2 @@ +pci +uart16550a \ No newline at end of file diff --git a/platform/x86_64/qemu/image/acpi/hpet.asl b/platform/x86_64/qemu/image/acpi/hpet.asl deleted file mode 100644 index ae7416bc..00000000 --- a/platform/x86_64/qemu/image/acpi/hpet.asl +++ /dev/null @@ -1,36 +0,0 @@ -/* -* HPET template -*/ -[0004] Signature : "HPET" -[0004] Table Length : 00000000 -[0001] Revision : 01 -[0001] Checksum : 00 -[0006] Oem ID : "DM " -[0008] Oem Table ID : "DMHPET " -[0004] Oem Revision : 00000001 -/* iasl 
will fill in the compiler ID/revision fields */ -[0004] Asl Compiler ID : "xxxx" -[0004] Asl Compiler Revision : 00000000 - -/* -[31:16] = PCI Vendor ID of 1st Timer Block (0x8086) -[15] = LegacyReplacement IRQ Routing Capable (0) -[14] = Reserved (0) -[13] = COUNT_SIZE_CAP counter size (32-bit=0) -[12:8] = Number of Comparators in 1st Timer Block (3-1=2) -[7:0] = Hardware Rev ID (1) -*/ -[0004] Hardware Block ID : 80860201 - -[0012] Timer Block Register : [Generic Address Structure] - [0001] Space ID : 00 [SystemMemory] - [0001] Bit Width : 00 - [0001] Bit Offset : 00 - [0001] Encoded Access Width : 00 [Undefined/Legacy] - [0008] Address : 00000000fed00000 - -[0001] Sequence Number : 00 -[0002] Minimum Clock Ticks : 0000 -[0004] Flags (decoded below) : 00000001 - 4K Page Protect : 1 - 64K Page Protect : 0 \ No newline at end of file diff --git a/platform/x86_64/qemu/image/acpi/madt.asl b/platform/x86_64/qemu/image/acpi/madt.asl deleted file mode 100644 index 3d413454..00000000 --- a/platform/x86_64/qemu/image/acpi/madt.asl +++ /dev/null @@ -1,54 +0,0 @@ -/* -* MADT template -*/ -[0004] Signature : "APIC" -[0004] Table Length : 00000000 -[0001] Revision : 01 -[0001] Checksum : 00 -[0006] Oem ID : "DM " -[0008] Oem Table ID : "DMMADT " -[0004] Oem Revision : 00000001 -/* iasl will fill in the compiler ID/revision fields */ -[0004] Asl Compiler ID : "xxxx" -[0004] Asl Compiler Revision : 00000000 -[0004] Local Apic Address : fee00000 -[0004] Flags (decoded below) : 00000001 - PC-AT Compatibility : 1 - -/* Processor Local APIC */ -[0001] Subtable Type : 00 -[0001] Length : 08 -[0001] Processor ID : 00 -[0001] Local Apic ID : 00 -[0004] Flags (decoded below) : 00000001 - Processor Enabled : 1 - Runtime Online Capable : 0 - -/* IO APIC */ -[0001] Subtable Type : 01 -[0001] Length : 0C -[0001] I/O Apic ID : 00 -[0001] Reserved : 00 -[0004] Address : fec00000 -[0004] Interrupt : 00000000 - -/* Interrupt Source Override */ -/* Legacy IRQ0 is connected to pin 2 of the 
IOAPIC -[0001] Subtable Type : 02 -[0001] Length : 0A -[0001] Bus : 00 -[0001] Source : 00 -[0004] Interrupt : 00000002 -[0002] Flags (decoded below) : 0000 - Polarity : 0 - Trigger Mode : 0 */ - -/* Local APIC NMI Structure */ -/* Connected to LINT1 on all CPUs -[0001] Subtable Type : 04 -[0001] Length : 06 -[0001] Processor ID : ff -[0002] Flags (decoded below) : 0000 - Polarity : 0 - Trigger Mode : 0 -[0001] Interrupt Input LINT : 01 */ \ No newline at end of file diff --git a/platform/x86_64/qemu/image/acpi/rsdp.asl b/platform/x86_64/qemu/image/acpi/rsdp.asl deleted file mode 100644 index cbf1120f..00000000 --- a/platform/x86_64/qemu/image/acpi/rsdp.asl +++ /dev/null @@ -1,12 +0,0 @@ -/* -* RSDP template -*/ -[0008] Signature : "RSD PTR " -[0001] Checksum : 00 -[0006] Oem ID : "DM " -[0001] Revision : 02 -[0004] RSDT Address : 000f2440 -[0004] Length : 00000024 -[0008] XSDT Address : 00000000000f2480 -[0001] Extended Checksum : 00 -[0003] Reserved : 000000 \ No newline at end of file diff --git a/platform/x86_64/qemu/image/acpi/rsdt.asl b/platform/x86_64/qemu/image/acpi/rsdt.asl deleted file mode 100644 index 4d778ae7..00000000 --- a/platform/x86_64/qemu/image/acpi/rsdt.asl +++ /dev/null @@ -1,18 +0,0 @@ -/* -* RSDT template -*/ -[0004] Signature : "RSDT" -[0004] Table Length : 00000000 -[0001] Revision : 01 -[0001] Checksum : 00 -[0006] Oem ID : "DM " -[0008] Oem Table ID : "DMRSDT " -[0004] Oem Revision : 00000001 -/* iasl will fill in the compiler ID/revision fields */ -[0004] Asl Compiler ID : "xxxx" -[0004] Asl Compiler Revision : 00000000 - -/* MADT */ -[0004] ACPI Table Address : 000f2500 -/* HPET */ -[0004] ACPI Table Address : 000f2740 \ No newline at end of file diff --git a/platform/x86_64/qemu/image/acpi/xsdt.asl b/platform/x86_64/qemu/image/acpi/xsdt.asl deleted file mode 100644 index f00ddf50..00000000 --- a/platform/x86_64/qemu/image/acpi/xsdt.asl +++ /dev/null @@ -1,18 +0,0 @@ -/* -* XSDT template -*/ -[0004] Signature : "XSDT" -[0004] Table 
Length : 00000000 -[0001] Revision : 01 -[0001] Checksum : 00 -[0006] Oem ID : "DM " -[0008] Oem Table ID : "DMXSDT " -[0004] Oem Revision : 00000001 -/* iasl will fill in the compiler ID/revision fields */ -[0004] Asl Compiler ID : "xxxx" -[0004] Asl Compiler Revision : 00000000 - -/* MADT */ -[0004] ACPI Table Address : 000f2500 -/* HPET */ -[0004] ACPI Table Address : 000f2740 \ No newline at end of file diff --git a/platform/x86_64/qemu/image/bootloader/boot.S b/platform/x86_64/qemu/image/bootloader/boot.S new file mode 100644 index 00000000..9b979b63 --- /dev/null +++ b/platform/x86_64/qemu/image/bootloader/boot.S @@ -0,0 +1,41 @@ +.section .text +.code16 +.global entry16 +entry16: + cli + cld + + mov ecx, eax + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + + lgdt [prot_gdt_desc] + mov eax, cr0 + or eax, 0x1 + mov cr0, eax + + ljmp 0x8, entry32 + +.code32 +.global entry32 +entry32: + mov ax, 0x10 + mov ds, ax + mov es, ax + mov ss, ax + mov fs, ax + mov gs, ax + + jmp ecx + +.balign 16 +prot_gdt: + .quad 0x0000000000000000 # 0x00: null + .quad 0x00cf9b000000ffff # 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff # 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + +prot_gdt_desc: + .short prot_gdt_desc - prot_gdt - 1 # limit + .long prot_gdt # base diff --git a/platform/x86_64/qemu/image/bootloader/boot.ld b/platform/x86_64/qemu/image/bootloader/boot.ld new file mode 100644 index 00000000..3f96b209 --- /dev/null +++ b/platform/x86_64/qemu/image/bootloader/boot.ld @@ -0,0 +1,15 @@ +OUTPUT_ARCH(i386) +BASE_ADDRESS = 0x8000; + +ENTRY(entry16) +SECTIONS +{ + . 
= BASE_ADDRESS; + .text : { + *(.text .text.*) + } + + /DISCARD/ : { + *(.eh_frame) *(.eh_frame_hdr) + } +} diff --git a/platform/x86_64/qemu/image/bootloader/boot.mk b/platform/x86_64/qemu/image/bootloader/boot.mk new file mode 100644 index 00000000..e23e4540 --- /dev/null +++ b/platform/x86_64/qemu/image/bootloader/boot.mk @@ -0,0 +1,36 @@ +boot_dir := $(image_dir)/bootloader +boot_out_dir := $(image_dir)/bootloader/out + +boot_src := $(boot_dir)/boot.S +boot_lds := $(boot_dir)/boot.ld + +boot_o := $(boot_out_dir)/boot.o +boot_elf := $(boot_out_dir)/boot.elf +boot_bin := $(boot_out_dir)/boot.bin +boot_disa := $(boot_out_dir)/boot.asm + +AS ?= as +LD ?= ld +OBJCOPY ?= objcopy +OBJDUMP ?= objdump + +boot: mkout $(boot_bin) + +disasm: + $(OBJDUMP) -d -m i8086 -M intel $(boot_elf) | less + +mkout: + rm -rf $(boot_out_dir) + mkdir -p $(boot_out_dir) + +$(boot_o): $(boot_src) + $(AS) --32 -msyntax=intel -mnaked-reg $< -o $@ + +$(boot_elf): $(boot_o) $(boot_lds) + $(LD) -T$(boot_lds) $< -o $@ + $(OBJDUMP) -d -m i8086 -M intel $@ > $(boot_disa) + +$(boot_bin): $(boot_elf) + $(OBJCOPY) $< --strip-all -O binary $@ + +.PHONY: all disasm \ No newline at end of file diff --git a/platform/x86_64/qemu/image/font/solarize-12x29.psf b/platform/x86_64/qemu/image/font/solarize-12x29.psf new file mode 100644 index 00000000..071330e9 Binary files /dev/null and b/platform/x86_64/qemu/image/font/solarize-12x29.psf differ diff --git a/platform/x86_64/qemu/image/font/spleen-6x12.psf b/platform/x86_64/qemu/image/font/spleen-6x12.psf new file mode 100644 index 00000000..892d085c Binary files /dev/null and b/platform/x86_64/qemu/image/font/spleen-6x12.psf differ diff --git a/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg b/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg new file mode 100644 index 00000000..c2a54ae3 --- /dev/null +++ b/platform/x86_64/qemu/image/iso/boot/grub/grub.cfg @@ -0,0 +1,28 @@ +set timeout=10 # waiting time befo automatic booting +set default=0 # default menu 
entry index + +insmod all_video + +menuentry "Hvisor" { + multiboot2 /boot/hvisor # use multiboot spec to boot + module2 /boot/kernel/boot.bin 0 + module2 /boot/kernel/boot.bin 5008000 + module2 /boot/kernel/setup.bin 500a000 + module2 /boot/kernel/vmlinux.bin 5100000 + module2 /boot/kernel/initramfs.cpio.gz 1a000000 + boot +} + +if [ ${grub_platform} == "efi" ]; then + menuentry "UEFI Setting" { + fwsetup + } +fi + +menuentry "System Reboot" --class=reboot { + reboot +} + +menuentry "System Shutdown" --class=halt { + halt +} \ No newline at end of file diff --git a/platform/x86_64/qemu/linker.ld b/platform/x86_64/qemu/linker.ld index 2b782f28..a0096daa 100644 --- a/platform/x86_64/qemu/linker.ld +++ b/platform/x86_64/qemu/linker.ld @@ -8,6 +8,7 @@ SECTIONS stext = .; .text : { + KEEP(*(.text.header)) *(.text.entry) *(.text.entry32) *(.text.entry64) diff --git a/platform/x86_64/qemu/platform.mk b/platform/x86_64/qemu/platform.mk index 8abf8ebb..62ac98f6 100644 --- a/platform/x86_64/qemu/platform.mk +++ b/platform/x86_64/qemu/platform.mk @@ -1,50 +1,55 @@ QEMU := qemu-system-x86_64 -acpi_asl_dir := scripts/x86_64/acpi -acpi_aml_dir := $(image_dir)/acpi - -zone0_bios := $(image_dir)/rvm-bios.bin -zone0_kernel := $(image_dir)/nimbos.bin - -zone0_image := $(image_dir)/bzImage -zone0_setup := $(image_dir)/setup.bin -zone0_vmlinux := $(image_dir)/vmlinux.bin -zone0_initrd := $(image_dir)/initramfs.cpio.gz -zone0_boot16 := $(image_dir)/boot16.bin - -aml_hpet := $(acpi_aml_dir)/hpet.aml -aml_madt := $(acpi_aml_dir)/madt.aml -aml_rsdp := $(acpi_aml_dir)/rsdp.aml -aml_rsdt := $(acpi_aml_dir)/rsdt.aml -aml_xsdt := $(acpi_aml_dir)/xsdt.aml - -QEMU_ARGS := -machine q35 -QEMU_ARGS += -cpu host,+x2apic -accel kvm +zone0_boot := $(image_dir)/bootloader/out/boot.bin +zone0_setup := $(image_dir)/kernel/setup.bin +zone0_vmlinux := $(image_dir)/kernel/vmlinux.bin +zone0_initrd := $(image_dir)/virtdisk/initramfs.cpio.gz +zone0_rootfs := $(image_dir)/virtdisk/rootfs1.img +zone1_rootfs 
:= $(image_dir)/virtdisk/rootfs2.img + +QEMU_ARGS := -machine q35,kernel-irqchip=split +QEMU_ARGS += -cpu host,+x2apic,+invtsc,+vmx -accel kvm QEMU_ARGS += -smp 4 QEMU_ARGS += -serial mon:stdio -QEMU_ARGS += -m 2G -QEMU_ARGS += -nographic - -QEMU_ARGS += -kernel $(hvisor_elf) -# QEMU_ARGS += -device loader,file="$(zone0_bios)",addr=0x5008000,force-raw=on -# QEMU_ARGS += -device loader,file="$(zone0_kernel)",addr=0x5200000,force-raw=on - -QEMU_ARGS += -device loader,file="$(zone0_boot16)",addr=0x5008000,force-raw=on -QEMU_ARGS += -device loader,file="$(zone0_setup)",addr=0x500d000,force-raw=on -QEMU_ARGS += -device loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on -QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x20000000,force-raw=on -QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" - -QEMU_ARGS += -device loader,file="$(aml_rsdp)",addr=0x50f2400,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_rsdt)",addr=0x50f2440,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_xsdt)",addr=0x50f2480,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_madt)",addr=0x50f2500,force-raw=on -QEMU_ARGS += -device loader,file="$(aml_hpet)",addr=0x50f2740,force-raw=on - -$(hvisor_bin): elf aml +QEMU_ARGS += -m 4G +QEMU_ARGS += -bios /usr/share/ovmf/OVMF.fd +QEMU_ARGS += -vga std +# QEMU_ARGS += -nographic + +QEMU_ARGS += -nodefaults +QEMU_ARGS += -net nic -net user + +QEMU_ARGS += -device intel-iommu,intremap=on,eim=on,caching-mode=on,device-iotlb=on,aw-bits=48 +QEMU_ARGS += -device ioh3420,id=pcie.1,chassis=1 +QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10008000,format=raw +QEMU_ARGS += -device virtio-blk-pci,bus=pcie.1,drive=X10008000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -drive if=none,file="$(zone0_rootfs)",id=X10009000,format=raw +# QEMU_ARGS += -device nvme,serial=deadbeef,drive=X10009000 +# QEMU_ARGS += -drive if=none,file="$(zone1_rootfs)",id=X10009000,format=raw +# QEMU_ARGS += 
-device virtio-blk-pci,bus=pcie.1,drive=X10009000,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -netdev tap,id=net0,ifname=tap0,script=no,downscript=no +# QEMU_ARGS += -device virtio-net-pci,bus=pcie.1,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += -netdev tap,id=net0,vhostforce=on +# QEMU_ARGS += -device virtio-net-pci,bus=pcie.1,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on +# QEMU_ARGS += --trace "virtio_*" --trace "virtqueue_*" --trace "vtd_dma*" --trace "iommu_*" + +# QEMU_ARGS += -kernel $(hvisor_elf) +QEMU_ARGS += -drive file=$(image_dir)/virtdisk/hvisor.iso,format=raw,index=0,media=disk + +# QEMU_ARGS += -device loader,file="$(zone0_boot)",addr=0x5008000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_setup)",addr=0x500a000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_vmlinux)",addr=0x5100000,force-raw=on +# QEMU_ARGS += -device loader,file="$(zone0_initrd)",addr=0x1a000000,force-raw=on +# QEMU_ARGS += -append "initrd_size=$(shell stat -c%s $(zone0_initrd))" + +$(hvisor_bin): elf boot $(OBJCOPY) $(hvisor_elf) --strip-all -O binary $@ - -aml: $(aml_hpet) $(aml_madt) $(aml_rsdp) $(aml_rsdt) $(aml_xsdt) - -$(acpi_aml_dir)/%.aml: $(acpi_asl_dir)/%.asl - iasl -p $@ $< \ No newline at end of file + cp $(hvisor_elf) $(image_dir)/iso/boot + mkdir -p $(image_dir)/iso/boot/kernel + cp $(zone0_boot) $(image_dir)/iso/boot/kernel + cp $(zone0_setup) $(image_dir)/iso/boot/kernel + cp $(zone0_vmlinux) $(image_dir)/iso/boot/kernel + mkdir -p $(image_dir)/virtdisk + grub-mkrescue /usr/lib/grub/x86_64-efi -o $(image_dir)/virtdisk/hvisor.iso $(image_dir)/iso + +include $(image_dir)/bootloader/boot.mk \ No newline at end of file diff --git a/src/arch/aarch64/cpu.rs b/src/arch/aarch64/cpu.rs index f7498c7a..3f7e3676 100644 --- a/src/arch/aarch64/cpu.rs +++ b/src/arch/aarch64/cpu.rs @@ -22,6 +22,7 @@ use crate::{ }, percpu::this_cpu_data, 
platform::BOARD_MPIDR_MAPPINGS, + zone::find_zone, }; use aarch64_cpu::registers::{ Readable, Writeable, ELR_EL2, HCR_EL2, MPIDR_EL1, SCTLR_EL1, SPSR_EL2, VTCR_EL2, @@ -268,3 +269,12 @@ pub fn store_cpu_pointer_to_reg(pointer: usize) { // println!("aarch64 doesn't support store cpu pointer to reg, pointer: {:#x}", pointer); return; } + +pub fn get_target_cpu(irq: usize, zone_id: usize) -> usize { + find_zone(zone_id) + .unwrap() + .read() + .cpu_set + .first_cpu() + .unwrap() +} diff --git a/src/arch/aarch64/hypercall.rs b/src/arch/aarch64/hypercall.rs index 68eb755f..91e59485 100644 --- a/src/arch/aarch64/hypercall.rs +++ b/src/arch/aarch64/hypercall.rs @@ -46,10 +46,6 @@ impl<'a> HyperCall<'a> { HyperCallResult::Ok(0) } - pub fn translate_ipa_to_hva(&mut self, ipa: u64) -> u64 { - return ipa; - } - pub fn wait_for_interrupt(&mut self, irq_list: &mut [u64; MAX_DEVS + 1]) { trace!("wait_for_interrupt is not need for AArch64"); } @@ -79,4 +75,9 @@ impl<'a> HyperCall<'a> { let cpuid = this_cpu_id(); trace!("CPU ID: {} Start Zone", cpuid); } + + pub fn hv_virtio_get_irq(&self, virtio_irq: *mut u32) -> HyperCallResult { + trace!("hv_virtio_get_irq is not need for AArch64"); + HyperCallResult::Ok(0) + } } diff --git a/src/arch/aarch64/iommu.rs b/src/arch/aarch64/iommu.rs index 6773d2f7..1b7110d5 100644 --- a/src/arch/aarch64/iommu.rs +++ b/src/arch/aarch64/iommu.rs @@ -348,7 +348,6 @@ impl CmdQueue { pub struct Smmuv3 { rp: &'static RegisterPage, strtab: LinearStreamTable, - iommu_pt_list: Vec>, cmdq: CmdQueue, } @@ -358,20 +357,13 @@ impl Smmuv3 { let mut r = Self { rp: rp, strtab: LinearStreamTable::new(), - iommu_pt_list: vec![], cmdq: CmdQueue::new(), }; - for _ in 0..MAX_ZONE_NUM { - r.iommu_pt_list.push(new_s2_memory_set()); - } - - info!("pagetables for iommu, init done!"); - r.check_env(); - r.init_limited_pt(); r.init_structures(); r.device_reset(); + r } @@ -413,47 +405,6 @@ impl Smmuv3 { } } - fn init_limited_pt(&mut self) { - // its - for pt in 
self.iommu_pt_list.iter_mut() { - pt.insert(MemoryRegion::new_with_offset_mapper( - 0x8080000 as GuestPhysAddr, - 0x8080000, - 0x20000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - } - - // ram - self.iommu_pt_list[0] - .insert(MemoryRegion::new_with_offset_mapper( - 0x80000000 as GuestPhysAddr, - 0x80000000, - 0x50000000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - - self.iommu_pt_list[1] - .insert(MemoryRegion::new_with_offset_mapper( - 0x50000000 as GuestPhysAddr, - 0x50000000, - 0x30000000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - - self.iommu_pt_list[2] - .insert(MemoryRegion::new_with_offset_mapper( - 0x80000000 as GuestPhysAddr, - 0x80000000, - 0x10000000, - MemFlags::READ | MemFlags::WRITE, - )) - .ok(); - } - fn init_structures(&mut self) { self.init_strtab(); self.init_queues(); @@ -545,13 +496,12 @@ impl Smmuv3 { } // s1 bypass and s2 translate - fn write_ste(&mut self, sid: usize, vmid: usize) { + fn write_ste(&mut self, sid: usize, vmid: usize, root_pt: usize) { self.sync_ste(sid); assert!(vmid < MAX_ZONE_NUM, "Invalid zone id!"); - self.strtab - .write_ste(sid, vmid, self.iommu_pt_list[vmid].root_paddr()); + self.strtab.write_ste(sid, vmid, root_pt); } // invalidate the ste @@ -582,13 +532,8 @@ static SMMUV3: spin::Once> = spin::Once::new(); /// smmuv3 init pub fn iommu_init() { - #[cfg(feature = "iommu")] - { - info!("Smmuv3 init..."); - SMMUV3.call_once(|| Mutex::new(Smmuv3::new())); - } - #[cfg(not(feature = "iommu"))] - info!("Smmuv3 init: do nothing now"); + info!("Smmuv3 init..."); + SMMUV3.call_once(|| Mutex::new(Smmuv3::new())); } /// smmuv3_base @@ -604,15 +549,7 @@ pub fn smmuv3_size() -> usize { } /// write ste -pub fn iommu_add_device(vmid: usize, sid: usize) { - #[cfg(feature = "iommu")] - { - let mut smmu = SMMUV3.get().unwrap().lock(); - smmu.write_ste(sid as _, vmid as _); - } - #[cfg(not(feature = "iommu"))] - info!( - "aarch64: iommu_add_device: do nothing now, vmid: {}, sid: {}", - vmid, sid - ); +pub fn 
iommu_add_device(vmid: usize, sid: usize, root_pt: usize) { + let mut smmu = SMMUV3.get().unwrap().lock(); + smmu.write_ste(sid as _, vmid as _, root_pt as _); } diff --git a/src/arch/aarch64/mm.rs b/src/arch/aarch64/mm.rs index 9acdae18..36e8e687 100644 --- a/src/arch/aarch64/mm.rs +++ b/src/arch/aarch64/mm.rs @@ -52,3 +52,9 @@ pub fn is_s2_pt_level3() -> bool { pub fn new_s2_memory_set() -> MemorySet { MemorySet::new(if is_s2_pt_level3() { 3 } else { 4 }) } + +pub fn arch_post_heap_init(host_dtb: usize) { + // AArch64 does not need to do some setup work after heap init like x86_64. + // This function can be used to set up any architecture-specific parameters if needed. + // Currently, it does nothing. +} diff --git a/src/arch/aarch64/zone.rs b/src/arch/aarch64/zone.rs index b7e8e90b..b57fecae 100644 --- a/src/arch/aarch64/zone.rs +++ b/src/arch/aarch64/zone.rs @@ -16,10 +16,12 @@ use core::panic; use crate::{ + arch::Stage2PageTable, config::*, device::virtio_trampoline::mmio_virtio_handler, error::HvResult, - memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, + memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet}, + pci::pcibar::BarRegion, zone::Zone, }; @@ -60,10 +62,66 @@ impl Zone { Ok(()) } - pub fn arch_zone_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + pub fn iommu_pt_init( + &mut self, + mem_regions: &[HvConfigMemoryRegion], + hv_config: &HvArchZoneConfig, + ) -> HvResult { + // Create a new stage 2 page table for iommu. + // Only map the memory regions that are possible to be accessed by devices as DMA buffer. 
+ + let pt = self.iommu_pt.as_mut().unwrap(); + let flags = MemFlags::READ | MemFlags::WRITE; + for mem_region in mem_regions.iter() { + match mem_region.mem_type { + MEM_TYPE_RAM => { + pt.insert(MemoryRegion::new_with_offset_mapper( + mem_region.virtual_start as GuestPhysAddr, + mem_region.physical_start as HostPhysAddr, + mem_region.size as _, + flags, + ))?; + info!( + "iommu map: vaddr:{} - paddr:{}", + mem_region.virtual_start, mem_region.physical_start + ); + } + _ => { + // pass + } + } + } + + match hv_config.gic_config { + GicConfig::Gicv3(ref gicv3_config) => { + if gicv3_config.gits_size != 0 { + // map gits + pt.insert(MemoryRegion::new_with_offset_mapper( + gicv3_config.gits_base as GuestPhysAddr, + gicv3_config.gits_base as HostPhysAddr, + gicv3_config.gits_size as _, + flags | MemFlags::IO, + ))?; + info!( + "iommu map: vaddr:{} - paddr:{}", + gicv3_config.gits_base, gicv3_config.gits_base + ); + } + } + _ => {} + } + + Ok(()) + } + + pub fn arch_zone_pre_configuration(&mut self, config: &HvZoneConfig) -> HvResult { self.ivc_init(config.ivc_config()); Ok(()) } + + pub fn arch_zone_post_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + Ok(()) + } } #[repr(C)] @@ -104,3 +162,19 @@ pub struct Gicv3Config { pub gits_base: usize, pub gits_size: usize, } + +impl BarRegion { + pub fn arch_set_bar_region_start(&mut self, cpu_base: usize, pci_base: usize) { + self.start = crate::memory::addr::align_down(cpu_base + self.start - pci_base); + } + + pub fn arch_insert_bar_region(&self, gpm: &mut MemorySet, zone_id: usize) { + gpm.insert(MemoryRegion::new_with_offset_mapper( + self.start as GuestPhysAddr, + self.start, + self.size, + MemFlags::READ | MemFlags::WRITE | MemFlags::IO, + )) + .ok(); + } +} diff --git a/src/arch/loongarch64/cpu.rs b/src/arch/loongarch64/cpu.rs index 4e811c4c..625e43c4 100644 --- a/src/arch/loongarch64/cpu.rs +++ b/src/arch/loongarch64/cpu.rs @@ -19,6 +19,7 @@ use super::zone::ZoneContext; use 
crate::arch::zone::disable_hwi_through; use crate::device::common::MMIODerefWrapper; use crate::percpu::this_cpu_data; +use crate::zone::find_zone; use core::arch::asm; use core::fmt::{self, Debug, Formatter}; use loongArch64::register::crmd::Crmd; @@ -163,3 +164,12 @@ pub fn store_cpu_pointer_to_reg(pointer: usize) { // println!("loongarch64 doesn't support store cpu pointer to reg, pointer: {:#x}", pointer); return; } + +pub fn get_target_cpu(irq: usize, zone_id: usize) -> usize { + find_zone(zone_id) + .unwrap() + .read() + .cpu_set + .first_cpu() + .unwrap() +} diff --git a/src/arch/loongarch64/hypercall.rs b/src/arch/loongarch64/hypercall.rs index 0253397a..6b56282f 100644 --- a/src/arch/loongarch64/hypercall.rs +++ b/src/arch/loongarch64/hypercall.rs @@ -26,10 +26,6 @@ impl<'a> HyperCall<'a> { HyperCallResult::Ok(0) } - pub fn translate_ipa_to_hva(&mut self, ipa: u64) -> u64 { - return ipa | crate::arch::mm::LOONGARCH64_CACHED_DMW_PREFIX; - } - pub fn wait_for_interrupt(&mut self, irq_list: &mut [u64; MAX_DEVS + 1]) { use crate::device::irqchip::ls7a2000::*; let status = GLOBAL_IRQ_INJECT_STATUS.lock(); @@ -58,7 +54,7 @@ impl<'a> HyperCall<'a> { } pub fn hv_get_real_list_pa(&mut self, list_addr: u64) -> u64 { - // RISC-V does not have a specific prefix for cached memory, so we return the address as is. + // LoongArch64 does not have a specific prefix for cached memory, so we return the address as is. 
return list_addr; } @@ -66,4 +62,9 @@ impl<'a> HyperCall<'a> { let cpuid = this_cpu_id(); assert_eq!(cpuid, 0); } + + pub fn hv_virtio_get_irq(&self, virtio_irq: *mut u32) -> HyperCallResult { + trace!("hv_virtio_get_irq is not need for LoongArch64"); + HyperCallResult::Ok(0) + } } diff --git a/src/arch/loongarch64/mm.rs b/src/arch/loongarch64/mm.rs index 075e96b1..a25a39ca 100644 --- a/src/arch/loongarch64/mm.rs +++ b/src/arch/loongarch64/mm.rs @@ -69,3 +69,9 @@ pub fn arch_setup_parange() { // This function can be used to set up any architecture-specific parameters if needed. // Currently, it does nothing. } + +pub fn arch_post_heap_init(host_dtb: usize) { + // LoongArch64 does not need to do some setup work after heap init like x86_64. + // This function can be used to set up any architecture-specific parameters if needed. + // Currently, it does nothing. +} diff --git a/src/arch/loongarch64/zone.rs b/src/arch/loongarch64/zone.rs index 419b29ef..96ac8883 100644 --- a/src/arch/loongarch64/zone.rs +++ b/src/arch/loongarch64/zone.rs @@ -16,7 +16,7 @@ // use crate::device::irqchip::ls7a2000::chip::get_extioi_sr; use crate::{ - arch::{cpu::this_cpu_id, trap::GLOBAL_TRAP_CONTEXT_HELPER_PER_CPU}, + arch::{cpu::this_cpu_id, trap::GLOBAL_TRAP_CONTEXT_HELPER_PER_CPU, Stage2PageTable}, config::*, consts::PAGE_SIZE, device::virtio_trampoline::mmio_virtio_handler, @@ -24,8 +24,9 @@ use crate::{ memory::{ addr::{align_down, align_up}, mmio_generic_handler, mmio_perform_access, GuestPhysAddr, HostPhysAddr, MMIOAccess, - MemFlags, MemoryRegion, + MemFlags, MemoryRegion, MemorySet, }, + pci::pcibar::BarRegion, zone::Zone, PHY_TO_DMW_UNCACHED, }; @@ -683,7 +684,7 @@ impl Zone { self.gpm.delete(vaddr as GuestPhysAddr) } - pub fn arch_zone_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + pub fn arch_zone_pre_configuration(&mut self, config: &HvZoneConfig) -> HvResult { let vaddr = config.pci_config.ecam_base; let size = config.pci_config.ecam_size; 
self.gpm.insert(MemoryRegion::new_with_offset_mapper( @@ -694,4 +695,24 @@ impl Zone { ))?; self.gpm.delete(vaddr as GuestPhysAddr) } + + pub fn arch_zone_post_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + Ok(()) + } +} + +impl BarRegion { + pub fn arch_set_bar_region_start(&mut self, cpu_base: usize, pci_base: usize) { + self.start = crate::memory::addr::align_down(cpu_base + self.start - pci_base); + } + + pub fn arch_insert_bar_region(&self, gpm: &mut MemorySet, zone_id: usize) { + gpm.insert(MemoryRegion::new_with_offset_mapper( + self.start as GuestPhysAddr, + self.start, + self.size, + MemFlags::READ | MemFlags::WRITE | MemFlags::IO, + )) + .ok(); + } } diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 5b29b4e2..997ce3da 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -22,6 +22,9 @@ pub mod riscv64; #[cfg(target_arch = "loongarch64")] pub mod loongarch64; +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + // export modules for external use #[cfg(target_arch = "aarch64")] pub use aarch64::*; @@ -31,3 +34,6 @@ pub use riscv64::*; #[cfg(target_arch = "loongarch64")] pub use loongarch64::*; + +#[cfg(target_arch = "x86_64")] +pub use x86_64::*; diff --git a/src/arch/riscv64/cpu.rs b/src/arch/riscv64/cpu.rs index cb735efa..1f036a73 100644 --- a/src/arch/riscv64/cpu.rs +++ b/src/arch/riscv64/cpu.rs @@ -24,6 +24,7 @@ use crate::{ addr::PHYS_VIRT_OFFSET, mm::PARKING_MEMORY_SET, GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet, VirtAddr, PARKING_INST_PAGE, }, + zone::find_zone, }; #[repr(C)] @@ -229,3 +230,12 @@ pub fn store_cpu_pointer_to_reg(pointer: usize) { // println!("Stored CPU pointer to CSR_SSCRATCH: {:#x}", pointer); return; } + +pub fn get_target_cpu(irq: usize, zone_id: usize) -> usize { + find_zone(zone_id) + .unwrap() + .read() + .cpu_set + .first_cpu() + .unwrap() +} diff --git a/src/arch/riscv64/hypercall.rs b/src/arch/riscv64/hypercall.rs index 195fdfaa..ee37e015 100644 --- a/src/arch/riscv64/hypercall.rs +++ 
b/src/arch/riscv64/hypercall.rs @@ -26,10 +26,6 @@ impl<'a> HyperCall<'a> { HyperCallResult::Ok(0) } - pub fn translate_ipa_to_hva(&mut self, ipa: u64) -> u64 { - return ipa; - } - pub fn wait_for_interrupt(&mut self, irq_list: &mut [u64; MAX_DEVS + 1]) { trace!("wait_for_interrupt is not need for RISC-V"); } @@ -59,4 +55,9 @@ impl<'a> HyperCall<'a> { let cpuid = this_cpu_id(); trace!("CPU ID: {} Start Zone", cpuid); } + + pub fn hv_virtio_get_irq(&self, virtio_irq: *mut u32) -> HyperCallResult { + trace!("hv_virtio_get_irq is not need for RISC-V"); + HyperCallResult::Ok(0) + } } diff --git a/src/arch/riscv64/mm.rs b/src/arch/riscv64/mm.rs index 7b860ff5..fcdfaed0 100644 --- a/src/arch/riscv64/mm.rs +++ b/src/arch/riscv64/mm.rs @@ -145,3 +145,9 @@ pub fn arch_setup_parange() { // The parange is determined by the memory regions defined in the device tree. // So we do not need to do anything here. } + +pub fn arch_post_heap_init(host_dtb: usize) { + // RISC-V does not need to do some setup work after heap init like x86_64. + // This function can be used to set up any architecture-specific parameters if needed. + // Currently, it does nothing. +} diff --git a/src/arch/riscv64/zone.rs b/src/arch/riscv64/zone.rs index 32c9952a..fd67b580 100644 --- a/src/arch/riscv64/zone.rs +++ b/src/arch/riscv64/zone.rs @@ -14,10 +14,12 @@ // Authors: // use crate::{ + arch::Stage2PageTable, config::*, device::virtio_trampoline::{mmio_virtio_handler, VIRTIO_BRIDGE}, error::HvResult, - memory::{addr::align_up, GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}, + memory::{addr::align_up, GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet}, + pci::pcibar::BarRegion, percpu::get_cpu_data, zone::Zone, }; @@ -56,11 +58,15 @@ impl Zone { Ok(()) } - pub fn arch_zone_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + pub fn arch_zone_pre_configuration(&mut self, config: &HvZoneConfig) -> HvResult { // We do not have any specific architecture configuration for RISC-V. 
// If needed, this function can be extended in the future. Ok(()) } + + pub fn arch_zone_post_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + Ok(()) + } } #[repr(C)] @@ -71,3 +77,19 @@ pub struct HvArchZoneConfig { pub aplic_base: usize, pub aplic_size: usize, } + +impl BarRegion { + pub fn arch_set_bar_region_start(&mut self, cpu_base: usize, pci_base: usize) { + self.start = crate::memory::addr::align_down(cpu_base + self.start - pci_base); + } + + pub fn arch_insert_bar_region(&self, gpm: &mut MemorySet, zone_id: usize) { + gpm.insert(MemoryRegion::new_with_offset_mapper( + self.start as GuestPhysAddr, + self.start, + self.size, + MemFlags::READ | MemFlags::WRITE | MemFlags::IO, + )) + .ok(); + } +} diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs new file mode 100644 index 00000000..b3c6636d --- /dev/null +++ b/src/arch/x86_64/acpi.rs @@ -0,0 +1,752 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{boot, pci::probe_root_pci_devices}, + config::{HvConfigMemoryRegion, HvZoneConfig}, + error::HvResult, + percpu::{this_zone, CpuSet}, +}; +use acpi::{ + fadt::Fadt, + madt::{LocalApicEntry, Madt, MadtEntry}, + mcfg::{Mcfg, McfgEntry}, + rsdp::Rsdp, + sdt::{SdtHeader, Signature}, + AcpiHandler, AcpiTables, AmlTable, PciConfigRegions, +}; +use alloc::{ + collections::{btree_map::BTreeMap, btree_set::BTreeSet}, + vec::Vec, +}; +use core::{ + any::Any, + mem::size_of, + pin::Pin, + ptr::{read_unaligned, write_unaligned, NonNull}, + slice, +}; +use spin::{Mutex, Once}; + +const RSDP_V1_SIZE: usize = 20; +const RSDP_V2_SIZE: usize = 36; + +const RSDP_RSDT_OFFSET: usize = 16; +const RSDP_RSDT_PTR_SIZE: usize = 4; +const RSDT_PTR_SIZE: usize = 4; + +const FADT_DSDT_OFFSET_32: usize = 0x28; +const FADT_DSDT_OFFSET_64: usize = 0x8c; + +const FADT_FACS_OFFSET_32: usize = 0x24; +const FADT_FACS_OFFSET_64: usize = 0x84; + +const SDT_HEADER_SIZE: usize = 36; + +const RSDP_CHECKSUM_OFFSET: usize = 8; +const ACPI_CHECKSUM_OFFSET: usize = 9; + +macro_rules! 
acpi_table { + ($a: ident, $b: ident) => { + #[repr(transparent)] + struct $a { + header: SdtHeader, + } + + unsafe impl acpi::AcpiTable for $a { + const SIGNATURE: Signature = Signature::$b; + fn header(&self) -> &SdtHeader { + &self.header + } + } + }; +} + +#[derive(Clone, Debug)] +struct HvAcpiHandler {} + +impl AcpiHandler for HvAcpiHandler { + unsafe fn map_physical_region( + &self, + physical_address: usize, + size: usize, + ) -> acpi::PhysicalMapping { + acpi::PhysicalMapping::new( + physical_address, + NonNull::new(physical_address as *mut T).unwrap(), + size, + size, + self.clone(), + ) + } + + fn unmap_physical_region(region: &acpi::PhysicalMapping) {} +} + +static ROOT_ACPI: Once = Once::new(); + +#[derive(Clone, Debug)] +enum PatchValue { + U8(u8), + U16(u16), + U32(u32), + U64(u64), +} + +#[derive(Clone, Debug, Default)] +pub struct AcpiTable { + sig: Option, + src: usize, + patches: BTreeMap, + len: usize, + checksum: u8, + gpa: usize, + hpa: usize, + is_addr_set: bool, +} + +fn get_byte_sum_u32(value: u32) -> u8 { + value + .to_ne_bytes() + .iter() + .fold(0u8, |acc, &b| acc.wrapping_add(b)) +} + +fn get_byte_sum_u64(value: u64) -> u8 { + value + .to_ne_bytes() + .iter() + .fold(0u8, |acc, &b| acc.wrapping_add(b)) +} + +impl AcpiTable { + pub fn set_u8(&mut self, value: u8, offset: usize) { + self.patches.insert(offset, PatchValue::U8(value)); + let old = unsafe { *((self.src + offset) as *const u8) }; + self.checksum = self.checksum.wrapping_add(old).wrapping_sub(value); + } + + pub fn set_u32(&mut self, value: u32, offset: usize) { + self.patches.insert(offset, PatchValue::U32(value)); + let old = unsafe { read_unaligned((self.src + offset) as *const u32) }; + self.checksum = self + .checksum + .wrapping_add(get_byte_sum_u32(old)) + .wrapping_sub(get_byte_sum_u32(value)); + } + + pub fn set_u64(&mut self, value: u64, offset: usize) { + self.patches.insert(offset, PatchValue::U64(value)); + let old = unsafe { read_unaligned((self.src + offset) as 
*const u64) }; + self.checksum = self + .checksum + .wrapping_add(get_byte_sum_u64(old)) + .wrapping_sub(get_byte_sum_u64(value)); + } + + /// new len must not be longer + pub fn set_new_len(&mut self, len: usize) { + let src_len = self.get_u32(4) as usize; + println!("len: {:x}, selflen: {:x}", len, src_len); + assert!(len <= src_len); + + // update checksum + for offset in len..src_len { + self.checksum = self + .checksum + .wrapping_add(unsafe { *((self.src + offset) as *const u8) }); + } + + self.set_u32(len as _, 4); + self.len = len; + } + + pub fn get_len(&self) -> usize { + self.len + } + + pub fn get_unpatched_src(&self) -> *const u8 { + self.src as *const u8 + } + + pub fn get_u8(&self, offset: usize) -> u8 { + if let Some(&PatchValue::U8(value)) = self.patches.get(&offset) { + return value; + } + unsafe { *((self.src + offset) as *const u8) } + } + + pub fn get_u16(&self, offset: usize) -> u16 { + if let Some(&PatchValue::U16(value)) = self.patches.get(&offset) { + return value; + } + unsafe { read_unaligned((self.src + offset) as *const u16) } + } + + pub fn get_u32(&self, offset: usize) -> u32 { + if let Some(&PatchValue::U32(value)) = self.patches.get(&offset) { + return value; + } + unsafe { read_unaligned((self.src + offset) as *const u32) } + } + + pub fn get_u64(&self, offset: usize) -> u64 { + if let Some(&PatchValue::U64(value)) = self.patches.get(&offset) { + return value; + } + unsafe { read_unaligned((self.src + offset) as *const u64) } + } + + pub fn fill( + &mut self, + sig: Option, + ptr: *const u8, + len: usize, + checksum_offset: usize, + ) { + self.sig = sig; + self.patches.clear(); + self.src = ptr as usize; + self.len = len; + self.checksum = unsafe { *(ptr.wrapping_add(checksum_offset)) }; + } + + pub unsafe fn copy_to_mem(&self) { + core::ptr::copy(self.src as *const u8, self.hpa as *mut u8, self.len); + + macro_rules! 
write_patch { + ($addr:expr, $val:expr, $ty:ty) => { + write_unaligned($addr as *mut $ty, $val) + }; + } + + for (offset, value) in self.patches.iter() { + let addr = self.hpa + *offset; + match *value { + PatchValue::U8(v) => write_patch!(addr, v, u8), + PatchValue::U16(v) => write_patch!(addr, v, u16), + PatchValue::U32(v) => write_patch!(addr, v, u32), + PatchValue::U64(v) => write_patch!(addr, v, u64), + _ => {} + } + } + } + + pub fn set_addr(&mut self, hpa: usize, gpa: usize) { + self.hpa = hpa; + self.gpa = gpa; + self.is_addr_set = true; + } + + /// for rsdp, offset = 8; for the others, offset = 9. + pub fn update_checksum(&mut self, offset: usize) { + unsafe { *((self.src + offset) as *mut u8) = self.checksum }; + } +} + +#[derive(Copy, Clone, Debug)] +struct AcpiPointer { + pub from_sig: Signature, + pub from_offset: usize, + pub to_sig: Signature, + pub pointer_size: usize, +} + +#[derive(Clone, Debug, Default)] +pub struct RootAcpi { + /// we need to store rsdp to a safer place + rsdp_copy: Vec, + rsdp: AcpiTable, + tables: BTreeMap, + ssdts: BTreeMap, + pointers: Vec, + devices: Vec, + config_space_base: usize, + config_space_size: usize, + /// key: data reg hpa, value: bdf + msi_data_reg_map: BTreeMap, + /// key: msi-x table bar, value: bdf + msix_bar_map: BTreeMap, + /// key: apic id, value: cpu id (continuous) + apic_id_to_cpu_id: BTreeMap, + /// key: cpu id (continuous), value: apic id + cpu_id_to_apic_id: BTreeMap, +} + +impl RootAcpi { + fn add_pointer( + &mut self, + from_sig: Signature, + from_offset: usize, + to_sig: Signature, + pointer_size: usize, + ) { + self.pointers.push(AcpiPointer { + from_sig, + from_offset, + to_sig, + pointer_size, + }); + } + + fn add_new_table(&mut self, sig: Signature, ptr: *const u8, len: usize) { + let mut table = AcpiTable::default(); + table.fill(Some(sig), ptr, len, ACPI_CHECKSUM_OFFSET); + self.tables.insert(sig, table); + } + + fn add_ssdt(&mut self, ptr: *const u8, len: usize, rsdt_offset: usize) { + let 
mut table = AcpiTable::default(); + table.fill(Some(Signature::SSDT), ptr, len, ACPI_CHECKSUM_OFFSET); + self.ssdts.insert(rsdt_offset, table); + } + + fn get_mut_table(&mut self, sig: Signature) -> Option<&mut AcpiTable> { + self.tables.get_mut(&sig) + } + + fn get_table(&self, sig: &Signature) -> Option { + if self.tables.contains_key(sig) { + Some(self.tables.get(sig).unwrap().clone()) + } else { + None + } + } + + pub fn copy_to_zone_region( + &self, + rsdp_zone_region: &HvConfigMemoryRegion, + acpi_zone_region: &HvConfigMemoryRegion, + banned_tables: &BTreeSet, + cpu_set: &CpuSet, + ) { + let mut rsdp = self.rsdp.clone(); + let mut tables = self.tables.clone(); + let mut ssdts = self.ssdts.clone(); + + // set rsdp addr + rsdp.set_addr( + rsdp_zone_region.physical_start as _, + rsdp_zone_region.virtual_start as _, + ); + + let mut madt_cur: usize = SDT_HEADER_SIZE + 8; + let mut madt = tables.get_mut(&Signature::MADT).unwrap(); + + // fix madt cpu info + for entry in + unsafe { Pin::new_unchecked(&*(madt.get_unpatched_src() as *const Madt)) }.entries() + { + let mut entry_len = madt.get_u8(madt_cur + 1) as usize; + match entry { + MadtEntry::LocalApic(entry) => { + let mut disable_lapic = true; + if contains_apic_id(entry.apic_id as _) { + let cpuid = get_cpu_id(entry.apic_id as _); + if cpu_set.contains_cpu(cpuid) { + disable_lapic = false; + } + // reset processor id + madt.set_u8(cpuid as _, madt_cur + 2); + } + if disable_lapic { + // set flag to disable lapic + madt.set_u32(0x0, madt_cur + 4); + } + } + MadtEntry::LocalX2Apic(entry) => { + if !cpu_set.contains_cpu(entry.processor_uid as _) {} + } + _ => {} + } + madt_cur += entry_len; + } + + // set pointers + let hpa_start = acpi_zone_region.physical_start as usize; + let gpa_start = acpi_zone_region.virtual_start as usize; + let mut cur: usize = 0; + + let mut tables_involved = BTreeSet::::new(); + + for pointer in self.pointers.iter() { + let to = tables.get_mut(&pointer.to_sig).unwrap(); + 
tables_involved.insert(pointer.to_sig); + + if !to.is_addr_set { + info!( + "sig: {:x?}, hpa: {:x?}, gpa: {:x?}, size: {:x?}", + pointer.to_sig, + hpa_start + cur, + gpa_start + cur, + to.get_len() + ); + to.set_addr(hpa_start + cur, gpa_start + cur); + cur += to.get_len(); + } + + let to_gpa = match banned_tables.contains(&pointer.to_sig) { + true => 0, + false => to.gpa, + }; + + let from = match pointer.from_sig == pointer.to_sig { + true => &mut rsdp, + false => tables.get_mut(&pointer.from_sig).unwrap(), + }; + + match pointer.pointer_size { + 4 => { + from.set_u32(to_gpa as _, pointer.from_offset); + } + 8 => { + from.set_u64(to_gpa as _, pointer.from_offset); + } + _ => { + warn!("Unused pointer size!"); + } + } + } + + let ban_ssdt = banned_tables.contains(&Signature::SSDT); + let from = tables.get_mut(&Signature::RSDT).unwrap(); + for (&offset, ssdt) in ssdts.iter_mut() { + info!( + "sig: {:x?}, hpa: {:x?}, gpa: {:x?}, size: {:x?}", + Signature::SSDT, + hpa_start + cur, + gpa_start + cur, + ssdt.get_len() + ); + ssdt.set_addr(hpa_start + cur, gpa_start + cur); + cur += ssdt.get_len(); + + let to_gpa = match ban_ssdt { + true => 0, + false => ssdt.gpa, + }; + from.set_u32(to_gpa as _, offset); + } + + // update checksums + rsdp.update_checksum(RSDP_CHECKSUM_OFFSET); + for (sig, table) in tables.iter_mut() { + table.update_checksum(ACPI_CHECKSUM_OFFSET); + } + + // copy to memory + unsafe { rsdp.copy_to_mem() }; + for (sig, table) in tables.iter() { + // don't copy tables that are not inside ACPI tree + if tables_involved.contains(sig) { + unsafe { table.copy_to_mem() }; + } + } + if !ban_ssdt { + for (&offset, ssdt) in ssdts.iter() { + unsafe { ssdt.copy_to_mem() }; + } + } + } + + // let zone 0 bsp cpu does the work + pub fn init() -> Self { + let mut root_acpi = Self::default(); + let rsdp_addr = boot::get_multiboot_tags().rsdp_addr.unwrap(); + + root_acpi.rsdp_copy = unsafe { + slice::from_raw_parts(rsdp_addr as *const u8, 
core::mem::size_of::()).to_vec() + }; + let rsdp_copy_addr = root_acpi.rsdp_copy.as_ptr() as usize; + + let handler = HvAcpiHandler {}; + let rsdp_mapping = unsafe { + handler.map_physical_region::(rsdp_copy_addr, core::mem::size_of::()) + }; + + // let rsdp_mapping = unsafe { Rsdp::search_for_on_bios(HvAcpiHandler {}).unwrap() }; + // TODO: temporarily suppose we use ACPI 1.0 + assert!(rsdp_mapping.revision() == 0); + + root_acpi.rsdp.fill( + None, + rsdp_mapping.virtual_start().as_ptr() as *const u8, + RSDP_V1_SIZE, + RSDP_CHECKSUM_OFFSET, + ); + root_acpi.add_pointer( + Signature::RSDT, + RSDP_RSDT_OFFSET, + Signature::RSDT, + RSDP_RSDT_PTR_SIZE, + ); + + // get rsdt + let rsdt_addr = rsdp_mapping.rsdt_address() as usize; + root_acpi.add_new_table(Signature::RSDT, rsdt_addr as *const u8, SDT_HEADER_SIZE); + let mut rsdt_offset = root_acpi.get_mut_table(Signature::RSDT).unwrap().get_len(); + + let tables = + unsafe { AcpiTables::from_validated_rsdp(HvAcpiHandler {}, rsdp_mapping) }.unwrap(); + + // print rsdt entries + let mut rsdt_entry = rsdt_addr + 36; + let size = (unsafe { *((rsdt_addr + 4) as *const u32) } as usize - 36) / 4; + for i in 0..size { + let addr = unsafe { *(rsdt_entry as *const u32) } as usize; + let sig_ptr = addr as *const u8; + let sig = + unsafe { core::str::from_utf8_unchecked(core::slice::from_raw_parts(sig_ptr, 4)) }; + + println!("sig: {:#x?} ptr: {:x} len: {:x}", sig, addr, unsafe { + *((addr + 4) as *const u32) + }); + rsdt_entry += 4; + } + + // mcfg + if let Ok(mcfg) = tables.find_table::() { + root_acpi.add_new_table( + Signature::MCFG, + mcfg.physical_start() as *const u8, + mcfg.region_length(), + ); + + println!("---------- MCFG ----------"); + let mut offset = size_of::() + 0xb; + + if let Some(entry) = mcfg + .entries() + .iter() + .find(|&entry| entry.pci_segment_group == 0) + { + // we only support segment group 0 + println!("{:x?}", entry); + + // we don't have such many buses, probe devices to get the max_bus we have + let 
(mut devices, mut msi_data_reg_map, mut msix_bar_map, _, max_bus) = + probe_root_pci_devices(entry.base_address as _); + + // update bus_number_end + root_acpi + .get_mut_table(Signature::MCFG) + .unwrap() + .set_u8(max_bus, offset); + offset += size_of::(); + + root_acpi.devices.append(&mut devices); + + root_acpi.config_space_base = entry.base_address as _; + root_acpi.config_space_size = + (((max_bus as u64 - entry.bus_number_start as u64) + 1) << 20) as usize; + + root_acpi.msi_data_reg_map.append(&mut msi_data_reg_map); + root_acpi.msix_bar_map.append(&mut msix_bar_map); + } + + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::MCFG, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + } + + // fadt + if let Ok(fadt) = tables.find_table::() { + root_acpi.add_new_table( + Signature::FADT, + fadt.physical_start() as *const u8, + fadt.region_length(), + ); + + println!("---------- FADT ----------"); + + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::FADT, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + + // acpi + let sci_int = fadt.sci_interrupt; + let smi_port = fadt.smi_cmd_port; + let acpi_enable = fadt.acpi_enable; + let acpi_disable = fadt.acpi_disable; + let pm1a_con = fadt.pm1a_control_block(); + let pm1a_evt = fadt.pm1a_event_block(); + + /*println!( + "sci_interrupt: {:x}, smi_cmd_port: {:x}, acpi_enable: {:x}, acpi_disable: {:x}, pm1a_con: {:#x?}, pm1a_evt: {:#x?}", + sci_int, smi_port, acpi_enable, acpi_disable, pm1a_con, pm1a_evt, + );*/ + // println!("{:#x?}", fadt.get()); + // loop {} + + // dsdt + if let Ok(dsdt) = tables.dsdt() { + root_acpi.add_new_table( + Signature::DSDT, + (dsdt.address - SDT_HEADER_SIZE) as *const u8, + (dsdt.length as usize + SDT_HEADER_SIZE), + ); + println!( + "sig: \"DSDT\" ptr: {:x}, len: {:x}", + dsdt.address, dsdt.length + ); + + root_acpi.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_32, Signature::DSDT, 4); + root_acpi.add_pointer(Signature::FADT, FADT_DSDT_OFFSET_64, Signature::DSDT, 8); 
+ } + + // facs + if let Ok(facs_addr) = fadt.facs_address() { + let len = unsafe { *((facs_addr + 4) as *const u32) as usize }; + root_acpi.add_new_table(Signature::FACS, facs_addr as *const u8, len); + println!("sig: \"FACS\" ptr: {:x}, len: {:x}", facs_addr, len); + + root_acpi.add_pointer(Signature::FADT, FADT_FACS_OFFSET_32, Signature::FACS, 4); + root_acpi.add_pointer(Signature::FADT, FADT_FACS_OFFSET_64, Signature::FACS, 8); + } + } + + // madt + if let Ok(madt) = tables.find_table::() { + root_acpi.add_new_table( + Signature::MADT, + madt.physical_start() as *const u8, + madt.region_length(), + ); + + println!("---------- MADT ----------"); + for entry in madt.get().entries() { + match entry { + MadtEntry::LocalApic(entry) => { + if entry.flags != 0 { + println!("{:x?}", entry); + let cpu_id = root_acpi.apic_id_to_cpu_id.len(); + root_acpi + .apic_id_to_cpu_id + .insert(entry.apic_id as _, cpu_id); + root_acpi + .cpu_id_to_apic_id + .insert(cpu_id, entry.apic_id as _); + } + } + _ => {} + } + } + + root_acpi.add_pointer(Signature::RSDT, rsdt_offset, Signature::MADT, RSDT_PTR_SIZE); + rsdt_offset += RSDT_PTR_SIZE; + } + + // dmar + acpi_table!(Dmar, DMAR); + if let Ok(dmar) = tables.find_table::() { + root_acpi.add_new_table( + Signature::DMAR, + dmar.physical_start() as *const u8, + dmar.region_length(), + ); + + /*println!("DMAR: {:x?}", unsafe { + *((dmar.physical_start() + 56) as *const [u8; 8]) + });*/ + + // self.add_pointer(Signature::RSDT, rsdt_offset, Signature::DMAR, RSDT_PTR_SIZE); + // rsdt_offset += RSDT_PTR_SIZE; + } + + // ssdt + for ssdt in tables.ssdts() { + root_acpi.add_ssdt( + (ssdt.address - SDT_HEADER_SIZE) as *const u8, + (ssdt.length as usize + SDT_HEADER_SIZE), + rsdt_offset, + ); + rsdt_offset += RSDT_PTR_SIZE; + } + + if let Some(rsdt) = root_acpi.get_mut_table(Signature::RSDT) { + rsdt.set_new_len(rsdt_offset); + } + root_acpi + } +} + +// let zone 0 bsp cpu does the work +pub fn root_init() { + ROOT_ACPI.call_once(|| 
RootAcpi::init()); +} + +pub fn copy_to_guest_memory_region(config: &HvZoneConfig, cpu_set: &CpuSet) { + let mut banned: BTreeSet = BTreeSet::new(); + if config.zone_id != 0 { + banned.insert(Signature::FADT); + banned.insert(Signature::SSDT); + } + ROOT_ACPI.get().unwrap().copy_to_zone_region( + &config.memory_regions()[config.arch_config.rsdp_memory_region_id], + &config.memory_regions()[config.arch_config.acpi_memory_region_id], + &banned, + cpu_set, + ); +} + +pub fn root_get_table(sig: &Signature) -> Option { + ROOT_ACPI.get().unwrap().get_table(sig) +} + +pub fn root_get_config_space_info() -> Option<(usize, usize)> { + let acpi = ROOT_ACPI.get().unwrap(); + Some((acpi.config_space_base, acpi.config_space_size)) +} + +pub fn is_msi_data_reg(hpa: usize) -> Option { + if let Some(&bdf) = ROOT_ACPI.get().unwrap().msi_data_reg_map.get(&hpa) { + Some(bdf) + } else { + None + } +} + +pub fn is_msix_bar(hpa: usize) -> Option { + if let Some(&bdf) = ROOT_ACPI.get().unwrap().msix_bar_map.get(&hpa) { + Some(bdf) + } else { + None + } +} + +fn contains_apic_id(apic_id: usize) -> bool { + ROOT_ACPI + .get() + .unwrap() + .apic_id_to_cpu_id + .contains_key(&apic_id) +} + +pub fn get_cpu_id(apic_id: usize) -> usize { + *ROOT_ACPI + .get() + .unwrap() + .apic_id_to_cpu_id + .get(&apic_id) + .unwrap() +} + +pub fn get_apic_id(cpu_id: usize) -> usize { + *ROOT_ACPI + .get() + .unwrap() + .cpu_id_to_apic_id + .get(&cpu_id) + .unwrap() +} diff --git a/src/arch/x86_64/ap_start.S b/src/arch/x86_64/ap_start.S new file mode 100644 index 00000000..8a730730 --- /dev/null +++ b/src/arch/x86_64/ap_start.S @@ -0,0 +1,63 @@ +.equ pa_ap_start32, ap_start32 - ap_start16 + {ap_start_page_paddr} +.equ pa_ap_gdt, .Lap_tmp_gdt - ap_start16 + {ap_start_page_paddr} +.equ pa_ap_gdt_desc, .Lap_tmp_gdt_desc - ap_start16 + {ap_start_page_paddr} +.equ stack_ptr, {ap_start_page_paddr} + 0xff0 +.equ entry_ptr, {ap_start_page_paddr} + 0xff8 + +.section .text +.code16 +// 0x6000 +.p2align 12 +.global 
ap_start16 +ap_start16: + cli + cld + // clear cache + wbinvd + + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + mov fs, ax + mov gs, ax + + // load the 64-bit GDT + lgdt [pa_ap_gdt_desc] + + // switch to protected-mode + mov eax, cr0 + or eax, (1 << 0) + mov cr0, eax + + // far jump to 32-bit code. 0x8 is code32 segment selector + ljmp 0x8, offset pa_ap_start32 + +.code32 +ap_start32: + mov ax, 0x20 + ltr ax + + mov esp, [stack_ptr] + mov eax, [entry_ptr] + jmp eax + +.balign 8 +.Lap_tmp_gdt_desc: + .short .Lap_tmp_gdt_end - .Lap_tmp_gdt - 1 // limit + .long pa_ap_gdt // base + +.balign 16 +.Lap_tmp_gdt: + .quad 0x0000000000000000 // 0x00: null + .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + .quad 0x00008934ee800067 // 0x20: tss low + .quad 0x00000000ffffff80 // 0x28: tss high +.Lap_tmp_gdt_end: + +// 0x7000 +.p2align 12 +.global ap_end +ap_end: \ No newline at end of file diff --git a/src/arch/x86_64/boot.rs b/src/arch/x86_64/boot.rs new file mode 100644 index 00000000..334990f2 --- /dev/null +++ b/src/arch/x86_64/boot.rs @@ -0,0 +1,451 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{zone::HvArchZoneConfig, Stage2PageTable}, + config::{root_zone_config, HvPciConfig, HvZoneConfig, MEM_TYPE_RAM}, + error::HvResult, + memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet}, + percpu::this_zone, + platform::MEM_TYPE_RESERVED, +}; +use alloc::string::{String, ToString}; +use core::{ + arch::{self, global_asm}, + ffi::{c_char, CStr}, + mem::size_of, + ptr::{copy, copy_nonoverlapping}, +}; +use multiboot_tag::{Modules, MultibootTags}; +use spin::{Mutex, Once}; + +mod multiboot_tag { + pub const END: u32 = 0; + pub const MODULES: u32 = 3; + pub const MEMORY_MAP: u32 = 6; + pub const FRAMEBUFFER: u32 = 8; + pub const ACPI_V1: u32 = 14; + + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct Modules { + tag_type: u32, + pub size: u32, + pub mod_start: u32, + pub mod_end: u32, + } + + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct MemoryMap { + tag_type: u32, + pub size: u32, + pub entry_size: u32, + pub entry_version: u32, + } + + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct MemoryMapEntry { + pub base_addr: u64, + pub length: u64, + pub _type: u32, + reserved: u32, + } + + #[repr(C)] + #[derive(Default, Debug, Clone, Copy)] + pub struct Framebuffer { + tag_type: u32, + size: u32, + pub addr: u64, + pub pitch: u32, + pub width: u32, + pub height: u32, + pub bpp: u8, + pub fb_type: u8, + reserved: u8, + } + + #[derive(Default, Debug, Clone, Copy)] + pub struct MultibootTags { + pub framebuffer: Framebuffer, + pub memory_map_addr: Option, + pub rsdp_addr: Option, + } +} + +static MULTIBOOT_TAGS: Once = Once::new(); + +const E820_MAX_ENTRIES_ZEROPAGE: usize = 128; + +bitflags::bitflags! 
{ + #[derive(Clone, Copy, Debug)] + /// https://www.kernel.org/doc/html/latest/arch/x86/boot.html + pub struct BootLoadFlags: u8 { + const LOADED_HIGH = 1; + const KASLR_FLAG = 1 << 1; + const QUIET_FLAG = 1 << 5; + const CAN_USE_HEAP = 1 << 7; + } +} + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +pub enum E820Type { + E820_DEFAULT = 0, + E820_RAM = 1, + E820_RESERVED = 2, + E820_ACPI = 3, + E820_NVS = 4, + E820_UNUSABLE = 5, + E820_PMEM = 7, +} +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +/// The so-called "zeropage" +pub struct BootParams { + screen_info: ScreenInfo, + pad0: [u8; 0x1a8], + e820_entries: u8, + pad1: [u8; 0x8], + setup_sects: u8, + root_flags: u16, + syssize: u32, + ramsize: u16, + vid_mode: u16, + pad2: [u8; 0x9], + boot_proto_version: u16, + pad3: [u8; 0x6], + kernel_version: u16, + type_of_loader: u8, + loadflags: BootLoadFlags, + setup_move_size: u16, + code32_start: u32, + ramdisk_image: u32, + ramdisk_size: u32, + bootsect_kludge: u32, + heap_end_ptr: u16, + pad4: [u8; 2], + cmd_line_ptr: u32, + pad5: [u8; 12], + cmdline_size: u32, + pad6: [u8; 12], + payload_offset: u32, + payload_length: u32, + pad7: [u8; 128], + e820_table: [BootE820Entry; E820_MAX_ENTRIES_ZEROPAGE], + pad8: [u8; 0x330], +} + +impl BootParams { + pub fn fill(config: &HvZoneConfig, gpm: &mut MemorySet) -> HvResult { + if config.arch_config.setup_load_gpa == 0 { + panic!("setup addr not set yet!"); + } + + let boot_params_hpa = unsafe { + gpm.page_table_query(config.arch_config.setup_load_gpa) + .unwrap() + .0 + } as HostPhysAddr; + let boot_params = unsafe { &mut *(boot_params_hpa as *mut BootParams) }; + + // info!("boot_proto_version: {:x?}", boot_params.boot_proto_version); + if boot_params.boot_proto_version < 0x0204 { + panic!("kernel boot protocol version older than 2.04 not supported!"); + } + + // set bootloader type as undefined + boot_params.type_of_loader = 0xff; + + let mut 
loadflags = boot_params.loadflags; + // print early messages + loadflags &= !BootLoadFlags::QUIET_FLAG; + // disable heap_ptr + loadflags &= !BootLoadFlags::CAN_USE_HEAP; + boot_params.loadflags = loadflags; + + boot_params.cmd_line_ptr = config.arch_config.cmdline_load_gpa as _; + // copy cmdline manually for root zone + if config.zone_id == 0 { + unsafe { + core::ptr::copy_nonoverlapping( + crate::platform::ROOT_ZONE_CMDLINE.as_ptr(), + gpm.page_table_query(config.arch_config.cmdline_load_gpa) + .unwrap() + .0 as *mut u8, + crate::platform::ROOT_ZONE_CMDLINE.len(), + ) + }; + } + + // set e820 + boot_params.set_e820_entries(config); + + // set initrd + if config.arch_config.initrd_load_gpa != 0 { + boot_params.set_initrd( + config.arch_config.initrd_load_gpa as _, + config.arch_config.initrd_size as _, + ); + } + + // set screen + if config.arch_config.screen_base != 0 { + boot_params.set_screen_info(config, gpm); + } + + Ok(()) + } + + fn set_e820_entries(&mut self, config: &HvZoneConfig) { + let mut index = 0; + for i in 0..config.memory_regions().len() { + let mem_region = config.memory_regions()[i]; + let mut e820_type = E820Type::E820_DEFAULT; + + if i == config.arch_config.rsdp_memory_region_id + || i == config.arch_config.acpi_memory_region_id + { + e820_type = E820Type::E820_ACPI; + } else if mem_region.mem_type == MEM_TYPE_RAM { + e820_type = E820Type::E820_RAM; + } /* + else if config.arch_config.initrd_load_gpa != 0 + && i == config.arch_config.initrd_memory_region_id + { + } */ + + if e820_type != E820Type::E820_DEFAULT { + self.e820_table[index] = BootE820Entry { + addr: mem_region.virtual_start, + size: mem_region.size, + _type: e820_type, + }; + index += 1; + } + } + + self.e820_table[index] = BootE820Entry { + addr: config.pci_config.ecam_base as _, + size: config.pci_config.ecam_size as _, + _type: E820Type::E820_RESERVED, + }; + index += 1; + + self.e820_entries = index as _; + } + + fn set_initrd(&mut self, ramdisk_image: u32, ramdisk_size: 
u32) { + self.ramdisk_image = ramdisk_image; + self.ramdisk_size = ramdisk_size; + info!("initrd size: {}", self.ramdisk_size); + } + + fn set_screen_info(&mut self, config: &HvZoneConfig, gpm: &mut MemorySet) { + let fb_info = &get_multiboot_tags().framebuffer; + + let bytes_per_pixel = (fb_info.bpp as usize) / 8; + let width = fb_info.width as usize; + let height = fb_info.height as usize; + + self.screen_info.lfb_base = config.arch_config.screen_base as _; + self.screen_info.lfb_width = width as _; + self.screen_info.lfb_height = height as _; + self.screen_info.lfb_depth = fb_info.bpp as _; + self.screen_info.lfb_size = (bytes_per_pixel * width * height) as _; + self.screen_info.lfb_linelength = (bytes_per_pixel * width) as _; + + // TODO: custom + self.screen_info.blue_size = 8; + self.screen_info.blue_pos = 0; + self.screen_info.green_size = 8; + self.screen_info.green_pos = 8; + self.screen_info.red_size = 8; + self.screen_info.red_pos = 16; + self.screen_info.alpha_size = 8; + self.screen_info.alpha_pos = 24; + self.screen_info.orig_video_is_vga = 0x23; // VESA + self.screen_info.capabilities = 0; + self.vid_mode = 0xffff; + + gpm.insert(MemoryRegion::new_with_offset_mapper( + config.arch_config.screen_base as GuestPhysAddr, + fb_info.addr as HostPhysAddr, + self.screen_info.lfb_size as _, + MemFlags::READ | MemFlags::WRITE, + )); + } +} + +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +/// The E820 memory region entry of the boot protocol ABI: +pub struct BootE820Entry { + addr: u64, + size: u64, + _type: E820Type, +} + +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +pub struct ScreenInfo { + pad0: [u8; 0x0f], + orig_video_is_vga: u8, + pad1: u16, + lfb_width: u16, + lfb_height: u16, + lfb_depth: u16, + lfb_base: u32, + lfb_size: u32, + pad2: [u16; 2], + lfb_linelength: u16, + red_size: u8, + red_pos: u8, + green_size: u8, + green_pos: u8, + blue_size: u8, + blue_pos: u8, + alpha_size: u8, + alpha_pos: u8, + pad3: [u8; 4], + pages: u16, + vesa_attributes: 
u16, + capabilities: u32, + pad4: [u8; 6], +} + +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +pub struct EfiInfo { + loader_signature: u32, + systab: u32, + memdesc_size: u32, + memdesc_version: u32, + memmap: u32, + memmap_size: u32, + systab_hi: u32, + memmap_hi: u32, +} + +pub fn multiboot_init(info_addr: usize) { + let mut cur = info_addr; + let total_size = unsafe { *(cur as *const u32) } as usize; + let mut multiboot_tags = MultibootTags::default(); + + // println!("{:#x?}", total_size); + cur += 8; + while cur < info_addr + total_size { + let tag_type = unsafe { *(cur as *const u32) }; + if tag_type == multiboot_tag::END { + break; + } + + // println!("{:#x?}", tag_type); + match tag_type { + multiboot_tag::MODULES => {} + multiboot_tag::MEMORY_MAP => { + multiboot_tags.memory_map_addr = Some(cur); + } + multiboot_tag::FRAMEBUFFER => { + multiboot_tags.framebuffer = + unsafe { *(cur as *const multiboot_tag::Framebuffer) }.clone(); + } + multiboot_tag::ACPI_V1 => { + multiboot_tags.rsdp_addr = Some(cur + 8); + } + _ => {} + } + cur += ((unsafe { *((cur + 4) as *const u32) } as usize + 7) & (!7)); + } + + MULTIBOOT_TAGS.call_once(|| multiboot_tags); +} + +pub fn get_multiboot_tags() -> &'static multiboot_tag::MultibootTags { + MULTIBOOT_TAGS.get().unwrap() +} + +pub fn print_memory_map() { + let map_addr = get_multiboot_tags().memory_map_addr.unwrap(); + let mem_map = unsafe { *(map_addr as *const multiboot_tag::MemoryMap) }; + let mem_map_size = size_of::(); + let cnt = ((mem_map.size as usize) - mem_map_size) / (mem_map.entry_size as usize); + + let mut entry_addr = map_addr + mem_map_size; + println!("---------- MEMORY MAP ----------"); + for i in 0..cnt { + let entry = unsafe { *(entry_addr as *const multiboot_tag::MemoryMapEntry) }; + println!( + "base: {:x}, len: {:x}, type: {:x}", + entry.base_addr, entry.length, entry._type + ); + entry_addr += size_of::(); + } +} + +/// copy kernel modules to the right place +pub fn module_init(info_addr: usize) { 
+ println!("module_init"); + let mut cur = info_addr; + let total_size = unsafe { *(cur as *const u32) } as usize; + + let mut cnt = 0; + cur += 8; + while cur < info_addr + total_size { + let tag_type = unsafe { *(cur as *const u32) }; + let ptr = cur as *const multiboot_tag::Modules; + cur += ((unsafe { *((cur + 4) as *const u32) } as usize + 7) & (!7)); + + if tag_type == multiboot_tag::END { + break; + } + if tag_type != multiboot_tag::MODULES { + continue; + } + + let module = unsafe { *ptr }; + let dst = unsafe { + usize::from_str_radix( + CStr::from_ptr(((ptr as usize) + size_of::()) as *const c_char) + .to_str() + .unwrap(), + 16, + ) + .unwrap() + }; + println!("module: {:#x?}, addr: {:#x?}", module, dst); + cnt += 1; + + if dst == 0x0 { + continue; + } + + unsafe { + core::ptr::copy( + module.mod_start as *mut u8, + dst as *mut u8, + (module.mod_end - module.mod_start + 1) as usize, + ) + }; + } + println!("module cnt: {:x}", cnt); +} diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs new file mode 100644 index 00000000..260d8c2f --- /dev/null +++ b/src/arch/x86_64/consts.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +// PCI constants +pub const HV_ADDR_PREFIX: u64 = 0; +pub const LOONG_HT_PREFIX: u64 = 0; +pub const BDF_SHIFT: usize = 12; diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs new file mode 100644 index 00000000..07190eb2 --- /dev/null +++ b/src/arch/x86_64/cpu.rs @@ -0,0 +1,685 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + acpi::{self, *}, + boot::BootParams, + hpet, iommu, ipi, + mm::new_s2_memory_set, + msr::{ + get_msr_bitmap, + Msr::{self, *}, + MsrBitmap, + }, + pio::{get_pio_bitmap, PortIoBitmap}, + vmcs::*, + vmx::*, + }, + consts::{self, core_end, PER_CPU_SIZE}, + device::irqchip::pic::{check_pending_vectors, clear_vectors, ioapic, lapic::VirtLocalApic}, + error::{HvError, HvResult}, + memory::{ + addr::{phys_to_virt, PHYS_VIRT_OFFSET}, + mm::PARKING_MEMORY_SET, + Frame, GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, PhysAddr, PAGE_SIZE, + PARKING_INST_PAGE, + }, + percpu::{this_cpu_data, this_zone}, + platform::{ROOT_ZONE_BOOT_STACK, ROOT_ZONE_CMDLINE}, + zone::{find_zone, this_zone_id}, +}; +use alloc::boxed::Box; +use bit_field::BitField; +use core::{ + arch::{asm, global_asm}, + fmt::{Debug, Formatter, Result}, + mem::size_of, + ptr::copy_nonoverlapping, + sync::atomic::{AtomicU32, Ordering}, + time::Duration, +}; +use raw_cpuid::CpuId; +use x86::{ + bits64::vmx, + dtables::{self, 
DescriptorTablePointer}, + vmx::vmcs::control::{ + EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls, + }, +}; +use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}; + +const AP_START_PAGE_IDX: u8 = 6; +const AP_START_PAGE_PADDR: PhysAddr = AP_START_PAGE_IDX as usize * PAGE_SIZE; + +static VMXON_DONE: AtomicU32 = AtomicU32::new(0); + +global_asm!( + include_str!("ap_start.S"), + ap_start_page_paddr = const AP_START_PAGE_PADDR, +); + +macro_rules! save_regs_to_stack { + () => { + " + push r15 + push r14 + push r13 + push r12 + push r11 + push r10 + push r9 + push r8 + push rdi + push rsi + push rbp + sub rsp, 8 + push rbx + push rdx + push rcx + push rax" + }; +} + +macro_rules! restore_regs_from_stack { + () => { + " + pop rax + pop rcx + pop rdx + pop rbx + add rsp, 8 + pop rbp + pop rsi + pop rdi + pop r8 + pop r9 + pop r10 + pop r11 + pop r12 + pop r13 + pop r14 + pop r15" + }; +} + +unsafe fn setup_ap_start_page(cpuid: usize) { + extern "C" { + fn ap_start16(); + fn ap_end(); + fn ap_entry32(); + } + const U64_PER_PAGE: usize = PAGE_SIZE / size_of::(); + + let ap_start_page_ptr = AP_START_PAGE_PADDR as *mut u64; + let ap_start_page = core::slice::from_raw_parts_mut(ap_start_page_ptr, U64_PER_PAGE); + core::ptr::copy_nonoverlapping( + ap_start16 as *const u64, + ap_start_page_ptr, + (ap_end as usize - ap_start16 as usize) / 8, + ); + ap_start_page[U64_PER_PAGE - 2] = (core_end() + (cpuid + 1) * PER_CPU_SIZE) as u64; + ap_start_page[U64_PER_PAGE - 1] = ap_entry32 as u64; +} + +pub fn cpu_start(cpuid: usize, start_addr: usize, opaque: usize) { + unsafe { setup_ap_start_page(cpuid) }; + + let lapic = VirtLocalApic::phys_local_apic(); + let apic_id = acpi::get_apic_id(cpuid); + + // Intel SDM Vol 3C, Section 8.4.4, MP Initialization Example + unsafe { lapic.send_init_ipi(apic_id as u32) }; + hpet::busy_wait(Duration::from_millis(50)); // 10ms + unsafe { lapic.send_sipi(AP_START_PAGE_IDX, apic_id as u32) }; + 
hpet::busy_wait(Duration::from_micros(2000)); // 200us + unsafe { lapic.send_sipi(AP_START_PAGE_IDX, apic_id as u32) }; +} + +/// General-Purpose Registers for 64-bit x86 architecture. +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct GeneralRegisters { + pub rax: u64, + pub rcx: u64, + pub rdx: u64, + pub rbx: u64, + _unused_rsp: u64, + pub rbp: u64, + pub rsi: u64, + pub rdi: u64, + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, +} + +#[repr(C)] +pub struct ArchCpu { + // guest_regs and host_stack_top should always be at first. + guest_regs: GeneralRegisters, + host_stack_top: u64, + pub cpuid: usize, + pub power_on: bool, + pub virt_lapic: VirtLocalApic, + vmx_on: bool, + vmcs_revision_id: u32, + vmxon_region: VmxRegion, + vmcs_region: VmxRegion, + vm_launch_guest_regs: GeneralRegisters, +} + +impl ArchCpu { + pub fn new(cpuid: usize) -> Self { + let cpuid = this_cpu_id(); + Self { + guest_regs: GeneralRegisters::default(), + host_stack_top: 0, + cpuid, + power_on: false, + virt_lapic: VirtLocalApic::new(), + vmx_on: false, + vmcs_revision_id: 0, + vmxon_region: VmxRegion::fake_init(), + vmcs_region: VmxRegion::fake_init(), + vm_launch_guest_regs: GeneralRegisters::default(), + } + } + + /// Advance guest `RIP` by `instr_len` bytes. + pub fn advance_guest_rip(&mut self, instr_len: u8) -> HvResult { + Ok(VmcsGuestNW::RIP.write(VmcsGuestNW::RIP.read()? + instr_len as usize)?) + } + + pub fn cr(&self, cr_idx: usize) -> usize { + (|| -> HvResult { + Ok(match cr_idx { + 4 => { + let host_mask = VmcsControlNW::CR4_GUEST_HOST_MASK.read()?; + (VmcsControlNW::CR4_READ_SHADOW.read()? & host_mask) + | (VmcsGuestNW::CR4.read()? & !host_mask) + } + _ => unreachable!(), + }) + })() + .expect("Failed to read guest control register") + } + + pub fn idle(&mut self) -> ! 
{ + unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; + + assert!(this_cpu_id() == self.cpuid); + + self.power_on = false; + self.activate_vmx().unwrap(); + + // info!("idle! cpuid: {:x}", self.cpuid); + + PARKING_MEMORY_SET.call_once(|| { + let parking_code: [u8; 2] = [0xeb, 0xfe]; // jump short -2 + unsafe { + PARKING_INST_PAGE[..2].copy_from_slice(&parking_code); + } + + let mut gpm = new_s2_memory_set(); + gpm.insert(MemoryRegion::new_with_offset_mapper( + 0 as GuestPhysAddr, + unsafe { &PARKING_INST_PAGE as *const _ as HostPhysAddr - PHYS_VIRT_OFFSET }, + PAGE_SIZE, + MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE, + )) + .unwrap(); + gpm + }); + + self.setup_vmcs(0, true).unwrap(); + self.host_stack_top = (core_end() + (self.cpuid + 1) * PER_CPU_SIZE) as _; + + unsafe { + PARKING_MEMORY_SET.get().unwrap().activate(); + self.vmx_launch(); + } + } + + /// Guest general-purpose registers. + pub fn regs(&self) -> &GeneralRegisters { + &self.guest_regs + } + + /// Mutable reference of guest general-purpose registers. + pub fn regs_mut(&mut self) -> &mut GeneralRegisters { + &mut self.guest_regs + } + + pub fn run(&mut self) { + if self.power_on { + // x86 wake up cpu will send ipi twice, but we only want once + return; + } + + unsafe { self.virt_lapic.phys_lapic.end_of_interrupt() }; + + assert!(this_cpu_id() == self.cpuid); + let mut per_cpu = this_cpu_data(); + + // info!("run! cpuid: {:x}", self.cpuid); + + self.power_on = true; + self.activate_vmx().unwrap(); + + if !per_cpu.boot_cpu { + if let Some(ipi_info) = ipi::get_ipi_info(self.cpuid) { + per_cpu.cpu_on_entry = ipi_info.lock().start_up_addr; + } + // VmcsGuestNW::RIP.write(per_cpu.cpu_on_entry).unwrap(); + // info!("AP start up! 
addr: {:x}", per_cpu.cpu_on_entry); + } + + self.setup_vmcs(per_cpu.cpu_on_entry, false).unwrap(); + per_cpu.activate_gpm(); + + if per_cpu.boot_cpu { + // must be called after activate_gpm() + iommu::activate(); + self.guest_regs = self.vm_launch_guest_regs.clone(); + } + + while VMXON_DONE.load(Ordering::Acquire) < unsafe { consts::MAX_CPU_NUM } as u32 - 1 { + core::hint::spin_loop(); + } + + self.host_stack_top = (core_end() + (self.cpuid + 1) * PER_CPU_SIZE) as _; + + clear_vectors(self.cpuid); + + unsafe { self.vmx_launch() }; + + loop {} + } + + pub fn set_boot_cpu_vm_launch_regs(&mut self, rax: u64, rsi: u64) { + self.vm_launch_guest_regs.rax = rax; + self.vm_launch_guest_regs.rsi = rsi; + } + + fn activate_vmx(&mut self) -> HvResult { + if self.vmx_on { + return Ok(()); + } + assert!(check_vmx_support()); + // assert!(!is_vmx_enabled()); + + // enable VMXON + unsafe { enable_vmxon().unwrap() }; + + // TODO: check related registers + + // get VMCS revision identifier in IA32_VMX_BASIC MSR + self.vmcs_revision_id = get_vmcs_revision_id(); + self.vmxon_region = VmxRegion::new(self.vmcs_revision_id, false); + + unsafe { execute_vmxon(self.vmxon_region.start_paddr() as u64).unwrap() }; + + self.vmx_on = true; + VMXON_DONE.fetch_add(1, Ordering::SeqCst); + Ok(()) + } + + fn set_cr(&mut self, cr_idx: usize, val: u64) -> HvResult { + match cr_idx { + 0 => { + // Retrieve/validate restrictions on CR0 + // + // In addition to what the VMX MSRs tell us, make sure that + // - NW and CD are kept off as they are not updated on VM exit and we + // don't want them enabled for performance reasons while in root mode + // - PE and PG can be freely chosen (by the guest) because we demand + // unrestricted guest mode support anyway + // - ET is ignored + let must0 = Msr::IA32_VMX_CR0_FIXED1.read(); + // & !(Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE).bits(); + let must1 = Msr::IA32_VMX_CR0_FIXED0.read() + & !(Cr0Flags::PAGING | 
Cr0Flags::PROTECTED_MODE_ENABLE).bits(); + VmcsGuestNW::CR0.write(((val & must0) | must1) as _)?; + VmcsControlNW::CR0_READ_SHADOW.write(val as _)?; + VmcsControlNW::CR0_GUEST_HOST_MASK.write((must1 | !must0) as _)?; + } + 3 => VmcsGuestNW::CR3.write(val as _)?, + 4 => { + let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + let cr4_read_shadow = 0; + let val = val | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS.bits(); + VmcsGuestNW::CR4.write(val as _)?; + VmcsControlNW::CR4_GUEST_HOST_MASK.write(cr4_host_owned.bits() as _)?; + VmcsControlNW::CR4_READ_SHADOW.write(cr4_read_shadow)?; + } + _ => unreachable!(), + } + Ok(()) + } + + // after activate_vmx + fn setup_vmcs(&mut self, entry: GuestPhysAddr, is_idle: bool) -> HvResult { + self.vmcs_region = VmxRegion::new(self.vmcs_revision_id, false); + + let start_paddr = self.vmcs_region.start_paddr() as usize; + Vmcs::clear(start_paddr)?; + Vmcs::load(start_paddr)?; + + self.setup_vmcs_host(&self.host_stack_top as *const _ as usize)?; + self.setup_vmcs_guest(entry, ROOT_ZONE_BOOT_STACK)?; + self.setup_vmcs_control()?; + + Ok(()) + } + + fn setup_vmcs_control(&mut self) -> HvResult { + // intercept NMI and external interrupts + use PinbasedControls as PinCtrl; + Vmcs::set_control( + VmcsControl32::PINBASED_EXEC_CONTROLS, + Msr::IA32_VMX_TRUE_PINBASED_CTLS, + Msr::IA32_VMX_PINBASED_CTLS.read() as u32, + (PinCtrl::NMI_EXITING | PinCtrl::EXTERNAL_INTERRUPT_EXITING).bits(), + 0, + )?; + + // use I/O bitmaps and MSR bitmaps, activate secondary controls, + // disable CR3 load/store interception + use PrimaryControls as CpuCtrl; + Vmcs::set_control( + VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS, + Msr::IA32_VMX_TRUE_PROCBASED_CTLS, + Msr::IA32_VMX_PROCBASED_CTLS.read() as u32, + (CpuCtrl::HLT_EXITING + // | CpuCtrl::RDTSC_EXITING + | CpuCtrl::USE_IO_BITMAPS + | CpuCtrl::USE_MSR_BITMAPS + | CpuCtrl::SECONDARY_CONTROLS) + .bits(), + (CpuCtrl::CR3_LOAD_EXITING | CpuCtrl::CR3_STORE_EXITING).bits(), + )?; + + // enable EPT, 
RDTSCP, INVPCID, and unrestricted guest + use SecondaryControls as CpuCtrl2; + Vmcs::set_control( + VmcsControl32::SECONDARY_PROCBASED_EXEC_CONTROLS, + Msr::IA32_VMX_PROCBASED_CTLS2, + 0, + (CpuCtrl2::ENABLE_EPT + | CpuCtrl2::ENABLE_RDTSCP + // | CpuCtrl2::VIRTUALIZE_X2APIC + | CpuCtrl2::ENABLE_INVPCID + | CpuCtrl2::UNRESTRICTED_GUEST) + .bits(), + 0, + )?; + + // load guest IA32_PAT/IA32_EFER on VM entry + use EntryControls as EntryCtrl; + Vmcs::set_control( + VmcsControl32::VMENTRY_CONTROLS, + Msr::IA32_VMX_TRUE_ENTRY_CTLS, + Msr::IA32_VMX_ENTRY_CTLS.read() as u32, + (EntryCtrl::LOAD_IA32_PAT | EntryCtrl::LOAD_IA32_EFER).bits(), + 0, + )?; + + // switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit + use ExitControls as ExitCtrl; + Vmcs::set_control( + VmcsControl32::VMEXIT_CONTROLS, + Msr::IA32_VMX_TRUE_EXIT_CTLS, + Msr::IA32_VMX_EXIT_CTLS.read() as u32, + (ExitCtrl::HOST_ADDRESS_SPACE_SIZE + | ExitCtrl::ACK_INTERRUPT_ON_EXIT + | ExitCtrl::SAVE_IA32_PAT + | ExitCtrl::LOAD_IA32_PAT + | ExitCtrl::SAVE_IA32_EFER + | ExitCtrl::LOAD_IA32_EFER) + .bits(), + 0, + )?; + + // no MSR switches if hypervisor doesn't use and there is only one vCPU + VmcsControl32::VMEXIT_MSR_STORE_COUNT.write(0)?; + VmcsControl32::VMEXIT_MSR_LOAD_COUNT.write(0)?; + VmcsControl32::VMENTRY_MSR_LOAD_COUNT.write(0)?; + + // pass-through exceptions, set I/O bitmap and MSR bitmaps + VmcsControl32::EXCEPTION_BITMAP.write(0)?; + + if self.power_on { + let pio_bitmap = get_pio_bitmap(this_zone_id()); + VmcsControl64::IO_BITMAP_A_ADDR.write(pio_bitmap.a.start_paddr() as _)?; + VmcsControl64::IO_BITMAP_B_ADDR.write(pio_bitmap.b.start_paddr() as _)?; + VmcsControl64::MSR_BITMAPS_ADDR + .write(get_msr_bitmap(this_zone_id()).phys_addr() as _)?; + } + + // set virtual-APIC page address + // self.virt_lapic.vapic_page = Frame::new_zero().unwrap(); + // VmcsControl64::VIRT_APIC_ADDR.write(self.virt_lapic.vapic_page.start_paddr() as _); + Ok(()) + } + + fn 
setup_vmcs_guest(&mut self, entry: GuestPhysAddr, rsp: GuestPhysAddr) -> HvResult { + let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; + let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; + + self.set_cr(0, cr0_guest.bits()); + self.set_cr(3, 0); + self.set_cr(4, cr4_guest.bits()); + + macro_rules! set_guest_segment { + ($seg: ident, $access_rights: expr) => {{ + use VmcsGuest16::*; + use VmcsGuest32::*; + use VmcsGuestNW::*; + concat_idents!($seg, _SELECTOR).write(0)?; + concat_idents!($seg, _BASE).write(0)?; + concat_idents!($seg, _LIMIT).write(0xffff)?; + concat_idents!($seg, _ACCESS_RIGHTS).write($access_rights)?; + }}; + } + + set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed + set_guest_segment!(CS, 0x9b); // 16-bit, present, code, exec/read, accessed + set_guest_segment!(SS, 0x93); + set_guest_segment!(DS, 0x93); + set_guest_segment!(FS, 0x93); + set_guest_segment!(GS, 0x93); + set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy + set_guest_segment!(LDTR, 0x82); // present, system, LDT + + VmcsGuestNW::GDTR_BASE.write(0)?; + VmcsGuest32::GDTR_LIMIT.write(0xffff)?; + VmcsGuestNW::IDTR_BASE.write(0)?; + VmcsGuest32::IDTR_LIMIT.write(0xffff)?; + + VmcsGuestNW::DR7.write(0x400)?; + VmcsGuestNW::RSP.write(rsp)?; + VmcsGuestNW::RIP.write(entry)?; + VmcsGuestNW::RFLAGS.write(0x2)?; + VmcsGuestNW::PENDING_DBG_EXCEPTIONS.write(0)?; + VmcsGuestNW::IA32_SYSENTER_ESP.write(0)?; + VmcsGuestNW::IA32_SYSENTER_EIP.write(0)?; + VmcsGuest32::IA32_SYSENTER_CS.write(0)?; + + VmcsGuest32::INTERRUPTIBILITY_STATE.write(0)?; + VmcsGuest32::ACTIVITY_STATE.write(0)?; + VmcsGuest32::VMX_PREEMPTION_TIMER_VALUE.write(0)?; + + VmcsGuest64::LINK_PTR.write(u64::MAX)?; // SDM Vol. 3C, Section 24.4.2 + VmcsGuest64::IA32_DEBUGCTL.write(0)?; + VmcsGuest64::IA32_PAT.write(Msr::IA32_PAT.read())?; + VmcsGuest64::IA32_EFER.write(0)?; + + // for AP start up, set CS_BASE to entry address, and RIP to 0. 
+ if self.power_on && !this_cpu_data().boot_cpu { + VmcsGuestNW::RIP.write(0)?; + VmcsGuestNW::CS_BASE.write(entry)?; + } + + Ok(()) + } + + fn setup_vmcs_host(&mut self, rsp: GuestPhysAddr) -> HvResult { + VmcsHost64::IA32_PAT.write(Msr::IA32_PAT.read())?; + VmcsHost64::IA32_EFER.write(Msr::IA32_EFER.read())?; + + VmcsHostNW::CR0.write(Cr0::read_raw() as _)?; + VmcsHostNW::CR3.write(Cr3::read_raw().0.start_address().as_u64() as _)?; + VmcsHostNW::CR4.write(Cr4::read_raw() as _)?; + + VmcsHost16::ES_SELECTOR.write(x86::segmentation::es().bits())?; + VmcsHost16::CS_SELECTOR.write(x86::segmentation::cs().bits())?; + VmcsHost16::SS_SELECTOR.write(x86::segmentation::ss().bits())?; + VmcsHost16::DS_SELECTOR.write(x86::segmentation::ds().bits())?; + VmcsHost16::FS_SELECTOR.write(x86::segmentation::fs().bits())?; + VmcsHost16::GS_SELECTOR.write(x86::segmentation::gs().bits())?; + VmcsHostNW::FS_BASE.write(Msr::IA32_FS_BASE.read() as _)?; + VmcsHostNW::GS_BASE.write(Msr::IA32_GS_BASE.read() as _)?; + + let tr = unsafe { x86::task::tr() }; + let mut gdtp = DescriptorTablePointer::::default(); + let mut idtp = DescriptorTablePointer::::default(); + unsafe { + dtables::sgdt(&mut gdtp); + dtables::sidt(&mut idtp); + } + VmcsHost16::TR_SELECTOR.write(tr.bits())?; + VmcsHostNW::TR_BASE.write(get_tr_base(tr, &gdtp) as _)?; + VmcsHostNW::GDTR_BASE.write(gdtp.base as _)?; + VmcsHostNW::IDTR_BASE.write(idtp.base as _)?; + VmcsHostNW::RSP.write(rsp)?; + VmcsHostNW::RIP.write(Self::vmx_exit as usize)?; + + VmcsHostNW::IA32_SYSENTER_ESP.write(0)?; + VmcsHostNW::IA32_SYSENTER_EIP.write(0)?; + VmcsHost32::IA32_SYSENTER_CS.write(0)?; + Ok(()) + } + + fn vmexit_handler(&mut self) { + crate::arch::trap::handle_vmexit(self).unwrap(); + if (self.power_on) { + check_pending_vectors(self.cpuid); + } + } + + unsafe fn vmx_entry_failed() -> ! { + panic!("{}", Vmcs::instruction_error().unwrap().as_str()); + } + + #[naked] + unsafe extern "C" fn vmx_exit(&mut self) -> ! 
{ + asm!( + save_regs_to_stack!(), + "mov r15, rsp", // save temporary RSP to r15 + "mov rdi, rsp", // set the first arg to RSP + "mov rsp, [rsp + {host_stack_top}]", // set RSP to host_stack_top + "call {vmexit_handler}", // call vmexit_handler + "mov rsp, r15", // load temporary RSP from r15 + restore_regs_from_stack!(), + "vmresume", + "jmp {failed}", + host_stack_top = const size_of::(), + vmexit_handler = sym Self::vmexit_handler, + failed = sym Self::vmx_entry_failed, + options(noreturn), + ); + } + + #[naked] + unsafe extern "C" fn vmx_launch(&mut self) -> ! { + asm!( + // "mov [rdi + {host_stack_top}], rsp", // save current RSP to host_stack_top + "mov rsp, rdi", // set RSP to guest regs area + restore_regs_from_stack!(), + "vmlaunch", + "jmp {failed}", + // host_stack_top = const size_of::(), + failed = sym Self::vmx_entry_failed, + options(noreturn), + ) + } +} + +pub fn this_cpu_id() -> usize { + crate::arch::acpi::get_cpu_id(this_apic_id()) +} + +pub fn this_apic_id() -> usize { + match CpuId::new().get_feature_info() { + Some(info) => info.initial_local_apic_id() as usize, + None => { + panic!("can not find apic id!"); + 0 + } + } +} + +fn get_tr_base( + tr: x86::segmentation::SegmentSelector, + gdt: &x86::dtables::DescriptorTablePointer, +) -> u64 { + let index = tr.index() as usize; + let table_len = (gdt.limit as usize + 1) / core::mem::size_of::(); + let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; + let entry = table[index]; + if entry & (1 << 47) != 0 { + // present + let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; + let base_high = table[index + 1] & 0xffff_ffff; + base_low | base_high << 32 + } else { + // no present + 0 + } +} + +impl Debug for ArchCpu { + fn fmt(&self, f: &mut Formatter) -> Result { + (|| -> HvResult { + Ok(f.debug_struct("ArchCpu") + .field("guest_regs", &self.guest_regs) + .field("rip", &VmcsGuestNW::RIP.read()?) + .field("rsp", &VmcsGuestNW::RSP.read()?) 
+ .field("rflags", &VmcsGuestNW::RFLAGS.read()?) + .field("cr0", &VmcsGuestNW::CR0.read()?) + .field("cr3", &VmcsGuestNW::CR3.read()?) + .field("cr4", &VmcsGuestNW::CR4.read()?) + .field("gdtr_base", &VmcsGuestNW::GDTR_BASE.read()?) + .field("cs_selector", &VmcsGuest16::CS_SELECTOR.read()?) + .finish()) + })() + .unwrap() + } +} + +pub fn store_cpu_pointer_to_reg(pointer: usize) { + // println!("x86_64 doesn't support store cpu pointer to reg, pointer: {:#x}", pointer); + return; +} + +pub fn get_target_cpu(irq: usize, zone_id: usize) -> usize { + ioapic::get_irq_cpu(irq, zone_id) +} diff --git a/src/arch/x86_64/cpuid.rs b/src/arch/x86_64/cpuid.rs new file mode 100644 index 00000000..989666d5 --- /dev/null +++ b/src/arch/x86_64/cpuid.rs @@ -0,0 +1,207 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug)] +pub enum CpuIdEax { + VendorInfo = 0x0, + FeatureInfo = 0x1, + StructuredExtendedFeatureInfo = 0x7, + ProcessorFrequencyInfo = 0x16, + HypervisorInfo = 0x4000_0000, + HypervisorFeatures = 0x4000_0001, +} +} + +bitflags::bitflags! { + /// Copied from https://docs.rs/raw-cpuid/8.1.2/src/raw_cpuid/lib.rs.html#1290-1294 + pub struct FeatureInfoFlags: u64 { + + // ECX flags + + /// Streaming SIMD Extensions 3 (SSE3). A value of 1 indicates the processor supports this technology. + const SSE3 = 1 << 0; + /// PCLMULQDQ. 
A value of 1 indicates the processor supports the PCLMULQDQ instruction + const PCLMULQDQ = 1 << 1; + /// 64-bit DS Area. A value of 1 indicates the processor supports DS area using 64-bit layout + const DTES64 = 1 << 2; + /// MONITOR/MWAIT. A value of 1 indicates the processor supports this feature. + const MONITOR = 1 << 3; + /// CPL Qualified Debug Store. A value of 1 indicates the processor supports the extensions to the Debug Store feature to allow for branch message storage qualified by CPL. + const DSCPL = 1 << 4; + /// Virtual Machine Extensions. A value of 1 indicates that the processor supports this technology. + const VMX = 1 << 5; + /// Safer Mode Extensions. A value of 1 indicates that the processor supports this technology. See Chapter 5, Safer Mode Extensions Reference. + const SMX = 1 << 6; + /// Enhanced Intel SpeedStep® technology. A value of 1 indicates that the processor supports this technology. + const EIST = 1 << 7; + /// Thermal Monitor 2. A value of 1 indicates whether the processor supports this technology. + const TM2 = 1 << 8; + /// A value of 1 indicates the presence of the Supplemental Streaming SIMD Extensions 3 (SSSE3). A value of 0 indicates the instruction extensions are not present in the processor + const SSSE3 = 1 << 9; + /// L1 Context ID. A value of 1 indicates the L1 data cache mode can be set to either adaptive mode or shared mode. A value of 0 indicates this feature is not supported. See definition of the IA32_MISC_ENABLE MSR Bit 24 (L1 Data Cache Context Mode) for details. + const CNXTID = 1 << 10; + /// A value of 1 indicates the processor supports FMA extensions using YMM state. + const FMA = 1 << 12; + /// CMPXCHG16B Available. A value of 1 indicates that the feature is available. See the CMPXCHG8B/CMPXCHG16B Compare and Exchange Bytes section. 
14 + const CMPXCHG16B = 1 << 13; + /// Perfmon and Debug Capability: A value of 1 indicates the processor supports the performance and debug feature indication MSR IA32_PERF_CAPABILITIES. + const PDCM = 1 << 15; + /// Process-context identifiers. A value of 1 indicates that the processor supports PCIDs and the software may set CR4.PCIDE to 1. + const PCID = 1 << 17; + /// A value of 1 indicates the processor supports the ability to prefetch data from a memory mapped device. + const DCA = 1 << 18; + /// A value of 1 indicates that the processor supports SSE4.1. + const SSE41 = 1 << 19; + /// A value of 1 indicates that the processor supports SSE4.2. + const SSE42 = 1 << 20; + /// A value of 1 indicates that the processor supports x2APIC feature. + const X2APIC = 1 << 21; + /// A value of 1 indicates that the processor supports MOVBE instruction. + const MOVBE = 1 << 22; + /// A value of 1 indicates that the processor supports the POPCNT instruction. + const POPCNT = 1 << 23; + /// A value of 1 indicates that the processors local APIC timer supports one-shot operation using a TSC deadline value. + const TSC_DEADLINE = 1 << 24; + /// A value of 1 indicates that the processor supports the AESNI instruction extensions. + const AESNI = 1 << 25; + /// A value of 1 indicates that the processor supports the XSAVE/XRSTOR processor extended states feature, the XSETBV/XGETBV instructions, and XCR0. + const XSAVE = 1 << 26; + /// A value of 1 indicates that the OS has enabled XSETBV/XGETBV instructions to access XCR0, and support for processor extended state management using XSAVE/XRSTOR. + const OSXSAVE = 1 << 27; + /// A value of 1 indicates the processor supports the AVX instruction extensions. + const AVX = 1 << 28; + /// A value of 1 indicates that processor supports 16-bit floating-point conversion instructions. + const F16C = 1 << 29; + /// A value of 1 indicates that processor supports RDRAND instruction. 
+ const RDRAND = 1 << 30; + /// A value of 1 indicates the indicates the presence of a hypervisor. + const HYPERVISOR = 1 << 31; + + // EDX flags + + /// Floating Point Unit On-Chip. The processor contains an x87 FPU. + const FPU = 1 << (32 + 0); + /// Virtual 8086 Mode Enhancements. Virtual 8086 mode enhancements, including CR4.VME for controlling the feature, CR4.PVI for protected mode virtual interrupts, software interrupt indirection, expansion of the TSS with the software indirection bitmap, and EFLAGS.VIF and EFLAGS.VIP flags. + const VME = 1 << (32 + 1); + /// Debugging Extensions. Support for I/O breakpoints, including CR4.DE for controlling the feature, and optional trapping of accesses to DR4 and DR5. + const DE = 1 << (32 + 2); + /// Page Size Extension. Large pages of size 4 MByte are supported, including CR4.PSE for controlling the feature, the defined dirty bit in PDE (Page Directory Entries), optional reserved bit trapping in CR3, PDEs, and PTEs. + const PSE = 1 << (32 + 3); + /// Time Stamp Counter. The RDTSC instruction is supported, including CR4.TSD for controlling privilege. + const TSC = 1 << (32 + 4); + /// Model Specific Registers RDMSR and WRMSR Instructions. The RDMSR and WRMSR instructions are supported. Some of the MSRs are implementation dependent. + const MSR = 1 << (32 + 5); + /// Physical Address Extension. Physical addresses greater than 32 bits are supported: extended page table entry formats, an extra level in the page translation tables is defined, 2-MByte pages are supported instead of 4 Mbyte pages if PAE bit is 1. + const PAE = 1 << (32 + 6); + /// Machine Check Exception. Exception 18 is defined for Machine Checks, including CR4.MCE for controlling the feature. This feature does not define the model-specific implementations of machine-check error logging, reporting, and processor shutdowns. 
Machine Check exception handlers may have to depend on processor version to do model specific processing of the exception, or test for the presence of the Machine Check feature. + const MCE = 1 << (32 + 7); + /// CMPXCHG8B Instruction. The compare-and-exchange 8 bytes (64 bits) instruction is supported (implicitly locked and atomic). + const CX8 = 1 << (32 + 8); + /// APIC On-Chip. The processor contains an Advanced Programmable Interrupt Controller (APIC), responding to memory mapped commands in the physical address range FFFE0000H to FFFE0FFFH (by default - some processors permit the APIC to be relocated). + const APIC = 1 << (32 + 9); + /// SYSENTER and SYSEXIT Instructions. The SYSENTER and SYSEXIT and associated MSRs are supported. + const SEP = 1 << (32 + 11); + /// Memory Type Range Registers. MTRRs are supported. The MTRRcap MSR contains feature bits that describe what memory types are supported, how many variable MTRRs are supported, and whether fixed MTRRs are supported. + const MTRR = 1 << (32 + 12); + /// Page Global Bit. The global bit is supported in paging-structure entries that map a page, indicating TLB entries that are common to different processes and need not be flushed. The CR4.PGE bit controls this feature. + const PGE = 1 << (32 + 13); + /// Machine Check Architecture. The Machine Check exArchitecture, which provides a compatible mechanism for error reporting in P6 family, Pentium 4, Intel Xeon processors, and future processors, is supported. The MCG_CAP MSR contains feature bits describing how many banks of error reporting MSRs are supported. + const MCA = 1 << (32 + 14); + /// Conditional Move Instructions. The conditional move instruction CMOV is supported. In addition, if x87 FPU is present as indicated by the CPUID.FPU feature bit, then the FCOMI and FCMOV instructions are supported + const CMOV = 1 << (32 + 15); + /// Page Attribute Table. Page Attribute Table is supported. 
This feature augments the Memory Type Range Registers (MTRRs), allowing an operating system to specify attributes of memory accessed through a linear address on a 4KB granularity. + const PAT = 1 << (32 + 16); + /// 36-Bit Page Size Extension. 4-MByte pages addressing physical memory beyond 4 GBytes are supported with 32-bit paging. This feature indicates that upper bits of the physical address of a 4-MByte page are encoded in bits 20:13 of the page-directory entry. Such physical addresses are limited by MAXPHYADDR and may be up to 40 bits in size. + const PSE36 = 1 << (32 + 17); + /// Processor Serial Number. The processor supports the 96-bit processor identification number feature and the feature is enabled. + const PSN = 1 << (32 + 18); + /// CLFLUSH Instruction. CLFLUSH Instruction is supported. + const CLFSH = 1 << (32 + 19); + /// Debug Store. The processor supports the ability to write debug information into a memory resident buffer. This feature is used by the branch trace store (BTS) and precise event-based sampling (PEBS) facilities (see Chapter 23, Introduction to Virtual-Machine Extensions, in the Intel® 64 and IA-32 Architectures Software Developers Manual, Volume 3C). + const DS = 1 << (32 + 21); + /// Thermal Monitor and Software Controlled Clock Facilities. The processor implements internal MSRs that allow processor temperature to be monitored and processor performance to be modulated in predefined duty cycles under software control. + const ACPI = 1 << (32 + 22); + /// Intel MMX Technology. The processor supports the Intel MMX technology. + const MMX = 1 << (32 + 23); + /// FXSAVE and FXRSTOR Instructions. The FXSAVE and FXRSTOR instructions are supported for fast save and restore of the floating point context. Presence of this bit also indicates that CR4.OSFXSR is available for an operating system to indicate that it supports the FXSAVE and FXRSTOR instructions. + const FXSR = 1 << (32 + 24); + /// SSE. The processor supports the SSE extensions. 
+ const SSE = 1 << (32 + 25); + /// SSE2. The processor supports the SSE2 extensions. + const SSE2 = 1 << (32 + 26); + /// Self Snoop. The processor supports the management of conflicting memory types by performing a snoop of its own cache structure for transactions issued to the bus. + const SS = 1 << (32 + 27); + /// Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a single logical processor in the package and software should assume only a single APIC ID is reserved. A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] (the Maximum number of addressable IDs for logical processors in this package) is valid for the package. + const HTT = 1 << (32 + 28); + /// Thermal Monitor. The processor implements the thermal monitor automatic thermal control circuitry (TCC). + const TM = 1 << (32 + 29); + /// Pending Break Enable. The processor supports the use of the FERR#/PBE# pin when the processor is in the stop-clock state (STPCLK# is asserted) to signal the processor that an interrupt is pending and that the processor should return to normal operation to handle the interrupt. Bit 10 (PBE enable) in the IA32_MISC_ENABLE MSR enables this capability. + const PBE = 1 << (32 + 31); + } + + pub struct ExtendedFeaturesEcx: u32 { + /// Bit 0: Prefetch WT1. (Intel® Xeon Phi™ only). + const PREFETCHWT1 = 1 << 0; + // Bit 01: AVX512_VBMI + const AVX512VBMI = 1 << 1; + /// Bit 02: UMIP. Supports user-mode instruction prevention if 1. + const UMIP = 1 << 2; + /// Bit 03: PKU. Supports protection keys for user-mode pages if 1. + const PKU = 1 << 3; + /// Bit 04: OSPKE. If 1, OS has set CR4.PKE to enable protection keys (and the RDPKRU/WRPKRU instruc-tions). + const OSPKE = 1 << 4; + /// Bit 5: WAITPKG + const WAITPKG = 1 << 5; + /// Bit 6: AV512_VBMI2 + const AVX512VBMI2 = 1 << 6; + /// Bit 7: CET_SS. Supports CET shadow stack features if 1. Processors that set this bit define bits 0..2 of the + /// IA32_U_CET and IA32_S_CET MSRs. 
Enumerates support for the following MSRs: + /// IA32_INTERRUPT_SPP_TABLE_ADDR, IA32_PL3_SSP, IA32_PL2_SSP, IA32_PL1_SSP, and IA32_PL0_SSP. + const CETSS = 1 << 7; + /// Bit 8: GFNI + const GFNI = 1 << 8; + /// Bit 9: VAES + const VAES = 1 << 9; + /// Bit 10: VPCLMULQDQ + const VPCLMULQDQ = 1 << 10; + /// Bit 11: AVX512_VNNI + const AVX512VNNI = 1 << 11; + /// Bit 12: AVX512_BITALG + const AVX512BITALG = 1 << 12; + /// Bit 13: TME_EN. If 1, the following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, + /// IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + const TMEEN = 1 << 13; + /// Bit 14: AVX512_VPOPCNTDQ + const AVX512VPOPCNTDQ = 1 << 14; + + // Bit 15: Reserved. + + /// Bit 16: Supports 57-bit linear addresses and five-level paging if 1. + const LA57 = 1 << 16; + + // Bits 21 - 17: The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode + + /// Bit 22: RDPID. RDPID and IA32_TSC_AUX are available if 1. + const RDPID = 1 << 22; + + // Bits 29 - 23: Reserved. + + /// Bit 30: SGX_LC. Supports SGX Launch Configuration if 1. + const SGX_LC = 1 << 30; + } +} diff --git a/src/arch/x86_64/entry.rs b/src/arch/x86_64/entry.rs new file mode 100644 index 00000000..2a6db8bd --- /dev/null +++ b/src/arch/x86_64/entry.rs @@ -0,0 +1,109 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{boot, cpu::this_apic_id, graphics::font_init}, + consts::PER_CPU_SIZE, + memory::addr::PHYS_VIRT_OFFSET, + platform::__board, + rust_main, +}; +use core::arch::global_asm; +use x86::msr::IA32_EFER; +use x86_64::registers::{ + control::{Cr0Flags, Cr4Flags}, + model_specific::EferFlags, +}; + +const MULTIBOOT_HEADER_MAGIC: u32 = 0x1bad_b002; +const MULTIBOOT_HEADER_FLAGS: u32 = 0x0001_0002; +const MULTIBOOT2_HEADER_MAGIC: u32 = 0xe852_50d6; +const MULTIBOOT2_ARCH_I386: u32 = 0; +const X86_PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; + +const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits() + | Cr0Flags::MONITOR_COPROCESSOR.bits() + | Cr0Flags::TASK_SWITCHED.bits() + | Cr0Flags::NUMERIC_ERROR.bits() + | Cr0Flags::WRITE_PROTECT.bits() + | Cr0Flags::PAGING.bits(); +const CR4: u64 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION.bits() | Cr4Flags::PAGE_GLOBAL.bits(); +const EFER: u64 = EferFlags::LONG_MODE_ENABLE.bits() | EferFlags::NO_EXECUTE_ENABLE.bits(); + +global_asm!( + include_str!("multiboot.S"), + multiboot_header_magic = const MULTIBOOT_HEADER_MAGIC, + multiboot_header_flags = const MULTIBOOT_HEADER_FLAGS, + multiboot2_header_magic = const MULTIBOOT2_HEADER_MAGIC, + multiboot2_arch_i386 = const MULTIBOOT2_ARCH_I386, + rust_entry = sym rust_entry, + rust_entry_secondary = sym rust_entry_secondary, + offset = const X86_PHYS_VIRT_OFFSET, + per_cpu_size = const PER_CPU_SIZE, + cr0 = const CR0, + cr4 = const CR4, + efer_msr = const IA32_EFER, + efer = const EFER, +); + +#[naked] +#[no_mangle] +#[link_section = ".text.entry"] +pub unsafe extern "C" fn arch_entry() -> i32 { + core::arch::asm!( + " + .code32 + cli + mov edi, eax // magic + mov esi, ebx // multiboot info + jmp bsp_entry32 + ", + options(noreturn), + ); +} + +extern "C" fn rust_entry(magic: u32, info_addr: usize) { + unsafe { fill_page_table() }; + crate::clear_bss(); + unsafe { PHYS_VIRT_OFFSET = 
X86_PHYS_VIRT_OFFSET }; + boot::multiboot_init(info_addr); + #[cfg(all(feature = "graphics"))] + font_init(__board::GRAPHICS_FONT); + boot::print_memory_map(); + rust_main(this_apic_id(), info_addr); +} + +fn rust_entry_secondary() { + // println!("CPUID: {}", this_cpu_id()); + rust_main(this_apic_id(), 0); +} + +extern "C" { + #[link_name = "Ltmp_pdpt_low"] + static mut PDPT_LOW: [u64; 512]; + #[link_name = "Ltmp_pdpt_high"] + static mut PDPT_HIGH: [u64; 512]; +} + +unsafe fn fill_page_table() { + let mut addr: usize = 0; + for i in 0..512 { + // paddr | PRESENT | WRITABLE | HUGE_PAGE + PDPT_LOW[i] = (addr | 0x83) as _; + PDPT_HIGH[i] = (addr | 0x83) as _; + addr += 0x4000_0000; + } +} diff --git a/src/arch/x86_64/graphics.rs b/src/arch/x86_64/graphics.rs new file mode 100644 index 00000000..af5453d8 --- /dev/null +++ b/src/arch/x86_64/graphics.rs @@ -0,0 +1,223 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::arch::boot::get_multiboot_tags; +use spin::{Mutex, Once}; + +const PSF2_MAGIC: u32 = 0x864ab572; + +#[repr(packed)] +#[derive(Debug, Clone, Copy)] +pub struct Psf2Header { + magic: u32, + version: u32, + header_size: u32, + flags: u32, + glyph_nr: u32, + bytes_per_glyph: u32, + height: u32, + width: u32, +} + +#[derive(Debug, Clone, Copy)] +pub struct FontInfo { + /// width in bytes (8 pixels) + width_bytes: usize, + /// width in pixels + width: usize, + /// height in pixels + height: usize, + /// table address + glyph_table: usize, + /// number of glyphs + glyph_nr: u32, + /// size of each glyph + bytes_per_glyph: u32, +} + +static FONT_INFO: Once = Once::new(); + +#[derive(Debug, Clone, Copy)] +pub struct FramebufferInfo { + /// x in char + cursor_x: usize, + /// y in char + cursor_y: usize, + max_char_nr_x: usize, + max_char_nr_y: usize, + pub addr: usize, + pub width: usize, + pub height: usize, +} + +static FRAMEBUFFER_INFO: Once> = Once::new(); + +pub fn font_init(psf: &'static [u8]) { + let psf_header = unsafe { *(psf.as_ptr() as *const Psf2Header) }; + // only support psf2 + assert!(psf_header.magic == PSF2_MAGIC); + + let font_width_bytes = (psf_header.width + 7) / 8; // up align to 8bit + let font_width = font_width_bytes * 8; + + // println!("{:#x?}", psf_header); + + FONT_INFO.call_once(|| FontInfo { + width: font_width as _, + height: psf_header.height as _, + glyph_table: (psf.as_ptr() as usize + psf_header.header_size as usize), + glyph_nr: psf_header.glyph_nr, + bytes_per_glyph: psf_header.bytes_per_glyph, + width_bytes: font_width_bytes as _, + }); + + let framebuffer = &get_multiboot_tags().framebuffer; + FRAMEBUFFER_INFO.call_once(|| { + Mutex::new(FramebufferInfo { + cursor_x: 0, + cursor_y: 0, + max_char_nr_x: (framebuffer.width / font_width) as _, + max_char_nr_y: (framebuffer.height / psf_header.height) as _, + addr: framebuffer.addr as _, + 
width: framebuffer.width as _, + height: framebuffer.height as _, + }) + }); + + fb_clear_screen(); +} + +fn fb_clear_screen() { + let mut fb_info = FRAMEBUFFER_INFO.get().unwrap().lock(); + let mut ptr = fb_info.addr as *mut u32; + for height in 0..fb_info.height { + for width in 0..fb_info.width { + unsafe { + core::ptr::write_volatile(ptr, 0); + ptr = ptr.wrapping_add(1); + } + } + } +} + +fn fb_putchar_internal(ch: u16, fg: u32, bg: u32) { + let font_info = FONT_INFO.get().unwrap(); + let mut glyph = font_info.glyph_table as *const u8; + + if (ch as u32) < font_info.glyph_nr { + glyph = glyph.wrapping_add((ch as usize) * (font_info.bytes_per_glyph as usize)); + } + + { + let mut fb_info = FRAMEBUFFER_INFO.get().unwrap().lock(); + // current pixel + let cur = fb_info.cursor_y * font_info.height * fb_info.width + + fb_info.cursor_x * font_info.width; + let base = fb_info.addr as *mut u32; + + for y in 0..font_info.height { + let mut mask: u8 = 1 << 7; + for x in 0..font_info.width { + if x % 8 == 0 { + mask = 1 << 7; + } + + let color = match unsafe { *glyph.wrapping_add(x / 8) } & mask != 0 { + true => fg, + false => bg, + }; + + let ptr = base.wrapping_add(cur + y * fb_info.width + x); + unsafe { core::ptr::write_volatile(ptr, color) }; + + mask = mask >> 1; + } + + glyph = glyph.wrapping_add(font_info.width_bytes); + } + + fb_info.cursor_x += 1; + if fb_info.cursor_x < fb_info.max_char_nr_x { + return; + } + } + + fb_putchar_new_line(bg); +} + +fn fb_putchar_new_line(bg: u32) { + let font_info = FONT_INFO.get().unwrap(); + let mut fb_info = FRAMEBUFFER_INFO.get().unwrap().lock(); + let base = fb_info.addr as *mut u32; + + fb_info.cursor_x = 0; + fb_info.cursor_y += 1; + + if fb_info.cursor_y >= fb_info.max_char_nr_y { + fb_info.cursor_y = 0; + } + + for y in 0..font_info.height { + let y1 = (y + fb_info.cursor_y * font_info.height) * fb_info.width; + for x in 0..fb_info.width { + unsafe { core::ptr::write_volatile(base.wrapping_add(x + y1), bg) }; + } + } + + 
// may need to scroll up + /*if fb_info.cursor_y >= fb_info.max_char_nr_y { + for y in 0..((fb_info.max_char_nr_y - 1) * font_info.height) { + let y1 = y * fb_info.width; + let y2 = (y + font_info.height) * fb_info.width; + for x in 0..fb_info.width { + unsafe { + core::ptr::write_volatile( + base.wrapping_add(x + y1), + core::ptr::read_volatile(base.wrapping_add(x + y2)), + ) + }; + } + } + + for y in 0..font_info.height { + let y1 = (y + (fb_info.max_char_nr_y - 1) * font_info.height) * fb_info.width; + for x in 0..fb_info.width { + unsafe { core::ptr::write_volatile(base.wrapping_add(x + y1), bg) }; + } + } + + fb_info.cursor_y -= 1; + }*/ +} + +pub fn fb_putchar(ch: u8, fg: u32, bg: u32) { + match ch as char { + '\r' => {} + '\n' => fb_putchar_new_line(bg), + _ => fb_putchar_internal(ch as _, fg, bg), + } +} + +pub fn fb_putstr(s: &str, fg: u32) { + for c in s.chars() { + match c { + '\n' => { + fb_putchar_new_line(0x0); + } + _ => fb_putchar_internal(c as _, fg, 0x0), + } + } +} diff --git a/src/arch/x86_64/hpet.rs b/src/arch/x86_64/hpet.rs new file mode 100644 index 00000000..0cd8da9b --- /dev/null +++ b/src/arch/x86_64/hpet.rs @@ -0,0 +1,227 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::memory::VirtAddr; +use bit_field::BitField; +use core::{arch::x86_64::_rdtsc, time::Duration, u32}; +use spin::Mutex; +use tock_registers::{ + interfaces::{Readable, Writeable}, + register_structs, + registers::{ReadOnly, ReadWrite}, +}; + +type TimeValue = Duration; + +lazy_static::lazy_static! { + static ref HPET: Hpet = { + let mut hpet = Hpet::new(0xfed0_0000); + hpet.init(); + hpet + }; +} + +bitflags::bitflags! { + struct TimerConfigCaps: u64 { + /// 0 - this timer generates edge-triggered interrupts. 1 - this timer + /// generates level-triggered interrupts. + const TN_INT_TYPE_CNF = 1 << 1; + /// Setting this bit to 1 enables triggering of interrupts. + const TN_INT_ENB_CNF = 1 << 2; + /// If Tn_PER_INT_CAP is 1, then writing 1 to this field enables periodic + /// timer. + const TN_TYPE_CNF = 1 << 3; + /// If this read-only bit is set to 1, this timer supports periodic mode. + const TN_PER_INT_CAP = 1 << 4; + /// If this read-only bit is set to 1, the size of the timer is 64-bit. + const TN_SIZE_CAP = 1 << 5; + /// This field is used to allow software to directly set periodic timer's + /// accumulator. + const TN_VAL_SET_CNF = 1 << 6; + /// For 64-bit timer, if this field is set, the timer will be forced to + /// work in 32-bit mode. + const TN_32MODE_CNF = 1 << 8; + } +} + +register_structs! { + HpetRegs { + /// General Capabilities and ID Register. + (0x000 => general_caps: ReadOnly), + (0x008 => _reserved_0), + /// General Configuration Register. + (0x010 => general_config: ReadWrite), + (0x018 => _reserved_1), + /// General Interrupt Status Register. + (0x020 => general_intr_status: ReadWrite), + (0x028 => _reserved_2), + /// Main Counter Value Register. + (0x0f0 => main_counter_value: ReadWrite), + (0x0f8 => _reserved_3), + (0x100 => @END), + } +} + +register_structs! { + HpetTimerRegs { + /// Timer N Configuration and Capability Register. 
+ (0x0 => config_caps: ReadWrite), + /// Timer N Comparator Value Register. + (0x8 => comparator_value: ReadWrite), + /// Timer N FSB Interrupt Route Register. + (0x10 => fsb_int_route: ReadWrite), + (0x18 => _reserved_0), + (0x20 => @END), + } +} + +struct Hpet { + base_vaddr: VirtAddr, + num_timers: u8, + period_fs: u64, + freq_hz: u64, + freq_mhz: u64, + ticks_per_ms: u64, + is_64_bit: bool, +} + +impl Hpet { + const fn new(base_vaddr: VirtAddr) -> Self { + Self { + base_vaddr, + num_timers: 0, + period_fs: 0, + freq_hz: 0, + freq_mhz: 0, + ticks_per_ms: 0, + is_64_bit: false, + } + } + + const fn regs(&self) -> &HpetRegs { + unsafe { &*(self.base_vaddr as *const HpetRegs) } + } + + const fn timer_regs(&self, n: u8) -> &HpetTimerRegs { + assert!(n < self.num_timers); + unsafe { &*((self.base_vaddr + 0x100 + n as usize * 0x20) as *const HpetTimerRegs) } + } + + fn init(&mut self) { + println!("Initializing HPET..."); + let cap = self.regs().general_caps.get(); + let num_timers = cap.get_bits(8..=12) as u8 + 1; + let period_fs = cap.get_bits(32..); + let is_64_bit = cap.get_bit(13); + let freq_hz = 1_000_000_000_000_000 / period_fs; + println!( + "HPET: {}.{:06} MHz, {}-bit, {} timers", + freq_hz / 1_000_000, + freq_hz % 1_000_000, + if is_64_bit { 64 } else { 32 }, + num_timers + ); + + self.num_timers = num_timers; + self.period_fs = period_fs; + self.freq_hz = freq_hz; + self.freq_mhz = freq_hz / 1_000_000; + self.ticks_per_ms = freq_hz / 1000; + self.is_64_bit = is_64_bit; + + self.set_enable(false); + for i in 0..num_timers { + // disable timer interrupts + let config_caps = + unsafe { TimerConfigCaps::from_bits_retain(self.timer_regs(i).config_caps.get()) }; + self.timer_regs(i) + .config_caps + .set((config_caps - TimerConfigCaps::TN_INT_ENB_CNF).bits()); + } + self.set_enable(true); + } + + fn set_enable(&mut self, enable: bool) { + const LEG_RT_CNF: u64 = 1 << 1; // Legacy replacement mapping will disable PIT IRQs + const ENABLE_CNF: u64 = 1 << 0; + let 
config = &self.regs().general_config; + if enable { + config.set(LEG_RT_CNF | ENABLE_CNF); + } else { + config.set(0); + } + } + + fn wait_millis(&self, millis: u64) { + let main_counter_value = &self.regs().main_counter_value; + let ticks = millis * self.ticks_per_ms; + let init = main_counter_value.get(); + while main_counter_value.get().wrapping_sub(init) < ticks {} + } +} + +pub fn busy_wait(duration: Duration) { + busy_wait_until(current_time() + duration); +} + +fn busy_wait_until(deadline: TimeValue) { + while current_time() < deadline { + core::hint::spin_loop(); + } +} + +pub fn current_time() -> TimeValue { + TimeValue::from_nanos(current_time_nanos()) +} + +pub fn current_ticks() -> u64 { + HPET.regs().main_counter_value.get() +} + +pub fn ticks_to_nanos(ticks: u64) -> u64 { + ticks * 1_000 / HPET.freq_mhz +} + +pub fn current_time_nanos() -> u64 { + ticks_to_nanos(current_ticks()) +} + +pub fn wait_millis(millis: u64) { + HPET.wait_millis(millis); +} + +pub fn get_tsc_freq_mhz() -> Option { + let mut best_freq_mhz = u32::MAX; + for _ in 0..5 { + let tsc_start = unsafe { _rdtsc() }; + let hpet_start = current_ticks(); + wait_millis(10); + let tsc_end = unsafe { _rdtsc() }; + let hpet_end = current_ticks(); + + let nanos = ticks_to_nanos(hpet_end.wrapping_sub(hpet_start)); + let freq_mhz = ((tsc_end - tsc_start) * 1_000 / nanos) as u32; + + if freq_mhz < best_freq_mhz { + best_freq_mhz = freq_mhz; + } + } + if best_freq_mhz != u32::MAX { + Some(best_freq_mhz) + } else { + None + } +} diff --git a/src/arch/x86_64/hypercall.rs b/src/arch/x86_64/hypercall.rs new file mode 100644 index 00000000..9a84d84d --- /dev/null +++ b/src/arch/x86_64/hypercall.rs @@ -0,0 +1,86 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::cpu::this_cpu_id, + config::CONFIG_MAGIC_VERSION, + device::virtio_trampoline::MAX_DEVS, + hypercall::{HyperCall, HyperCallResult}, + percpu::this_zone, + zone::{Zone, ZoneInfo}, +}; +use spin::RwLock; + +impl<'a> HyperCall<'a> { + pub fn hv_ivc_info(&mut self, ivc_info_ipa: u64) -> HyperCallResult { + warn!("hv_ivc_info is not implemented for x86_64"); + HyperCallResult::Ok(0) + } + + pub fn wait_for_interrupt(&mut self, irq_list: &mut [u64; MAX_DEVS + 1]) { + trace!("wait_for_interrupt is not need for x86_64"); + } + + pub fn hv_get_real_pa(&mut self, config_addr: u64) -> u64 { + unsafe { + this_zone() + .read() + .gpm + .page_table_query(config_addr as _) + .unwrap() + .0 as _ + } + } + + pub fn hv_zone_config_check(&self, magic_version: *mut u64) -> HyperCallResult { + let magic_version = unsafe { + this_zone() + .read() + .gpm + .page_table_query(magic_version as usize) + .unwrap() + .0 as *mut u64 + }; + unsafe { + *magic_version = CONFIG_MAGIC_VERSION as _; + } + debug!( + "hv_zone_config_check: finished writing current magic version ({:#x})", + CONFIG_MAGIC_VERSION + ); + HyperCallResult::Ok(0) + } + + pub fn check_cpu_id(&self) { + let cpuid = this_cpu_id(); + trace!("CPU ID: {} Start Zone", cpuid); + } + + pub fn hv_virtio_get_irq(&self, virtio_irq: *mut u32) -> HyperCallResult { + let virtio_irq = unsafe { + this_zone() + .read() + .gpm + .page_table_query(virtio_irq as usize) + .unwrap() + .0 as *mut u32 + }; + unsafe { + (*virtio_irq) = crate::device::virtio_trampoline::IRQ_WAKEUP_VIRTIO_DEVICE as _; + }; 
+ HyperCallResult::Ok(0) + } +} diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs new file mode 100644 index 00000000..97f284e5 --- /dev/null +++ b/src/arch/x86_64/idt.rs @@ -0,0 +1,61 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{error::HvResult, zone::this_zone_id}; +use alloc::{collections::btree_map::BTreeMap, vec::Vec}; +use core::u32; +use spin::{Mutex, Once}; +use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; + +const VECTOR_CNT: usize = 256; + +#[allow(non_snake_case)] +pub mod IdtVector { + pub const VIRT_IPI_VECTOR: u8 = 0xef; + pub const APIC_ERROR_VECTOR: u8 = 0xfc; + pub const APIC_SPURIOUS_VECTOR: u8 = 0xfd; + pub const APIC_TIMER_VECTOR: u8 = 0xfe; +} + +pub struct IdtStruct { + table: InterruptDescriptorTable, +} + +impl IdtStruct { + pub fn new() -> Self { + extern "C" { + #[link_name = "_hyp_trap_vector"] + static ENTRIES: [extern "C" fn(); VECTOR_CNT]; + } + let mut idt = Self { + table: InterruptDescriptorTable::new(), + }; + let entries = unsafe { + core::slice::from_raw_parts_mut( + &mut idt.table as *mut _ as *mut Entry, + VECTOR_CNT, + ) + }; + for i in 0..VECTOR_CNT { + entries[i].set_handler_fn(unsafe { core::mem::transmute(ENTRIES[i]) }); + } + idt + } + + pub fn load(&'static self) { + self.table.load(); + } +} diff --git a/src/arch/x86_64/iommu.rs b/src/arch/x86_64/iommu.rs new file mode 100644 index 00000000..80eff95f --- /dev/null +++ 
b/src/arch/x86_64/iommu.rs @@ -0,0 +1,527 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{acpi, hpet::current_time_nanos}, + memory::{addr::virt_to_phys, Frame, HostPhysAddr}, + zone::this_zone_id, +}; +use ::acpi::sdt::Signature; +use alloc::{collections::btree_map::BTreeMap, vec::Vec}; +use bit_field::BitField; +use core::{ + arch::asm, + hint::spin_loop, + mem::size_of, + ptr::{read_volatile, write_volatile}, + usize, +}; +use dma_remap_reg::*; +use spin::{Mutex, Once}; +use x86_64::instructions::port::Port; + +const IR_ENTRY_CNT: usize = 256; + +const ROOT_TABLE_ENTRY_SIZE: usize = 16; +const CONTEXT_TABLE_ENTRY_SIZE: usize = 16; + +const INVALIDATION_QUEUE_SIZE: usize = 4096; +const QI_INV_ENTRY_SIZE: usize = 16; +const NUM_IR_ENTRIES_PER_PAGE: usize = 256; + +const INV_CONTEXT_CACHE_DESC: u64 = 0x01; +const INV_IOTLB_DESC: u64 = 0x02; +const INV_WAIT_DESC: u64 = 0x05; + +const INV_STATUS_WRITE: u64 = 1 << 5; +const INV_STATUS_INCOMPLETED: u64 = 0; +const INV_STATUS_COMPLETED: u64 = 1; +const INV_STATUS_DATA: u64 = INV_STATUS_COMPLETED << 32; +const INV_WAIT_DESC_LOWER: u64 = INV_WAIT_DESC | INV_STATUS_WRITE | INV_STATUS_DATA; + +const DMA_CONTEXT_DEVICE_INVL: u64 = (3 << 4); + +const DMA_IOTLB_DOMAIN_INVL: u64 = (2 << 4); +const DMA_IOTLB_DW: u64 = (1 << 6); +const DMA_IOTLB_DR: u64 = (1 << 7); + +// DMA-remapping registers + +mod dma_remap_reg { + /// Capability Register + pub const 
DMAR_CAP_REG: usize = 0x8; + /// Extended Capability Register + pub const DMAR_ECAP_REG: usize = 0x10; + /// Global Command Register + pub const DMAR_GCMD_REG: usize = 0x18; + /// Global Status Register + pub const DMAR_GSTS_REG: usize = 0x1c; + /// Root Table Address Register + pub const DMAR_RTADDR_REG: usize = 0x20; + /// Fault Event Control Register + pub const DMAR_FECTL_REG: usize = 0x38; + /// Invalidation Queue Tail Register + pub const DMAR_IQT_REG: usize = 0x88; + /// Invalidation Queue Address Register + pub const DMAR_IQA_REG: usize = 0x90; + /// Interrupt Remapping Table Address Register + pub const DMAR_IRTA_REG: usize = 0xb8; +} + +static VTD: Once> = Once::new(); + +bitflags::bitflags! { + #[derive(Clone, Copy, Debug)] + pub struct EcapFlags: u64 { + /// Extended Interrupt Mode + const EIM = 1 << 4; + /// Interrupt Remapping Support + const IR = 1 << 3; + /// Queued Invalidation Support + const QI = 1 << 1; + } + + #[derive(Clone, Copy, Debug)] + pub struct GstsFlags: u32 { + /// Translation Enable Status + const TES = 1 << 31; + /// Root Table Pointer Status + const RTPS = 1 << 30; + /// Queue Invalidation Enable Status + const QIES = 1 << 26; + /// Interrupt Remapping Enable Status + const IRES = 1 << 25; + /// Interrupt Remap Table Pointer Status + const IRTPS = 1 << 24; + } + + #[derive(Clone, Copy, Debug)] + pub struct GcmdFlags: u32 { + /// Translation Enable + const TE = 1 << 31; + /// Set Root Table Pointer + const SRTP = 1 << 30; + /// Queue Invalidation Enable + const QIE = 1 << 26; + /// Interrupt Remapping Enable + const IRE = 1 << 25; + /// Set Interrupt Remap Table Pointer + const SIRTP = 1 << 24; + } +} + +/*numeric_enum_macro::numeric_enum! 
{ +#[repr(u8)] +#[derive(Clone, Debug, PartialEq)] +pub enum DeviceScopeType { + NotUsed = 0x00, + PciEndpointDevice = 0x01, + PciSubHierarchy = 0x02, + IoApic = 0x03, + MsiCapableHpet = 0x04, + AcpiNamespaceDevice = 0x05 +} +}*/ + +#[derive(Clone, Debug)] +struct VtdDevice { + zone_id: usize, + bus: u8, + dev_func: u8, +} + +#[derive(Clone, Debug)] +struct DmarEntry { + lo_64: u64, + hi_64: u64, +} + +#[derive(Debug)] +struct Vtd { + reg_base_hpa: usize, + devices: BTreeMap, + + root_table: Frame, + context_tables: BTreeMap, + qi_queue: Frame, + ir_table: Frame, + /// cache value of DMAR_GCMD_REG + gcmd: GcmdFlags, + qi_queue_hpa: usize, + qi_tail: usize, +} + +impl Vtd { + fn activate(&mut self) { + self.activate_dma_translation(); + } + + fn activate_dma_translation(&mut self) { + if !self.gcmd.contains(GcmdFlags::TE) { + self.gcmd |= GcmdFlags::TE; + self.mmio_write_u32(DMAR_GCMD_REG, self.gcmd.bits()); + + self.wait(GstsFlags::TES, false); + } + } + + fn activate_interrupt_remapping(&mut self) { + if !self.gcmd.contains(GcmdFlags::IRE) { + self.gcmd |= GcmdFlags::IRE; + self.mmio_write_u32(DMAR_GCMD_REG, self.gcmd.bits()); + + self.wait(GstsFlags::IRES, false); + } + } + + fn activate_qi(&mut self) { + self.qi_queue_hpa = self.qi_queue.start_paddr(); + self.mmio_write_u64(DMAR_IQA_REG, self.qi_queue_hpa as u64); + self.mmio_write_u32(DMAR_IQT_REG, 0); + + if !self.gcmd.contains(GcmdFlags::QIE) { + self.gcmd |= GcmdFlags::QIE; + + self.mmio_write_u32(DMAR_GCMD_REG, self.gcmd.bits()); + + self.wait(GstsFlags::QIES, false); + } + } + + fn update_context_entry( + &mut self, + bus: u8, + dev_func: u8, + zone_s2pt_hpa: HostPhysAddr, + is_insert: bool, + ) { + let root_entry_hpa = self.root_table.start_paddr() + (bus as usize) * ROOT_TABLE_ENTRY_SIZE; + let root_entry_low = unsafe { &mut *(root_entry_hpa as *mut u64) }; + let zone_id = this_zone_id(); + + // context table not present + if !root_entry_low.get_bit(0) { + let context_table = Frame::new_zero().unwrap(); 
+ let context_table_hpa = context_table.start_paddr(); + + // set context-table pointer + root_entry_low.set_bits(12..=63, context_table_hpa.get_bits(12..=63) as _); + // set present + root_entry_low.set_bit(0, true); + + flush_cache_range(root_entry_hpa, ROOT_TABLE_ENTRY_SIZE); + self.context_tables.insert(bus, context_table); + } + + let context_table_hpa = self.context_tables.get(&bus).unwrap().start_paddr(); + let context_entry_hpa = context_table_hpa + (dev_func as usize) * CONTEXT_TABLE_ENTRY_SIZE; + let context_entry = unsafe { &mut *(context_entry_hpa as *mut u128) }; + + if is_insert { + // address width: 010b (48bit 4-level page table) + context_entry.set_bits(64..=66, 0b010); + // domain identifier: zone id + context_entry.set_bits(72..=87, zone_id as _); + // second stage page translation pointer + context_entry.set_bits(12..=63, zone_s2pt_hpa.get_bits(12..=63) as _); + // present + context_entry.set_bit(0, true); + } else { + context_entry.set_bits(0..=127, 0); + } + + flush_cache_range(context_entry_hpa, CONTEXT_TABLE_ENTRY_SIZE); + let bdf: u16 = (bus as u16) << 8 | (dev_func as u16); + self.invalidate_context_cache(zone_id as _, bdf as _, 0); + } + + fn add_device(&mut self, zone_id: usize, bdf: u64) { + self.devices.insert(bdf, zone_id); + } + + fn add_interrupt_table_entry(&mut self, irq: u32) { + assert!(irq < (IR_ENTRY_CNT as u32)); + + let ir_table_hpa = self.ir_table.start_paddr(); + let irte_hpa = ir_table_hpa + (irq as usize) * size_of::(); + let irte_ptr = irte_hpa as *mut u128; + let mut irte: u128 = 0; + + // present + irte.set_bit(0, true); + // irte mode: remap + irte.set_bit(15, false); + // vector + irte.set_bits(16..=23, irq as _); + // dest id + irte.set_bits(32..=63, 0); + + unsafe { *irte_ptr = irte }; + flush_cache_range(irte_hpa, size_of::()); + + // TODO: iec + } + + fn check_capability(&mut self) { + let cap = self.mmio_read_u64(DMAR_CAP_REG); + let ecap = self.mmio_read_u64(DMAR_ECAP_REG); + info!("cap: {:x?} ecap: {:x?}", 
cap, ecap); + assert!(EcapFlags::from_bits_truncate(ecap) + .contains(EcapFlags::EIM | EcapFlags::IR | EcapFlags::QI)); + } + + fn clear_devices(&mut self, zone_id: usize) { + let bdfs: Vec<(u8, u8)> = self + .devices + .iter() + .filter(|&(_, &dev_zone_id)| dev_zone_id == zone_id) + .map(|(&bdf, _)| (bdf.get_bits(8..=15) as u8, bdf.get_bits(0..=7) as u8)) + .collect(); + + for (bus, dev_func) in bdfs { + self.update_context_entry(bus, dev_func, 0, false); + } + self.invalid_iotlb(zone_id as _); + } + + fn init(&mut self) { + self.check_capability(); + self.set_interrupt(); + self.set_root_table(); + self.activate_qi(); + + /* self.set_interrupt_remap_table(); + for irq in 0..IR_ENTRY_CNT { + self.add_interrupt_table_entry(irq as _); + } + self.activate_interrupt_remapping(); */ + } + + fn invalidate_context_cache(&mut self, domain_id: u16, source_id: u16, func_mask: u8) { + let entry: DmarEntry = DmarEntry { + lo_64: INV_CONTEXT_CACHE_DESC + | DMA_CONTEXT_DEVICE_INVL + | dma_ccmd_did(domain_id) + | dma_ccmd_sid(source_id) + | dma_ccmd_fm(func_mask), + hi_64: 0, + }; + if (entry.lo_64 != 0) { + self.issue_qi_request(entry); + } + } + + fn invalid_iotlb(&mut self, domain_id: u16) { + let entry: DmarEntry = DmarEntry { + // drain read & drain write + lo_64: INV_IOTLB_DESC + | DMA_IOTLB_DOMAIN_INVL + | DMA_IOTLB_DR + | DMA_IOTLB_DW + | dma_iotlb_did(domain_id), + hi_64: 0, + }; + if (entry.lo_64 != 0) { + self.issue_qi_request(entry); + } + } + + fn issue_qi_request(&mut self, entry: DmarEntry) { + let mut qi_status: u32 = 0; + let qi_status_ptr = &qi_status as *const u32; + + unsafe { + let mut invalidate_desc = &mut *((self.qi_queue_hpa + self.qi_tail) as *mut DmarEntry); + invalidate_desc.hi_64 = entry.hi_64; + invalidate_desc.lo_64 = entry.lo_64; + } + self.qi_tail = (self.qi_tail + QI_INV_ENTRY_SIZE) % INVALIDATION_QUEUE_SIZE; + unsafe { + let mut invalidate_desc = &mut *((self.qi_queue_hpa + self.qi_tail) as *mut DmarEntry); + invalidate_desc.hi_64 = 
virt_to_phys(qi_status_ptr as usize) as u64; + invalidate_desc.lo_64 = INV_WAIT_DESC_LOWER; + } + self.qi_tail = (self.qi_tail + QI_INV_ENTRY_SIZE) % INVALIDATION_QUEUE_SIZE; + + qi_status = INV_STATUS_INCOMPLETED as u32; + self.mmio_write_u32(DMAR_IQT_REG, self.qi_tail as _); + + let start_tick = current_time_nanos(); + while (qi_status != INV_STATUS_COMPLETED as _) { + if (current_time_nanos() - start_tick > 1000000) { + error!("issue qi request failed!"); + break; + } + unsafe { + asm!("pause", options(nostack, preserves_flags)); + } + } + } + + fn set_interrupt(&mut self) { + self.mmio_write_u32(DMAR_FECTL_REG, 0); + } + + fn set_interrupt_remap_table(&mut self) { + // bit 12-63: ir table address + // bit 11: x2apic mode active + // bit 0-3: X, where 2 ^ (X + 1) == number of entries + let address: u64 = + (self.ir_table.start_paddr() as u64) | (1 << 11) | ((IR_ENTRY_CNT.ilog2() - 1) as u64); + + self.mmio_write_u64(DMAR_IRTA_REG, address); + self.mmio_write_u32(DMAR_GCMD_REG, (self.gcmd | GcmdFlags::SIRTP).bits()); + + self.wait(GstsFlags::IRTPS, false); + } + + fn set_root_table(&mut self) { + self.mmio_write_u64(DMAR_RTADDR_REG, self.root_table.start_paddr() as _); + self.mmio_write_u32(DMAR_GCMD_REG, (self.gcmd | GcmdFlags::SRTP).bits()); + + self.wait(GstsFlags::RTPS, false); + } + + fn fill_dma_translation_tables(&mut self, zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { + let bdfs: Vec<(u8, u8)> = self + .devices + .iter() + .filter(|&(_, &dev_zone_id)| dev_zone_id == zone_id) + .map(|(&bdf, _)| (bdf.get_bits(8..=15) as u8, bdf.get_bits(0..=7) as u8)) + .collect(); + + for (bus, dev_func) in bdfs { + self.update_context_entry(bus, dev_func, zone_s2pt_hpa, true); + } + self.invalid_iotlb(zone_id as _); + } + + fn wait(&mut self, mask: GstsFlags, cond: bool) { + loop { + spin_loop(); + if GstsFlags::from_bits_truncate(self.mmio_read_u32(DMAR_GSTS_REG)).contains(mask) + != cond + { + break; + } + } + } + + fn mmio_read_u32(&self, reg: usize) -> u32 { + unsafe 
{ read_volatile((self.reg_base_hpa + reg) as *const u32) } + } + + fn mmio_read_u64(&self, reg: usize) -> u64 { + unsafe { read_volatile((self.reg_base_hpa + reg) as *const u64) } + } + + fn mmio_write_u32(&self, reg: usize, value: u32) { + unsafe { write_volatile((self.reg_base_hpa + reg) as *mut u32, value) }; + } + + fn mmio_write_u64(&self, reg: usize, value: u64) { + unsafe { write_volatile((self.reg_base_hpa + reg) as *mut u64, value) }; + } +} + +const fn dma_ccmd_sid(sid: u16) -> u64 { + ((sid as u64) & 0xffff) << 32 +} + +const fn dma_ccmd_did(did: u16) -> u64 { + ((did as u64) & 0xffff) << 16 +} + +const fn dma_ccmd_fm(fm: u8) -> u64 { + ((fm as u64) & 0x3) << 48 +} + +const fn dma_iotlb_did(did: u16) -> u64 { + ((did as u64) & 0xffff) << 16 +} + +pub fn parse_root_dmar() -> Mutex { + let dmar = acpi::root_get_table(&Signature::DMAR).unwrap(); + let mut cur: usize = 48; // start offset of remapping structures + let len = dmar.get_len(); + + let mut reg_base_hpa: usize = 0; + + while cur < len { + let struct_type = dmar.get_u16(cur); + let struct_len = dmar.get_u16(cur + 2) as usize; + + if struct_type == 0 { + let segment = dmar.get_u16(cur + 6); + + // we only support segment 0 + if segment == 0 { + reg_base_hpa = dmar.get_u64(cur + 8) as usize; + } + } + cur += struct_len; + } + + assert!(reg_base_hpa != 0); + + Mutex::new(Vtd { + reg_base_hpa, + devices: BTreeMap::new(), + root_table: Frame::new_zero().unwrap(), + context_tables: BTreeMap::new(), + qi_queue: Frame::new().unwrap(), + ir_table: Frame::new().unwrap(), + gcmd: GcmdFlags::empty(), + qi_queue_hpa: 0, + qi_tail: 0, + }) +} + +// called after acpi init +pub fn iommu_init() { + VTD.call_once(|| parse_root_dmar()); + VTD.get().unwrap().lock().init(); + // init_msi_cap_hpa_space(); +} + +pub fn iommu_add_device(zone_id: usize, bdf: usize, _: usize) { + // info!("vtd add device: {:x}, zone: {:x}", bdf, zone_id); + VTD.get().unwrap().lock().add_device(zone_id, bdf as _); +} + +pub fn 
clear_dma_translation_tables(zone_id: usize) { + VTD.get().unwrap().lock().clear_devices(zone_id); +} + +pub fn fill_dma_translation_tables(zone_id: usize, zone_s2pt_hpa: HostPhysAddr) { + VTD.get() + .unwrap() + .lock() + .fill_dma_translation_tables(zone_id, zone_s2pt_hpa); +} + +/// should be called after gpm is activated +pub fn activate() { + VTD.get().unwrap().lock().activate(); +} + +fn flush_cache_range(hpa: usize, size: usize) { + let mut i = 0usize; + while i < size { + unsafe { asm!("clflushopt [{addr}]", addr = in(reg) hpa + i) }; + i += 64; + } +} diff --git a/src/arch/x86_64/ipi.rs b/src/arch/x86_64/ipi.rs new file mode 100644 index 00000000..b8c20de5 --- /dev/null +++ b/src/arch/x86_64/ipi.rs @@ -0,0 +1,170 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + acpi::{get_apic_id, get_cpu_id}, + cpu::this_cpu_id, + idt::IdtVector, + }, + device::irqchip::inject_vector, + error::HvResult, + event, + hypercall::SGI_IPI_ID, + percpu::{this_cpu_data, this_zone, CpuSet}, +}; +use alloc::{collections::vec_deque::VecDeque, vec::Vec}; +use bit_field::BitField; +use spin::{Mutex, Once}; + +#[allow(non_snake_case)] +pub mod IpiDeliveryMode { + pub const FIXED: u8 = 0; + pub const NMI: u8 = 4; + pub const INIT: u8 = 5; + pub const START_UP: u8 = 6; +} + +#[allow(non_snake_case)] +pub mod IpiDestShorthand { + pub const NO_SHORTHAND: u8 = 0; + pub const SELF: u8 = 1; + pub const ALL_INCLUDING_SELF: u8 = 2; + pub const ALL_EXCLUDING_SELF: u8 = 3; +} + +pub struct IpiInfo { + pub start_up_addr: usize, +} + +impl IpiInfo { + fn new() -> Self { + Self { start_up_addr: 0 } + } +} + +static IPI_MANAGER: Once = Once::new(); +struct IpiManager { + pub inner: Vec>, +} + +impl IpiManager { + fn new(max_cpus: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_cpus { + let v = Mutex::new(IpiInfo::new()); + vs.push(v) + } + Self { inner: vs } + } + + fn get_ipi_info<'a>(&'a self, cpu: usize) -> Option<&'a Mutex> { + self.inner.get(cpu) + } +} + +pub fn init(max_cpus: usize) { + IPI_MANAGER.call_once(|| IpiManager::new(max_cpus)); +} + +pub fn get_ipi_info<'a>(cpu: usize) -> Option<&'a Mutex> { + IPI_MANAGER.get().unwrap().get_ipi_info(cpu) +} + +pub fn send_ipi(value: u64) -> HvResult { + let vector = value.get_bits(0..=7) as u8; + let delivery_mode: u8 = value.get_bits(8..=10) as u8; + let dest_shorthand = value.get_bits(18..=19) as u8; + let dest = get_cpu_id(value.get_bits(32..=39) as usize); + let cnt = value.get_bits(40..=63) as u32; + + let mut cpu_set = this_zone().read().cpu_set; + let cpu_id = this_cpu_id(); + let mut dest_set = CpuSet::new(cpu_set.max_cpu_id, 0); + + match dest_shorthand { + 
IpiDestShorthand::NO_SHORTHAND => { + dest_set.set_bit(dest); + } + IpiDestShorthand::SELF => { + dest_set.set_bit(cpu_id); + } + IpiDestShorthand::ALL_INCLUDING_SELF => { + dest_set = cpu_set; + } + IpiDestShorthand::ALL_EXCLUDING_SELF => { + dest_set = cpu_set; + dest_set.clear_bit(cpu_id); + } + _ => {} + } + + dest_set.iter().for_each(|dest| { + match delivery_mode { + IpiDeliveryMode::FIXED => { + // info!("dest: {:x}, vector: {:x}", dest, vector); + inject_vector(dest, vector, None, false); + } + IpiDeliveryMode::NMI => { + inject_vector(dest, 2, None, false); + } + IpiDeliveryMode::INIT => {} + IpiDeliveryMode::START_UP => { + let mut ipi_info = get_ipi_info(dest).unwrap().lock(); + ipi_info.start_up_addr = (vector as usize) << 12; + event::send_event(dest, SGI_IPI_ID as _, event::IPI_EVENT_WAKEUP); + } + _ => {} + } + }); + + Ok(()) +} + +pub fn arch_send_event(dest: u64, _: u64) { + unsafe { + this_cpu_data() + .arch_cpu + .virt_lapic + .phys_lapic + .send_ipi(IdtVector::VIRT_IPI_VECTOR, get_apic_id(dest as _) as _) + }; +} + +pub fn handle_virt_ipi() { + // this may never return! + loop { + let ret = event::check_events(); + if !ret { + break; + } + } +} + +pub fn arch_check_events(event: Option) { + match event { + _ => { + info!( + "x86_64: arch_check_events: event={:#x?} do nothing now", + event + ); + } + } +} + +pub fn arch_prepare_send_event(cpu_id: usize, ipi_int_id: usize, event_id: usize) { + debug!("x86_64 arch_prepare_send_event: do nothing now.") +} diff --git a/src/arch/x86_64/mm.rs b/src/arch/x86_64/mm.rs new file mode 100644 index 00000000..97680cf4 --- /dev/null +++ b/src/arch/x86_64/mm.rs @@ -0,0 +1,40 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{acpi, boot, s1pt::Stage1PageTable, s2pt::Stage2PageTable}, + error::HvResult, + memory::MemorySet, +}; + +pub fn new_s2_memory_set() -> MemorySet { + MemorySet::new(4) +} + +pub fn init_hv_page_table() -> HvResult { + Ok(()) +} + +pub fn arch_setup_parange() { + // x86_64 does not have a parange setup like AArch64. + // The parange is determined by the memory regions defined in the device tree. + // So we do not need to do anything here. +} + +pub fn arch_post_heap_init(host_dtb: usize) { + boot::module_init(host_dtb); + acpi::root_init(); +} diff --git a/src/arch/x86_64/mmio.rs b/src/arch/x86_64/mmio.rs new file mode 100644 index 00000000..2b7681c4 --- /dev/null +++ b/src/arch/x86_64/mmio.rs @@ -0,0 +1,550 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + s2pt::DescriptorAttr, + vmcs::{VmcsGuest16, VmcsGuestNW}, + }, + error::HvResult, + memory::{ + addr::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr}, + MMIOAccess, MMIOHandler, + }, + percpu::{this_cpu_data, this_zone}, +}; +use alloc::{sync::Arc, vec::Vec}; +use bit_field::BitField; +use core::{mem::size_of, ops::Range, ptr::write_volatile, slice::from_raw_parts}; +use spin::Mutex; +use x86::controlregs::{Cr0, Cr4}; + +pub trait MMIoDevice: Send + Sync { + fn gpa_range(&self) -> &Vec>; + fn read(&self, gpa: GuestPhysAddr) -> HvResult; + fn write(&self, gpa: GuestPhysAddr, value: u64, size: usize) -> HvResult; + fn trigger(&self, signal: usize) -> HvResult; +} + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug)] +pub enum RmReg { + AX = 0, + CX = 1, + DX = 2, + BX = 3, + SP = 4, + BP = 5, + SI = 6, + DI = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + RIP = 16, + CR0 = 17, + CR1 = 18, + CR2 = 19, + CR3 = 20, + CR4 = 21, + GDTR = 22, + LDTR = 23, + TR = 24, + IDTR = 25, +} +} + +impl RmReg { + fn read(&self) -> HvResult { + let gen_regs = this_cpu_data().arch_cpu.regs(); + let res = match self { + RmReg::AX => gen_regs.rax, + RmReg::CX => gen_regs.rcx, + RmReg::DX => gen_regs.rdx, + RmReg::BX => gen_regs.rbx, + RmReg::SP => VmcsGuestNW::RSP.read().unwrap() as _, + RmReg::BP => gen_regs.rbp, + RmReg::SI => gen_regs.rsi, + RmReg::DI => gen_regs.rdi, + RmReg::R8 => gen_regs.r8, + RmReg::R9 => gen_regs.r9, + RmReg::R10 => gen_regs.r10, + RmReg::R11 => gen_regs.r11, + RmReg::R12 => gen_regs.r12, + RmReg::R13 => gen_regs.r13, + RmReg::R14 => gen_regs.r14, + RmReg::R15 => gen_regs.r15, + RmReg::RIP => VmcsGuestNW::RIP.read().unwrap() as _, + RmReg::CR0 => VmcsGuestNW::CR0.read().unwrap() as _, + RmReg::CR3 => VmcsGuestNW::CR3.read().unwrap() as _, + RmReg::CR4 => VmcsGuestNW::CR4.read().unwrap() 
as _, + RmReg::GDTR => VmcsGuestNW::GDTR_BASE.read().unwrap() as _, + RmReg::LDTR => VmcsGuestNW::LDTR_BASE.read().unwrap() as _, + RmReg::TR => VmcsGuestNW::TR_BASE.read().unwrap() as _, + RmReg::IDTR => VmcsGuestNW::IDTR_BASE.read().unwrap() as _, + _ => 0, + }; + Ok(res) + } + + fn write(&self, new_value: u64, size: usize) -> HvResult { + let mut gen_regs = this_cpu_data().arch_cpu.regs_mut(); + + let mut value = self.read().unwrap(); + value.set_bits(0..(size * 8), new_value.get_bits(0..(size * 8))); + + match self { + RmReg::AX => gen_regs.rax = value, + RmReg::CX => gen_regs.rcx = value, + RmReg::DX => gen_regs.rdx = value, + RmReg::BX => gen_regs.rbx = value, + RmReg::SP => VmcsGuestNW::RSP.write(value as _)?, + RmReg::BP => gen_regs.rbp = value, + RmReg::SI => gen_regs.rsi = value, + RmReg::DI => gen_regs.rdi = value, + RmReg::R8 => gen_regs.r8 = value, + RmReg::R9 => gen_regs.r9 = value, + RmReg::R10 => gen_regs.r10 = value, + RmReg::R11 => gen_regs.r11 = value, + RmReg::R12 => gen_regs.r12 = value, + RmReg::R13 => gen_regs.r13 = value, + RmReg::R14 => gen_regs.r14 = value, + RmReg::R15 => gen_regs.r15 = value, + RmReg::RIP => VmcsGuestNW::RIP.write(value as _)?, + RmReg::CR0 => VmcsGuestNW::CR0.write(value as _)?, + RmReg::CR3 => VmcsGuestNW::CR3.write(value as _)?, + RmReg::CR4 => VmcsGuestNW::CR4.write(value as _)?, + RmReg::GDTR => VmcsGuestNW::GDTR_BASE.write(value as _)?, + RmReg::LDTR => VmcsGuestNW::LDTR_BASE.write(value as _)?, + RmReg::TR => VmcsGuestNW::TR_BASE.write(value as _)?, + RmReg::IDTR => VmcsGuestNW::IDTR_BASE.write(value as _)?, + _ => {} + } + Ok(()) + } +} + +/* +G: general registers +E: registers / memory +b: byte +w: word +v: word / dword / qword +*/ +numeric_enum_macro::numeric_enum! { +#[repr(u8)] +#[derive(Debug)] +pub enum OneByteOpCode { + // move r to r/m + MovEbGb = 0x88, + MovEvGv = 0x89, + // move r/m to r + MovGbEb = 0x8a, + MovGvEv = 0x8b, +} +} +numeric_enum_macro::numeric_enum! 
{ +#[repr(u8)] +#[derive(Debug)] +pub enum TwoByteOpCode { + MovZxGvEb = 0xb6, + MovZxGvEw = 0xb7, +} +} + +bitflags::bitflags! { + #[derive(Debug, PartialEq)] + struct RexPrefixLow: u8 { + const BASE = 1 << 0; + const INDEX = 1 << 1; + const REGISTERS = 1 << 2; + const OPERAND_WIDTH = 1 << 3; + } +} +const REX_PREFIX_HIGH: u8 = 0x4; + +const OPERAND_SIZE_OVERRIDE_PREFIX: u8 = 0x66; + +const TWO_BYTE_ESCAPE: u8 = 0xf; + +// len stands for instruction len +enum OprandType { + Reg { reg: RmReg, len: usize }, + Gpa { gpa: usize, len: usize }, +} + +struct ModRM { + pub _mod: u32, + pub reg_opcode: u32, + pub rm: u32, +} + +impl ModRM { + pub fn new(byte: u8, rex: &RexPrefixLow) -> Self { + let mut reg_opcode = byte.get_bits(3..=5) as u32; + if rex.contains(RexPrefixLow::REGISTERS) { + reg_opcode.set_bit(3, true); + } + Self { + _mod: byte.get_bits(6..=7) as _, + reg_opcode, + rm: byte.get_bits(0..=2) as _, + } + } + + pub fn get_reg(&self) -> RmReg { + self.reg_opcode.try_into().unwrap() + } + + pub fn get_modrm(&self, inst: &Vec, disp_id: usize) -> Option { + let reg: RmReg = self.rm.try_into().unwrap(); + let mut reg_val = reg.read().unwrap(); + // TODO: SIB + match self._mod { + 0 => Some(OprandType::Gpa { + gpa: gva_to_gpa(reg_val as _).unwrap(), + len: 0, + }), + 1 => { + let mut buf = [0u8; 1]; + buf[0..1].copy_from_slice(&inst[disp_id..disp_id + 1]); + let disp_8 = i8::from_ne_bytes(buf); + if disp_8 > 0 { + reg_val += (disp_8 as u64); + } else { + reg_val -= ((-disp_8) as u64); + } + Some(OprandType::Gpa { + gpa: gva_to_gpa(reg_val as _).unwrap(), + len: 1, + }) + } + 2 => { + let mut buf = [0u8; 4]; + buf[0..4].copy_from_slice(&inst[disp_id..disp_id + 4]); + let disp_32 = i32::from_ne_bytes(buf); + if disp_32 > 0 { + reg_val += (disp_32 as u64); + } else { + reg_val -= ((-disp_32) as u64); + } + Some(OprandType::Gpa { + gpa: gva_to_gpa(reg_val as _).unwrap(), + len: 4, + }) + } + 3 => Some(OprandType::Reg { reg, len: 0 }), + _ => None, + } + } +} + +fn 
gpa_to_hpa(gpa: GuestPhysAddr) -> HvResult { + let (hpa, _, _) = unsafe { this_zone().read().gpm.page_table_query(gpa)? }; + Ok(hpa) +} + +fn get_page_entry(pt_hpa: HostPhysAddr, pte_id: usize) -> usize { + unsafe { (*((pt_hpa + (pte_id * size_of::())) as *const usize)) & 0x7ffffffffffffusize } +} + +fn gva_to_gpa(gva: GuestVirtAddr) -> HvResult { + let mut gpa: GuestPhysAddr = 0; + let cr0 = VmcsGuestNW::CR0.read()?; + let cr4 = VmcsGuestNW::CR4.read()?; + + // guest hasn't enabled paging, va = pa + if cr0 & Cr0::CR0_ENABLE_PAGING.bits() == 0 { + gpa = gva; + // still in real mode, apply cs + if cr0 & Cr0::CR0_PROTECTED_MODE.bits() == 0 { + let cs_selector = VmcsGuest16::CS_SELECTOR.read()? as usize; + gpa = (cs_selector << 4) | gva; + } + return Ok(gpa); + } + + if cr4 & Cr4::CR4_ENABLE_PAE.bits() == 0 { + panic!("protected mode gva_to_gpa not implemented yet!"); + } + + // lookup guest page table in long mode + + let p4_gpa = (VmcsGuestNW::CR3.read()?) & !(0xfff); + let p4_hpa = gpa_to_hpa(p4_gpa)?; + let p4_entry_id = (gva >> 39) & 0x1ff; + let p4_entry = get_page_entry(p4_hpa, p4_entry_id); + + let p3_gpa = p4_entry & !(0xfff); + let p3_entry_id = (gva >> 30) & 0x1ff; + let p3_hpa = gpa_to_hpa(p3_gpa)?; + let p3_entry = get_page_entry(p3_hpa, p3_entry_id); + + // info!("p3_entry: {:x}", p3_entry); + + if p3_entry & (DescriptorAttr::HUGE_PAGE.bits() as usize) != 0 { + let page_gpa = p3_entry & !(0xfff); + return Ok(page_gpa | (gva & 0x3fffffff)); + } + + let p2_gpa = p3_entry & !(0xfff); + let p2_entry_id = (gva >> 21) & 0x1ff; + let p2_hpa = gpa_to_hpa(p2_gpa)?; + let p2_entry = get_page_entry(p2_hpa, p2_entry_id); + + // info!("p2_entry: {:x}", p2_entry); + + if p2_entry & (DescriptorAttr::HUGE_PAGE.bits() as usize) != 0 { + let page_gpa = p2_entry & !(0xfff); + return Ok(page_gpa | (gva & 0x1fffff)); + } + + let p1_gpa = p2_entry & !(0xfff); + let p1_entry_id = (gva >> 12) & 0x1ff; + let p1_hpa = gpa_to_hpa(p1_gpa)?; + let p1_entry = get_page_entry(p1_hpa, 
p1_entry_id); + + // info!("p1_entry: {:x}", p1_entry); + + let page_gpa: usize = p1_entry & !(0xfff); + Ok(page_gpa | (gva & 0xfff)) +} + +fn get_default_operand_size() -> HvResult { + let cr0 = VmcsGuestNW::CR0.read()?; + let mut size = size_of::(); + + // in protection mode + if cr0 & Cr0::CR0_PROTECTED_MODE.bits() != 0 { + let gdtr_hpa = gpa_to_hpa(gva_to_gpa(VmcsGuestNW::GDTR_BASE.read()?)?)?; + let cs_sel = VmcsGuest16::CS_SELECTOR.read()? as usize; + // info!("gdtr: {:x}", gdtr_hpa); + let cs_desc = unsafe { *((gdtr_hpa + (cs_sel & !(0x7))) as *const u64) }; + // info!("cs_desc: {:x}", cs_desc); + + // default operation size + let cs_d = cs_desc.get_bit(54); + // long mode + let cs_l = cs_desc.get_bit(53); + + // in 64-bit long mode or set CS.D to 1 + if (!cs_d && cs_l) || cs_d { + size = size_of::(); + } + } + + Ok(size) +} + +fn emulate_inst( + inst: &Vec, + handler: &MMIOHandler, + mmio: &mut MMIOAccess, + base: usize, +) -> HvResult { + assert!(inst.len() > 0); + + let mut size = get_default_operand_size()?; + let mut size_override = false; + let mut cur_id = 0; + + if inst[cur_id] == OPERAND_SIZE_OVERRIDE_PREFIX { + if size == size_of::() { + size = size_of::(); + } else { + size = size_of::(); + } + cur_id += 1; + size_override = true; + } + + let mut rex = RexPrefixLow::from_bits_truncate(0); + if inst[cur_id].get_bits(4..=7) == REX_PREFIX_HIGH { + rex = RexPrefixLow::from_bits_truncate(inst[cur_id].get_bits(0..=3)); + // we haven't implemented other situations yet + assert!(rex == RexPrefixLow::REGISTERS); + cur_id += 1; + } + + let mut two_byte = false; + if inst[cur_id] == TWO_BYTE_ESCAPE { + two_byte = true; + cur_id += 1; + } + + if !two_byte { + if OneByteOpCode::try_from(inst[cur_id]).is_err() { + error!("inst: {:#x?}", inst); + } + let opcode: OneByteOpCode = inst[cur_id].try_into().unwrap(); + cur_id += 1; + + if !size_override { + size = match opcode { + OneByteOpCode::MovEbGb | OneByteOpCode::MovGbEb => size_of::(), + _ => size, + }; + } + 
+ match opcode { + OneByteOpCode::MovEbGb | OneByteOpCode::MovEvGv => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let src = mod_rm.get_reg(); + let src_val = src.read().unwrap(); + + let dst = mod_rm.get_modrm(inst, cur_id).unwrap(); + match dst { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.write(src_val, size).unwrap(); + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + + mmio.address = gpa - base; + mmio.is_write = true; + mmio.size = size; + mmio.value = src_val as _; + + handler(mmio, base); + } + _ => {} + } + + Ok(cur_id) + } + OneByteOpCode::MovGbEb | OneByteOpCode::MovGvEv => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let dst = mod_rm.get_reg(); + + let src = mod_rm.get_modrm(inst, cur_id).unwrap(); + let src_val = match src { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.read().unwrap() + } + OprandType::Gpa { gpa, len } => { + cur_id += len; + + mmio.address = gpa - base; + mmio.is_write = false; + mmio.size = size; + mmio.value = 0; + // info!("src_val: {:x}", gpa); + + handler(mmio, base); + mmio.value as u64 + } + }; + + dst.write(src_val, size).unwrap(); + Ok(cur_id) + } + _ => { + hv_result_err!( + ENOSYS, + format!("Unimplemented opcode: 0x{:x}", opcode as u8) + ) + } + } + } else { + if TwoByteOpCode::try_from(inst[cur_id]).is_err() { + error!("inst: {:#x?}", inst); + } + let opcode: TwoByteOpCode = inst[cur_id].try_into().unwrap(); + cur_id += 1; + + if !size_override { + size = match opcode { + TwoByteOpCode::MovZxGvEb => size_of::(), + TwoByteOpCode::MovZxGvEw => size_of::(), + _ => size, + }; + } + + match opcode { + TwoByteOpCode::MovZxGvEb | TwoByteOpCode::MovZxGvEw => { + let mod_rm = ModRM::new(inst[cur_id], &rex); + cur_id += 1; + + let dst = mod_rm.get_reg(); + + let src = mod_rm.get_modrm(inst, cur_id).unwrap(); + let src_val = match src { + OprandType::Reg { reg, len } => { + cur_id += len; + reg.read().unwrap() + } + OprandType::Gpa { gpa, len } => { + 
cur_id += len; + + mmio.address = gpa - base; + mmio.is_write = false; + mmio.size = size; + mmio.value = 0; + // info!("src_val: {:x}", gpa); + + handler(mmio, base); + mmio.value as u64 + } + }; + let src_val_zero_extend = match size { + 1 => src_val.get_bits(0..8), + 2 => src_val.get_bits(0..16), + 4 => src_val.get_bits(0..32), + _ => src_val, + }; + + dst.write(src_val_zero_extend, 8).unwrap(); + Ok(cur_id) + } + _ => { + hv_result_err!( + ENOSYS, + format!("Unimplemented opcode: 0x{:x}", opcode as u8) + ) + } + } + } +} + +pub fn instruction_emulator(handler: &MMIOHandler, mmio: &mut MMIOAccess, base: usize) -> HvResult { + let rip_hpa = gpa_to_hpa(gva_to_gpa(VmcsGuestNW::RIP.read()?)?)? as *const u8; + let inst = unsafe { from_raw_parts(rip_hpa, 15) }.to_vec(); + + let len = emulate_inst(&inst, handler, mmio, base).unwrap(); + // info!("rip_hpa: {:?}, inst: {:x?}, len: {:x}", rip_hpa, inst, len); + + this_cpu_data().arch_cpu.advance_guest_rip(len as _)?; + + Ok(()) +} + +pub fn mmio_empty_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { + if !mmio.is_write { + mmio.value = 0; + } + Ok(()) +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs new file mode 100644 index 00000000..d75140ed --- /dev/null +++ b/src/arch/x86_64/mod.rs @@ -0,0 +1,45 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +#![allow(unused)] +pub mod acpi; +pub mod boot; +pub mod consts; +pub mod cpu; +pub mod cpuid; +pub mod entry; +pub mod graphics; +pub mod hpet; +pub mod hypercall; +pub mod idt; +pub mod iommu; +pub mod ipi; +pub mod mm; +pub mod mmio; +pub mod msr; +pub mod paging; +pub mod pci; +pub mod pio; +pub mod s1pt; +pub mod s2pt; +pub mod trap; +pub mod vmcs; +pub mod vmx; +pub mod zone; + +pub use s1pt::Stage1PageTable; +pub use s2pt::stage2_mode_detect; +pub use s2pt::Stage2PageTable; diff --git a/src/arch/x86_64/msr.rs b/src/arch/x86_64/msr.rs new file mode 100644 index 00000000..80585c5b --- /dev/null +++ b/src/arch/x86_64/msr.rs @@ -0,0 +1,261 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::msr::Msr::*, + consts::MAX_ZONE_NUM, + device::irqchip::pic::lapic::VirtLocalApic, + error::HvResult, + memory::{Frame, HostPhysAddr}, +}; +use heapless::FnvIndexMap; +use x86::msr::{rdmsr, wrmsr}; + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +/// X86 model-specific registers. (SDM Vol. 4) +pub enum Msr { + /// APIC Location and Status (R/W) See Table 35-2. See Section 10.4.4, Local APIC Status and Location. 
+ IA32_APIC_BASE = 0x1b, + IA32_FEATURE_CONTROL = 0x3a, + IA32_PAT = 0x277, + + IA32_VMX_BASIC = 0x480, + IA32_VMX_PINBASED_CTLS = 0x481, + IA32_VMX_PROCBASED_CTLS = 0x482, + IA32_VMX_EXIT_CTLS = 0x483, + IA32_VMX_ENTRY_CTLS = 0x484, + IA32_VMX_MISC = 0x485, + IA32_VMX_CR0_FIXED0 = 0x486, + IA32_VMX_CR0_FIXED1 = 0x487, + IA32_VMX_CR4_FIXED0 = 0x488, + IA32_VMX_CR4_FIXED1 = 0x489, + IA32_VMX_PROCBASED_CTLS2 = 0x48b, + IA32_VMX_EPT_VPID_CAP = 0x48c, + IA32_VMX_TRUE_PINBASED_CTLS = 0x48d, + IA32_VMX_TRUE_PROCBASED_CTLS = 0x48e, + IA32_VMX_TRUE_EXIT_CTLS = 0x48f, + IA32_VMX_TRUE_ENTRY_CTLS = 0x490, + + /// X2APIC Msr + + /// TSC Target of Local APIC s TSC Deadline Mode (R/W) See Table 35-2 + IA32_TSC_DEADLINE = 0x6e0, + + /// ID register. + IA32_X2APIC_APICID = 0x802, + /// Version register. + IA32_X2APIC_VERSION = 0x803, + /// End-Of-Interrupt register. + IA32_X2APIC_EOI = 0x80B, + /// Logical Destination Register. + IA32_X2APIC_LDR = 0x80D, + /// Spurious Interrupt Vector register. + IA32_X2APIC_SIVR = 0x80F, + + /// In-Service register bits [31:0]. + IA32_X2APIC_ISR0 = 0x810, + /// In-Service register bits [63:32]. + IA32_X2APIC_ISR1 = 0x811, + /// In-Service register bits [95:64]. + IA32_X2APIC_ISR2 = 0x812, + /// In-Service register bits [127:96]. + IA32_X2APIC_ISR3 = 0x813, + /// In-Service register bits [159:128]. + IA32_X2APIC_ISR4 = 0x814, + /// In-Service register bits [159:128]. + IA32_X2APIC_ISR5 = 0x815, + /// In-Service register bits [191:160]. + IA32_X2APIC_ISR6 = 0x816, + /// In-Service register bits [223:192]. + IA32_X2APIC_ISR7 = 0x817, + + /// Interrupt Request register bits [31:0]. + IA32_X2APIC_IRR0 = 0x820, + /// Interrupt Request register bits [63:32]. + IA32_X2APIC_IRR1 = 0x821, + /// Interrupt Request register bits [95:64]. + IA32_X2APIC_IRR2 = 0x822, + /// Interrupt Request register bits [127:96]. + IA32_X2APIC_IRR3 = 0x823, + /// Interrupt Request register bits [159:128]. 
+ IA32_X2APIC_IRR4 = 0x824, + /// Interrupt Request register bits [159:128]. + IA32_X2APIC_IRR5 = 0x825, + /// Interrupt Request register bits [191:160]. + IA32_X2APIC_IRR6 = 0x826, + /// Interrupt Request register bits [223:192]. + IA32_X2APIC_IRR7 = 0x827, + + /// Error Status register. + IA32_X2APIC_ESR = 0x828, + /// Interrupt Command register. + IA32_X2APIC_ICR = 0x830, + /// LVT Timer Interrupt register. + IA32_X2APIC_LVT_TIMER = 0x832, + /// LVT Thermal Sensor Interrupt register. + IA32_X2APIC_LVT_THERMAL = 0x833, + /// LVT Performance Monitor register. + IA32_X2APIC_LVT_PMI = 0x834, + /// LVT LINT0 register. + IA32_X2APIC_LVT_LINT0 = 0x835, + /// LVT LINT1 register. + IA32_X2APIC_LVT_LINT1 = 0x836, + /// LVT Error register. + IA32_X2APIC_LVT_ERROR = 0x837, + /// Initial Count register. + IA32_X2APIC_INIT_COUNT = 0x838, + /// Current Count register. + IA32_X2APIC_CUR_COUNT = 0x839, + /// Divide Configuration register. + IA32_X2APIC_DIV_CONF = 0x83E, + + IA32_EFER = 0xc000_0080, + IA32_STAR = 0xc000_0081, + IA32_LSTAR = 0xc000_0082, + IA32_CSTAR = 0xc000_0083, + IA32_FMASK = 0xc000_0084, + + IA32_FS_BASE = 0xc000_0100, + IA32_GS_BASE = 0xc000_0101, + IA32_KERNEL_GSBASE = 0xc000_0102, +} +} + +impl Msr { + /// Read 64 bits msr register. + #[inline(always)] + pub fn read(self) -> u64 { + unsafe { rdmsr(self as _) } + } + + /// Write 64 bits to msr register. + /// + /// # Safety + /// + /// The caller must ensure that this write operation has no unsafe side + /// effects. 
+ #[inline(always)] + pub unsafe fn write(self, value: u64) { + wrmsr(self as _, value) + } +} + +static mut MSR_BITMAP_MAP: Option> = None; + +pub fn init_msr_bitmap_map() { + unsafe { MSR_BITMAP_MAP = Some(FnvIndexMap::new()) }; +} + +pub fn set_msr_bitmap(zone_id: usize) { + unsafe { + if let Some(map) = &mut MSR_BITMAP_MAP { + if map.contains_key(&zone_id) { + map.remove(&zone_id); + } + map.insert(zone_id, MsrBitmap::new()); + } + } +} + +pub fn get_msr_bitmap(zone_id: usize) -> &'static MsrBitmap { + unsafe { + MSR_BITMAP_MAP + .as_ref() + .expect("MSR_BITMAP_MAP is not initialized!") + .get(&zone_id) + .expect("msr bitmap for this Zone does not exist!") + } +} + +#[derive(Debug)] +pub struct MsrBitmap { + frame: Frame, +} + +impl MsrBitmap { + pub fn new() -> Self { + let mut bitmap = Self { + frame: Frame::new_zero().unwrap(), + }; + + bitmap.set_read_intercept(IA32_APIC_BASE, true); + bitmap.set_read_intercept(IA32_X2APIC_APICID, true); + bitmap.set_read_intercept(IA32_X2APIC_LDR, true); + bitmap.set_read_intercept(IA32_X2APIC_LVT_TIMER, true); + + bitmap.set_write_intercept(IA32_APIC_BASE, true); + bitmap.set_write_intercept(IA32_X2APIC_EOI, true); + bitmap.set_write_intercept(IA32_X2APIC_ICR, true); + bitmap.set_write_intercept(IA32_X2APIC_LVT_TIMER, true); + + for addr in (IA32_X2APIC_ISR0 as u32)..(IA32_X2APIC_ISR7 as u32 + 1) { + if let Ok(msr) = Msr::try_from(addr) { + bitmap.set_read_intercept(msr, true); + } + } + + for addr in (IA32_X2APIC_IRR0 as u32)..(IA32_X2APIC_IRR7 as u32 + 1) { + if let Ok(msr) = Msr::try_from(addr) { + bitmap.set_read_intercept(msr, true); + } + } + + bitmap + } + + pub fn phys_addr(&self) -> HostPhysAddr { + self.frame.start_paddr() + } + + pub fn set_read_intercept(&self, msr: Msr, intercept: bool) { + self.set_intercept(msr as u32, false, intercept); + } + + pub fn set_write_intercept(&self, msr: Msr, intercept: bool) { + self.set_intercept(msr as u32, true, intercept); + } + + fn set_intercept(&self, msr: u32, 
is_write: bool, intercept: bool) { + let offset = if msr <= 0x1fff { + if !is_write { + 0 // Read bitmap for low MSRs (0x0000_0000..0x0000_1FFF) + } else { + 2 // Write bitmap for low MSRs (0x0000_0000..0x0000_1FFF) + } + } else if (0xc000_0000..=0xc000_1fff).contains(&msr) { + if !is_write { + 1 // Read bitmap for high MSRs (0xC000_0000..0xC000_1FFF) + } else { + 3 // Write bitmap for high MSRs (0xC000_0000..0xC000_1FFF) + } + } else { + unreachable!() + } * 1024; + let bitmap = + unsafe { core::slice::from_raw_parts_mut(self.frame.as_mut_ptr().add(offset), 1024) }; + let msr = msr & 0x1fff; + let byte = (msr / 8) as usize; + let bits = msr % 8; + if intercept { + bitmap[byte] |= 1 << bits; + } else { + bitmap[byte] &= !(1 << bits); + } + } +} diff --git a/src/arch/x86_64/multiboot.S b/src/arch/x86_64/multiboot.S new file mode 100644 index 00000000..52c82cf7 --- /dev/null +++ b/src/arch/x86_64/multiboot.S @@ -0,0 +1,219 @@ +.equ bsp_boot_stack_top, __core_end + {per_cpu_size} +.equ multiboot2_header_len, multiboot2_header_end - multiboot2_header + +.equ multiboot2_header_tag_end, 0 +.equ multiboot2_header_tag_address, 2 +.equ multiboot2_header_tag_entry_address, 3 +.equ multiboot2_header_tag_framebuffer, 5 + +.section .text.header + +.balign 4 +.type multiboot_header, STT_OBJECT +multiboot_header: + .int {multiboot_header_magic} + .int {multiboot_header_flags} + .int -({multiboot_header_magic} + {multiboot_header_flags}) + .int multiboot_header - {offset} // header_addr + .int skernel - {offset} // load_addr + .int edata - {offset} // load_end + .int ebss - {offset} // bss_end_addr + .int arch_entry - {offset} // entry_addrs + +.align 8 +.type multiboot2_header STT_OBJECT +multiboot2_header: + .int {multiboot2_header_magic} + .int {multiboot2_arch_i386} + .int multiboot2_header_len + .int -({multiboot2_header_magic} + {multiboot2_arch_i386} + multiboot2_header_len) + +.align 8 +.type tag_address STT_OBJECT +tag_address: + .short multiboot2_header_tag_address + 
.short 0 + .int 24 + .int multiboot2_header - {offset} // header_addr + .int skernel - {offset} // load_addr + .int edata - {offset} // load_end_addr + .int bsp_boot_stack_top - {offset} // bss_end_addr + +.align 8 +.type tag_entry_address STT_OBJECT +tag_entry_address: + .short multiboot2_header_tag_entry_address + .short 0 + .int 12 + .int arch_entry - {offset} // entry_addr + +.align 8 +.type tag_framebuffer STT_OBJECT +tag_framebuffer: + .short multiboot2_header_tag_framebuffer + .short 0 + .int 20 + .int 1024 // width + .int 768 // height + .int 32 // depth + +.align 8 +.type tag_end STT_OBJECT +tag_end: + .short multiboot2_header_tag_end + .short 0 + .int 8 + +multiboot2_header_end: + +.section .text.entry + +.section .text.entry32 +.code32 + +.macro ENTRY32_COMMON_1 + // disable paging (UEFI may turn it on) + mov eax, cr0 + mov ebx, (1 << 31) + not ebx + and eax, ebx + mov eax, cr0 + + // load the temporary page table + lea eax, [.Ltmp_pml4 - {offset}] + mov cr3, eax + + // set PAE, PGE bit in CR4 + mov eax, {cr4} + mov cr4, eax + + // set LME, NXE bit in IA32_EFER + mov ecx, {efer_msr} + mov edx, 0 + mov eax, {efer} + wrmsr + + // set protected mode, write protect, paging bit in CR0 + mov eax, {cr0} + mov cr0, eax +.endm + +.macro ENTRY32_COMMON_2 + // set data segment selectors + mov ax, 0x18 + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax +.endm + +.macro ENTRY64_COMMON + // clear segment selectors + xor ax, ax + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax +.endm + +bsp_entry32: + ENTRY32_COMMON_1 + + // set up GDT + lgdt [.Ltmp_gdt_desc_phys - {offset}] + + ENTRY32_COMMON_2 + + // long return to the 64-bit entry + push 0x10 // code64 segment selector + lea eax, [bsp_entry64 - {offset}] + push eax + retf + +.global ap_entry32 +ap_entry32: + ENTRY32_COMMON_1 + ENTRY32_COMMON_2 + + // long return to the 64-bit entry + push 0x10 // code64 segment selector + lea eax, [ap_entry64 - {offset}] + push eax + retf + +.section 
.text.entry64 +.code64 + +bsp_entry64: + // reload GDT by high address + movabs rax, offset .Ltmp_gdt_desc + lgdt [rax] + + // load task register + mov ax, 0x20 + ltr ax + + ENTRY64_COMMON + + // set stack and jump to rust_entry + movabs rsp, offset bsp_boot_stack_top + movabs rax, offset {rust_entry} + call rax + jmp .Lhlt + +ap_entry64: + ENTRY64_COMMON + // set rsp to high address + mov rax, {offset} + add rsp, rax + + // jump to rust_entry_secondary + movabs rax, offset {rust_entry_secondary} + call rax + jmp .Lhlt + +.Lhlt: + hlt + jmp .Lhlt + +.section .rodata +.balign 8 +.Ltmp_gdt_desc_phys: + .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit + .long .Ltmp_gdt - {offset} // base + +.balign 8 +.Ltmp_gdt_desc: + .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit + .quad .Ltmp_gdt // base + +.section .data +.balign 16 +.Ltmp_gdt: + .quad 0x0000000000000000 // 0x00: null + .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) + .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) + .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) + .quad 0x00008934ee800067 // 0x20: tss low + .quad 0x00000000ffffff80 // 0x28: tss high +.Ltmp_gdt_end: + +.balign 4096 +.Ltmp_pml4: + // 0x0000_0000 ~ 0x4000_0000 + .quad Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) + .zero 8 * 510 + // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_4000_0000 + .quad Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) + +.global Ltmp_pdpt_low +Ltmp_pdpt_low: + .quad 0x00000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .zero 8 * 511 + +.global Ltmp_pdpt_high +Ltmp_pdpt_high: + .quad 0x00000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) + .zero 8 * 511 diff --git a/src/arch/x86_64/paging.rs b/src/arch/x86_64/paging.rs new file mode 100644 index 00000000..94aedb61 --- /dev/null +++ 
b/src/arch/x86_64/paging.rs @@ -0,0 +1,572 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + error::{HvError, HvResult}, + memory::{addr::is_aligned, Frame, MemFlags, MemoryRegion, PhysAddr, VirtAddr}, +}; +use alloc::{sync::Arc, vec::Vec}; +use core::{fmt::Debug, marker::PhantomData, slice}; +use spin::Mutex; + +const LEVELS: usize = 4; +const ENTRY_COUNT: usize = 512; + +#[derive(Debug)] +pub enum PagingError { + NoMemory, + NotMapped, + AlreadyMapped, + MappedToHugePage, +} + +pub type PagingResult = Result; + +impl From for HvError { + fn from(err: PagingError) -> Self { + match err { + PagingError::NoMemory => hv_err!(ENOMEM), + _ => hv_err!(EFAULT, format!("{:?}", err)), + } + } +} + +#[repr(usize)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum PageSize { + Size4K = 0x1000, + Size2M = 0x20_0000, + Size1G = 0x4000_0000, +} + +#[derive(Debug, Copy, Clone)] +pub struct Page { + vaddr: VA, + size: PageSize, +} + +impl PageSize { + pub const fn is_aligned(self, addr: usize) -> bool { + self.page_offset(addr) == 0 + } + + pub const fn align_down(self, addr: usize) -> usize { + addr & !(self as usize - 1) + } + + pub const fn page_offset(self, addr: usize) -> usize { + addr & (self as usize - 1) + } + + pub const fn is_huge(self) -> bool { + matches!(self, Self::Size1G | Self::Size2M) + } +} + +impl + Copy> Page { + pub fn new_aligned(vaddr: VA, size: PageSize) -> Self { + 
debug_assert!(size.is_aligned(vaddr.into())); + Self { vaddr, size } + } +} + +pub trait GenericPTE: Debug + Clone { + /// Returns the physical address mapped by this entry. + fn addr(&self) -> PhysAddr; + /// Returns the flags of this entry. + fn flags(&self) -> MemFlags; + /// Returns whether this entry is zero. + fn is_unused(&self) -> bool; + /// Returns whether this entry flag indicates present. + fn is_present(&self) -> bool; + /// Returns whether this entry maps to a huge frame. + fn is_huge(&self) -> bool; + /// Set physical address for terminal entries. + fn set_addr(&mut self, paddr: PhysAddr); + /// Set flags for terminal entries. + fn set_flags(&mut self, flags: MemFlags, is_huge: bool); + /// Set physical address and flags for intermediate table entries. + fn set_table(&mut self, paddr: PhysAddr); + /// Set this entry to zero. + fn clear(&mut self); +} + +pub trait PagingInstr { + unsafe fn activate(root_paddr: PhysAddr); + fn flush(vaddr: Option); +} + +/// A basic read-only page table for address query only. +pub trait GenericPageTableImmut: Sized { + type VA: From + Into + Copy; + + unsafe fn from_root(root_paddr: PhysAddr) -> Self; + fn root_paddr(&self) -> PhysAddr; + fn query(&self, vaddr: Self::VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)>; +} + +/// A extended mutable page table can change mappings. +pub trait GenericPageTable: GenericPageTableImmut { + fn new() -> Self; + + fn map(&mut self, region: &MemoryRegion) -> HvResult; + fn unmap(&mut self, region: &MemoryRegion) -> HvResult; + fn update( + &mut self, + vaddr: Self::VA, + paddr: PhysAddr, + flags: MemFlags, + ) -> PagingResult; + + fn clone(&self) -> Self; + + unsafe fn activate(&self); + fn flush(&self, vaddr: Option); +} + +/// A immutable level-4 page table implements `GenericPageTableImmut`. +pub struct Level4PageTableImmut { + // Root table frame. + root: Frame, + // Phantom data. 
+ _phantom: PhantomData<(VA, PTE)>, +} + +impl Level4PageTableImmut +where + VA: From + Into + Copy, + PTE: GenericPTE, +{ + fn new() -> Self { + let frame = Frame::new_zero().expect("failed to allocate root frame for host page table"); + Self { + root: frame, + _phantom: PhantomData, + } + } + + fn get_entry_mut(&self, vaddr: VA) -> PagingResult<(&mut PTE, PageSize)> { + let vaddr = vaddr.into(); + + let p4 = table_of_mut::(self.root_paddr()); + let p4e = &mut p4[p4_index(vaddr)]; + + let p3 = next_table_mut(p4e)?; + let p3e = &mut p3[p3_index(vaddr)]; + if p3e.is_huge() { + return Ok((p3e, PageSize::Size1G)); + } + + let p2 = next_table_mut(p3e)?; + let p2e = &mut p2[p2_index(vaddr)]; + if p2e.is_huge() { + return Ok((p2e, PageSize::Size2M)); + } + + let p1 = next_table_mut(p2e)?; + let p1e = &mut p1[p1_index(vaddr)]; + Ok((p1e, PageSize::Size4K)) + } + + fn walk( + &self, + table: &[PTE], + level: usize, + start_vaddr: usize, + limit: usize, + func: &impl Fn(usize, usize, usize, &PTE), + ) { + let mut n = 0; + for (i, entry) in table.iter().enumerate() { + let vaddr = start_vaddr + (i << (12 + (3 - level) * 9)); + if entry.is_present() { + func(level, i, vaddr, entry); + if level < 3 { + match next_table_mut(entry) { + Ok(entry) => self.walk(entry, level + 1, vaddr, limit, func), + Err(PagingError::MappedToHugePage) => {} + _ => unreachable!(), + } + } + n += 1; + if n >= limit { + break; + } + } + } + } + + pub fn dump(&self, limit: usize) { + static LOCK: Mutex<()> = Mutex::new(()); + let _lock = LOCK.lock(); + + println!("Root: {:x?}", self.root_paddr()); + self.walk( + table_of(self.root_paddr()), + 0, + 0, + limit, + &|level: usize, idx: usize, vaddr: usize, entry: &PTE| { + for _ in 0..level * 2 { + print!(" "); + } + println!( + "[ADDR:{:#x?} level:{} - idx:{:03}], vaddr:{:08x?}: {:x?}", + entry as *const _ as VirtAddr, level, idx, vaddr, entry + ); + }, + ); + } +} + +impl GenericPageTableImmut for Level4PageTableImmut +where + VA: From + Into + Copy, + 
PTE: GenericPTE, +{ + type VA = VA; + + unsafe fn from_root(root_paddr: PhysAddr) -> Self { + Self { + root: Frame::from_paddr(root_paddr), + _phantom: PhantomData, + } + } + + fn root_paddr(&self) -> PhysAddr { + self.root.start_paddr() + } + + fn query(&self, vaddr: Self::VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)> { + let (entry, size) = self.get_entry_mut(vaddr)?; + if entry.is_unused() { + return Err(PagingError::NotMapped); + } + let off = size.page_offset(vaddr.into()); + Ok((entry.addr() + off, entry.flags(), size)) + } +} + +/// A extended level-3/4 page table that can change its mapping. It also tracks all intermediate +/// level tables. Locks need to be used if change the same page table concurrently. +struct Level4PageTableUnlocked { + inner: Level4PageTableImmut, + /// Intermediate level table frames. + intrm_tables: Vec, + /// Phantom data. + _phantom: PhantomData<(VA, PTE, I)>, +} + +impl Level4PageTableUnlocked +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + fn new() -> Self { + Self { + inner: Level4PageTableImmut::new(), + intrm_tables: Vec::new(), + _phantom: PhantomData, + } + } + + unsafe fn from_root(root_paddr: PhysAddr) -> Self { + Self { + inner: Level4PageTableImmut::from_root(root_paddr), + intrm_tables: Vec::new(), + _phantom: PhantomData, + } + } + + fn alloc_intrm_table(&mut self) -> HvResult { + let frame = Frame::new_zero()?; + let paddr = frame.start_paddr(); + self.intrm_tables.push(frame); + Ok(paddr) + } + + fn _dealloc_intrm_table(&mut self, _paddr: PhysAddr) {} + + fn get_entry_mut_or_create(&mut self, page: Page) -> PagingResult<&mut PTE> { + let vaddr: usize = page.vaddr.into(); + + let p4 = table_of_mut::(self.inner.root_paddr()); + let p4e = &mut p4[p4_index(vaddr)]; + + let p3 = next_table_mut_or_create(p4e, || self.alloc_intrm_table())?; + let p3e = &mut p3[p3_index(vaddr)]; + if page.size == PageSize::Size1G { + return Ok(p3e); + } + + let p2 = next_table_mut_or_create(p3e, || 
self.alloc_intrm_table())?; + let p2e = &mut p2[p2_index(vaddr)]; + if page.size == PageSize::Size2M { + return Ok(p2e); + } + + let p1 = next_table_mut_or_create(p2e, || self.alloc_intrm_table())?; + let p1e = &mut p1[p1_index(vaddr)]; + Ok(p1e) + } + + fn map_page( + &mut self, + page: Page, + paddr: PhysAddr, + flags: MemFlags, + ) -> PagingResult<&mut PTE> { + let entry = self.get_entry_mut_or_create(page)?; + if !entry.is_unused() { + return Err(PagingError::AlreadyMapped); + } + entry.set_addr(page.size.align_down(paddr)); + entry.set_flags(flags, page.size.is_huge()); + Ok(entry) + } + + fn unmap_page(&mut self, vaddr: VA) -> PagingResult<(PhysAddr, PageSize)> { + let (entry, size) = self.inner.get_entry_mut(vaddr)?; + if entry.is_unused() { + return Err(PagingError::NotMapped); + } + let paddr = entry.addr(); + entry.clear(); + Ok((paddr, size)) + } + + fn update(&mut self, vaddr: VA, paddr: PhysAddr, flags: MemFlags) -> PagingResult { + let (entry, size) = self.inner.get_entry_mut(vaddr)?; + entry.set_addr(paddr); + entry.set_flags(flags, size.is_huge()); + Ok(size) + } +} + +/// A extended level-4 page table implements `GenericPageTable`. It use locks to avoid data +/// racing between it and its clonees. +pub struct Level4PageTable { + inner: Level4PageTableUnlocked, + /// Make sure all accesses to the page table and its clonees is exclusive. + clonee_lock: Arc>, +} + +impl Level4PageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + pub fn dump(&self, limit: usize) { + self.inner.inner.dump(limit) + } + + /// Clone only the top level page table mapping from `src`. + pub fn clone_from(src: &impl GenericPageTableImmut) -> Self { + // XXX: The clonee won't track intermediate tables, must ensure it lives shorter than the + // original page table. 
+ let pt = Self::new(); + let dst_p4_table = + unsafe { slice::from_raw_parts_mut(pt.root_paddr() as *mut PTE, ENTRY_COUNT) }; + let src_p4_table = + unsafe { slice::from_raw_parts(src.root_paddr() as *const PTE, ENTRY_COUNT) }; + dst_p4_table.clone_from_slice(src_p4_table); + pt + } +} + +impl GenericPageTableImmut for Level4PageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + type VA = VA; + + unsafe fn from_root(root_paddr: PhysAddr) -> Self { + Self { + inner: Level4PageTableUnlocked::from_root(root_paddr), + clonee_lock: Arc::new(Mutex::new(())), + } + } + + fn root_paddr(&self) -> PhysAddr { + self.inner.inner.root_paddr() + } + + fn query(&self, vaddr: VA) -> PagingResult<(PhysAddr, MemFlags, PageSize)> { + let _lock = self.clonee_lock.lock(); + self.inner.inner.query(vaddr) + } +} + +impl GenericPageTable for Level4PageTable +where + VA: From + Into + Copy, + PTE: GenericPTE, + I: PagingInstr, +{ + fn new() -> Self { + Self { + inner: Level4PageTableUnlocked::new(), + clonee_lock: Arc::new(Mutex::new(())), + } + } + + fn map(&mut self, region: &MemoryRegion) -> HvResult { + assert!( + is_aligned(region.start.into()), + "region.start = {:#x?}", + region.start.into() + ); + assert!(is_aligned(region.size), "region.size = {:#x?}", region.size); + trace!( + "create mapping in {}: {:#x?}", + core::any::type_name::(), + region + ); + let _lock = self.clonee_lock.lock(); + let mut vaddr = region.start.into(); + let mut size = region.size; + while size > 0 { + let paddr = region.mapper.map_fn(vaddr); + let page_size = if PageSize::Size1G.is_aligned(vaddr) + && PageSize::Size1G.is_aligned(paddr) + && size >= PageSize::Size1G as usize + && !region.flags.contains(MemFlags::NO_HUGEPAGES) + { + PageSize::Size1G + } else if PageSize::Size2M.is_aligned(vaddr) + && PageSize::Size2M.is_aligned(paddr) + && size >= PageSize::Size2M as usize + && !region.flags.contains(MemFlags::NO_HUGEPAGES) + { + PageSize::Size2M + } else { + PageSize::Size4K 
+ }; + let page = Page::new_aligned(vaddr.into(), page_size); + self.inner + .map_page(page, paddr, region.flags) + .map_err(|e: PagingError| { + error!( + "failed to map page: {:#x?}({:?}) -> {:#x?}, {:?}", + vaddr, page_size, paddr, e + ); + e + })?; + vaddr += page_size as usize; + size -= page_size as usize; + } + Ok(()) + } + + fn unmap(&mut self, region: &MemoryRegion) -> HvResult { + trace!( + "destroy mapping in {}: {:#x?}", + core::any::type_name::(), + region + ); + let _lock = self.clonee_lock.lock(); + let mut vaddr = region.start.into(); + let mut size = region.size; + while size > 0 { + let (_, page_size) = self.inner.unmap_page(vaddr.into()).map_err(|e| { + error!("failed to unmap page: {:#x?}, {:?}", vaddr, e); + e + })?; + if !page_size.is_aligned(vaddr) { + error!("error vaddr={:#x?}", vaddr); + loop {} + } + assert!(page_size.is_aligned(vaddr)); + assert!(page_size as usize <= size); + vaddr += page_size as usize; + size -= page_size as usize; + } + Ok(()) + } + + fn update(&mut self, vaddr: VA, paddr: PhysAddr, flags: MemFlags) -> PagingResult { + let _lock = self.clonee_lock.lock(); + self.inner.update(vaddr, paddr, flags) + } + + fn clone(&self) -> Self { + let mut pt = Self::clone_from(self); + // clone with lock to avoid data racing between it and its clonees. 
+ pt.clonee_lock = self.clonee_lock.clone(); + pt + } + + unsafe fn activate(&self) { + I::activate(self.root_paddr()) + } + + fn flush(&self, vaddr: Option) { + I::flush(vaddr.map(Into::into)) + } +} + +const fn p4_index(vaddr: usize) -> usize { + (vaddr >> (12 + 27)) & (ENTRY_COUNT - 1) +} + +const fn p3_index(vaddr: usize) -> usize { + (vaddr >> (12 + 18)) & (ENTRY_COUNT - 1) +} + +const fn p2_index(vaddr: usize) -> usize { + (vaddr >> (12 + 9)) & (ENTRY_COUNT - 1) +} + +const fn p1_index(vaddr: usize) -> usize { + (vaddr >> 12) & (ENTRY_COUNT - 1) +} + +fn table_of<'a, E>(paddr: PhysAddr) -> &'a [E] { + let ptr = paddr as *const E; + unsafe { slice::from_raw_parts(ptr, ENTRY_COUNT) } +} + +fn table_of_mut<'a, E>(paddr: PhysAddr) -> &'a mut [E] { + let ptr = paddr as *mut E; + unsafe { slice::from_raw_parts_mut(ptr, ENTRY_COUNT) } +} + +fn next_table_mut<'a, E: GenericPTE>(entry: &E) -> PagingResult<&'a mut [E]> { + if !entry.is_present() { + Err(PagingError::NotMapped) + } else if entry.is_huge() { + Err(PagingError::MappedToHugePage) + } else { + Ok(table_of_mut(entry.addr())) + } +} + +fn next_table_mut_or_create<'a, E: GenericPTE>( + entry: &mut E, + mut allocator: impl FnMut() -> HvResult, +) -> PagingResult<&'a mut [E]> { + if entry.is_unused() { + let paddr = allocator().map_err(|_| PagingError::NoMemory)?; + entry.set_table(paddr); + Ok(table_of_mut(paddr)) + } else { + next_table_mut(entry) + } +} diff --git a/src/arch/x86_64/pci.rs b/src/arch/x86_64/pci.rs new file mode 100644 index 00000000..8383fccd --- /dev/null +++ b/src/arch/x86_64/pci.rs @@ -0,0 +1,267 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{acpi, idt, mmio::MMIoDevice, pio::get_pio_bitmap, zone::HvArchZoneConfig}, + error::HvResult, + memory::{ + mmio_generic_handler, mmio_handle_access, mmio_perform_access, GuestPhysAddr, MMIOAccess, + }, + pci::pcibar::BarRegion, + percpu::this_zone, + zone::{this_zone_id, Zone}, +}; +use ::acpi::{mcfg::Mcfg, sdt::Signature}; +use alloc::{ + collections::{btree_map::BTreeMap, vec_deque::VecDeque}, + sync::Arc, + vec::Vec, +}; +use bit_field::BitField; +use core::{mem::size_of, ops::Range, panic}; + +use super::{ + pio::{PCI_CONFIG_ADDR_PORT, PCI_CONFIG_DATA_PORT}, + vmx::VmxIoExitInfo, +}; + +impl Zone { + pub fn pci_config_space_mmio_init(&mut self, arch: &HvArchZoneConfig) { + /*let bytes = acpi::root_get_table(&Signature::MCFG) + .unwrap() + .get_bytes() + .clone(); + let mcfg = unsafe { &*(bytes.as_ptr() as *const Mcfg) };*/ + + let bytes = acpi::root_get_table(&Signature::MCFG) + .unwrap() + .get_unpatched_src(); + let mcfg = unsafe { &*(bytes as *const Mcfg) }; + + for entry in mcfg.entries() { + let start = entry.base_address as usize; + let size = + ((entry.bus_number_end as usize - entry.bus_number_start as usize) + 1) << 20; + // info!("entry start: {:x} size: {:x}", start, size); + self.mmio_region_register(start, size, mmio_generic_handler, 0); + } + } +} + +pub fn probe_root_pci_devices( + config_base_hpa: usize, +) -> ( + Vec, + BTreeMap, + BTreeMap, + usize, + u8, +) { + let mut bdfs: Vec = Vec::new(); + // key: data reg hpa, value: bdf + let mut msi_data_reg_map: BTreeMap = BTreeMap::new(); + // key: 
msi-x table bar, value: bdf + let mut msix_bar_map: BTreeMap = BTreeMap::new(); + let mut config_space_size = 0usize; + + // info!("entry start: {:x} size: {:x}", start, size); + let mut buses: VecDeque = VecDeque::new(); + let mut max_bus: u8 = 0; + buses.push_back(max_bus); + + while !buses.is_empty() { + let bus = buses.pop_front().unwrap(); + let bus_config_hpa = (config_base_hpa as usize) + ((bus as usize) << 20); + let mut bus_empty: bool = true; + + for dev_func in 0u8..=255 { + let bdf = ((bus as u16) << 8) + (dev_func as u16); + let bdf_config_hpa = bus_config_hpa + ((dev_func as usize) << 12); + + let vendor_id = unsafe { *(bdf_config_hpa as *const u16) }; + if vendor_id == 0xffff { + continue; + } + + let device_id = unsafe { *((bdf_config_hpa + 0x2) as *const u16) }; + let header_type = unsafe { *((bdf_config_hpa + 0xe) as *const u8) }; + + println!( + "bdf: {:x}, bus: {:x}, dev_func: {:x}, vendor id: {:x}, device id: {:x}, header type: {:x}", + bdf, bus, dev_func, vendor_id, device_id, header_type + ); + + bdfs.push(bdf as _); + bus_empty = false; + + // pci bridge + if header_type.get_bits(0..7) == 0x1 { + let secondary_bus = unsafe { *((bdf_config_hpa + 0x19) as *const u8) }; + buses.push_back(secondary_bus); + } + + // probe msi/msi-x capability registers + let mut cap_pointer = unsafe { *((bdf_config_hpa + 0x34) as *const u8) } as usize; + while cap_pointer != 0 { + let cap_hpa = bdf_config_hpa + cap_pointer; + let cap_id = unsafe { *(cap_hpa as *const u8) }; + + if cap_id == 0x5 { + // msi capablility + let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; + let is_64b = msg_ctrl_reg.get_bit(7); + let per_vector_masking = msg_ctrl_reg.get_bit(8); + + let data_reg_hpa = match is_64b { + true => cap_hpa + 0xc, + false => cap_hpa + 0x8, + }; + msi_data_reg_map.insert(data_reg_hpa, bdf as _); + // println!("msi data reg hpa: {:x?}", data_reg_hpa); + println!("msi per vector masking: {:#x?}", per_vector_masking); + } else if cap_id == 0x11 { 
+ // msi-x capability + let msg_ctrl_reg = unsafe { *((cap_hpa + 0x2) as *const u16) }; + let table_size = msg_ctrl_reg.get_bits(0..=10) as usize; + let table_bir = + unsafe { *((cap_hpa + 0x4) as *const u16) }.get_bits(0..=2) as usize; + + // find msi-x table bar + let bar_hpa = bdf_config_hpa + 0x10 + (table_bir) * size_of::(); + let mut bar = unsafe { *(bar_hpa as *const u32) } as usize; + assert!(!bar.get_bit(0)); // memory request + match bar.get_bits(1..=2) { + 0b00 => { + // 32-bit decoding + bar &= !(0xfff); + } + 0b10 => { + // 64-bit decoding + let bar_high = + unsafe { *((bar_hpa + size_of::()) as *const u32) } as usize; + bar = (bar_high << 6) + bar.get_bits(26..=31); + } + _ => { + panic!("MSI-X table BAR type error!"); + } + } + + /*println!( + "table size: {:x}, table bir: {:x}, bar: {:x}", + table_size, table_bir, bar + );*/ + msix_bar_map.insert(bar, bdf as _); + + for i in 0..=table_size { + let data_reg_hpa = bar + i * size_of::() + 2 * size_of::(); + msi_data_reg_map.insert(data_reg_hpa, bdf as _); + // println!("msi-x data reg hpa: {:x?}", data_reg_hpa); + } + } + + // println!("cap id: {:x}, hpa: {:x}", cap_id, cap_hpa); + cap_pointer = unsafe { *((cap_hpa + 1) as *const u8) } as usize; + } + } + + if !bus_empty && bus > max_bus { + max_bus = bus; + } + } + + config_space_size = ((max_bus as usize - 0usize) + 1) << 20; + // info!("config space size: {:x}", config_space_size); + + ( + bdfs, + msi_data_reg_map, + msix_bar_map, + config_space_size, + max_bus, + ) +} + +fn get_pci_mmio_addr() -> Option { + let addr = get_pio_bitmap(this_zone_id()).pci_config_addr as usize; + let (base, _) = crate::arch::acpi::root_get_config_space_info().unwrap(); + + let enable = addr.get_bit(31); + let bdf = addr.get_bits(8..=23); + let reg = addr.get_bits(2..=7); + + if enable { + // info!("pio: {:x}, bdf: {:x}", base + (bdf << 12) + (reg << 2), bdf); + Some(base + (bdf << 12) + (reg << 2)) + } else { + None + } +} + +pub fn handle_pci_config_port_read(io_info: 
&VmxIoExitInfo) -> u32 { + let mut value = 0u32; + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) { + value = get_pio_bitmap(this_zone_id()).pci_config_addr; + + let offset_bit = 8 * (io_info.port - PCI_CONFIG_ADDR_PORT.start) as usize; + value = value.get_bits(offset_bit..offset_bit + (8 * io_info.access_size) as usize); + } else { + if let Some(mmio_addr) = get_pci_mmio_addr() { + let offset: usize = (io_info.port - PCI_CONFIG_DATA_PORT.start) as usize; + if this_zone() + .read() + .find_mmio_region(mmio_addr + offset, io_info.access_size as _) + .is_some() + { + let mut mmio_access = MMIOAccess { + address: mmio_addr + offset, + size: io_info.access_size as _, + is_write: false, + value: 0, + }; + mmio_handle_access(&mut mmio_access); + value = mmio_access.value as _; + // info!("value: {:x}", value); + } + } + } + value +} + +pub fn handle_pci_config_port_write(io_info: &VmxIoExitInfo, value: u32) { + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) { + let offset_bit = 8 * (io_info.port - PCI_CONFIG_ADDR_PORT.start) as usize; + get_pio_bitmap(this_zone_id()).pci_config_addr.set_bits( + offset_bit..offset_bit + (8 * (io_info.access_size as usize)), + value, + ); + } else { + if let Some(mmio_addr) = get_pci_mmio_addr() { + let offset: usize = (io_info.port - PCI_CONFIG_DATA_PORT.start) as usize; + if this_zone() + .read() + .find_mmio_region(mmio_addr + offset, io_info.access_size as _) + .is_some() + { + mmio_handle_access(&mut MMIOAccess { + address: mmio_addr + offset, + size: io_info.access_size as _, + is_write: true, + value: value as _, + }); + } + } + } +} diff --git a/src/arch/x86_64/pio.rs b/src/arch/x86_64/pio.rs new file mode 100644 index 00000000..feee7537 --- /dev/null +++ b/src/arch/x86_64/pio.rs @@ -0,0 +1,125 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + consts::MAX_ZONE_NUM, + error::HvResult, + memory::{Frame, HostPhysAddr}, + zone::this_zone_id, +}; +use core::ops::Range; +use heapless::FnvIndexMap; + +pub const UART_COM1_BASE_PORT: u16 = 0x3f8; +pub const UART_COM1_PORT: Range = 0x3f8..0x400; +pub const PCI_CONFIG_ADDR_PORT: Range = 0xcf8..0xcfc; +pub const PCI_CONFIG_DATA_PORT: Range = 0xcfc..0xd00; + +static mut PIO_BITMAP_MAP: Option> = None; + +pub fn init_pio_bitmap_map() { + unsafe { PIO_BITMAP_MAP = Some(FnvIndexMap::new()) }; +} + +pub fn set_pio_bitmap(zone_id: usize) { + unsafe { + if let Some(map) = &mut PIO_BITMAP_MAP { + if map.contains_key(&zone_id) { + map.remove(&zone_id); + } + map.insert(zone_id, PortIoBitmap::new(zone_id)); + } + } +} + +pub fn get_pio_bitmap(zone_id: usize) -> &'static mut PortIoBitmap { + unsafe { + PIO_BITMAP_MAP + .as_mut() + .expect("PIO_BITMAP_MAP is not initialized!") + .get_mut(&zone_id) + .expect("pio bitmap for this Zone does not exist!") + } +} + +#[derive(Debug)] +pub struct PortIoBitmap { + pub a: Frame, + pub b: Frame, + pub pci_config_addr: u32, +} + +impl PortIoBitmap { + pub fn new(zone_id: usize) -> Self { + let mut bitmap = Self { + a: Frame::new_zero().unwrap(), + b: Frame::new_zero().unwrap(), + pci_config_addr: 0, + }; + + if zone_id == 0 { + bitmap.a.fill(0); + bitmap.b.fill(0); + } else { + bitmap.a.fill(0xff); + bitmap.b.fill(0xff); + } + + // ban i8259a ports + bitmap.set_intercept(0x20, true); + bitmap.set_intercept(0x21, true); + bitmap.set_intercept(0xa0, true); + bitmap.set_intercept(0xa1, true); + + // 
pci config ports + bitmap.set_range_intercept(PCI_CONFIG_ADDR_PORT, true); + bitmap.set_range_intercept(PCI_CONFIG_DATA_PORT, true); + + if zone_id == 0 { + #[cfg(feature = "graphics")] + bitmap.set_range_intercept(UART_COM1_PORT, true); + } + + // i8042, we won't use it, but intercept its ports might block linux init + bitmap.set_range_intercept(0x60..0x65, false); + + bitmap + } + + pub fn set_range_intercept(&mut self, mut ports: Range, intercept: bool) { + for port in ports { + self.set_intercept(port, intercept); + } + } + + pub fn set_intercept(&mut self, mut port: u16, intercept: bool) { + let bitmap = match port <= 0x7fff { + true => unsafe { core::slice::from_raw_parts_mut(self.a.as_mut_ptr(), 0x1000) }, + false => { + port -= 0x8000; + unsafe { core::slice::from_raw_parts_mut(self.b.as_mut_ptr(), 0x1000) } + } + }; + + let byte = (port / 8) as usize; + let bits = port % 8; + if intercept { + bitmap[byte] |= 1 << bits; + } else { + bitmap[byte] &= !(1 << bits); + } + } +} diff --git a/src/arch/x86_64/s1pt.rs b/src/arch/x86_64/s1pt.rs new file mode 100644 index 00000000..13ead238 --- /dev/null +++ b/src/arch/x86_64/s1pt.rs @@ -0,0 +1,76 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use super::paging::{GenericPTE, Level4PageTable, PagingInstr}; +use crate::{ + consts::PAGE_SIZE, + memory::{ + addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}, + MemFlags, + }, +}; +use core::fmt; +use numeric_enum_macro::numeric_enum; + +numeric_enum! { + #[repr(u64)] + #[derive(Debug, Clone, Copy, Eq, PartialEq)] + enum MemType { + Normal = 0, + Device = 1, + } +} + +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct PageTableEntry(pub u64); + +impl GenericPTE for PageTableEntry { + fn addr(&self) -> HostPhysAddr { + 0 + } + fn flags(&self) -> MemFlags { + MemFlags::READ + } + fn is_unused(&self) -> bool { + false + } + fn is_present(&self) -> bool { + false + } + fn set_addr(&mut self, addr: HostPhysAddr) {} + fn set_flags(&mut self, flags: MemFlags, is_huge: bool) {} + fn set_table(&mut self, pa: HostPhysAddr) {} + fn clear(&mut self) {} + fn is_huge(&self) -> bool { + false + } +} + +impl fmt::Debug for PageTableEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +pub struct S1PTInstr; + +impl PagingInstr for S1PTInstr { + unsafe fn activate(root_paddr: HostPhysAddr) {} + fn flush(_vaddr: Option) {} +} + +pub type Stage1PageTable = Level4PageTable; diff --git a/src/arch/x86_64/s2pt.rs b/src/arch/x86_64/s2pt.rs new file mode 100644 index 00000000..9a4e8e49 --- /dev/null +++ b/src/arch/x86_64/s2pt.rs @@ -0,0 +1,308 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + iommu, + paging::{GenericPTE, Level4PageTable, PagingInstr}, + vmcs::*, + }, + consts::PAGE_SIZE, + error::HvResult, + memory::{ + addr::{GuestPhysAddr, HostPhysAddr, PhysAddr}, + MemFlags, + }, + percpu::this_cpu_data, + zone::this_zone_id, +}; +use bit_field::BitField; +use bitflags::bitflags; +use core::{arch::asm, fmt}; + +bitflags! { + /// EPT entry flags. (SDM Vol. 3C, Section 28.3.2) + pub struct DescriptorAttr: u64 { + /// Read access. + const READ = 1 << 0; + /// Write access. + const WRITE = 1 << 1; + /// Execute access. + const EXECUTE = 1 << 2; + /// EPT memory type. Only for terminate pages. + const MEM_TYPE_MASK = 0b111 << 3; + /// Ignore PAT memory type. Only for terminate pages. + const IGNORE_PAT = 1 << 6; + /// Specifies that the entry maps a huge frame instead of a page table. + /// Only allowed in P2 or P3 tables. + const HUGE_PAGE = 1 << 7; + /// If bit 6 of EPTP is 1, accessed flag for EPT. + const ACCESSED = 1 << 8; + /// If bit 6 of EPTP is 1, dirty flag for EPT. + const DIRTY = 1 << 9; + /// Execute access for user-mode linear addresses. + const EXECUTE_FOR_USER = 1 << 10; + } +} + +/// INVEPT type. (SDM Vol. 3C, Section 30.3) +#[repr(u64)] +#[derive(Debug)] +#[allow(dead_code)] +pub enum InvS2PTType { + /// The logical processor invalidates all mappings associated with bits + /// 51:12 of the EPT pointer (EPTP) specified in the INVEPT descriptor. + /// It may invalidate other mappings as well. + SingleContext = 1, + /// The logical processor invalidates mappings associated with all EPTPs. + Global = 2, +} + +bitflags! { + /// Extended-Page-Table Pointer. (SDM Vol. 3C, Section 24.6.11) + pub struct S2PTPointer: u64 { + /// EPT paging-structure memory type: Uncacheable (UC). + #[allow(clippy::identity_op)] + const MEM_TYPE_UC = 0 << 0; + /// EPT paging-structure memory type: Write-back (WB). 
+ #[allow(clippy::identity_op)] + const MEM_TYPE_WB = 6 << 0; + /// EPT page-walk length 1. + const WALK_LENGTH_1 = 0 << 3; + /// EPT page-walk length 2. + const WALK_LENGTH_2 = 1 << 3; + /// EPT page-walk length 3. + const WALK_LENGTH_3 = 2 << 3; + /// EPT page-walk length 4. + const WALK_LENGTH_4 = 3 << 3; + /// Setting this control to 1 enables accessed and dirty flags for EPT. + const ENABLE_ACCESSED_DIRTY = 1 << 6; + } +} + +impl S2PTPointer { + pub fn from_table_phys(root_paddr: HostPhysAddr) -> Self { + let aligned_addr = root_paddr & !(PAGE_SIZE - 1); + let flags = unsafe { Self::from_bits_retain(aligned_addr as u64) }; + flags | Self::MEM_TYPE_WB | Self::WALK_LENGTH_4 | Self::ENABLE_ACCESSED_DIRTY + } +} + +numeric_enum_macro::numeric_enum! { + #[repr(u8)] + #[derive(Debug, PartialEq, Clone, Copy)] + /// EPT memory typing. (SDM Vol. 3C, Section 28.3.7) + enum MemType { + Uncacheable = 0, + WriteCombining = 1, + WriteThrough = 4, + WriteProtected = 5, + WriteBack = 6, + } +} + +impl DescriptorAttr { + fn set_mem_type(&mut self, mem_type: MemType) { + let mut bits = self.bits(); + bits.set_bits(3..6, mem_type as u64); + *self = Self::from_bits_truncate(bits) + } + + fn mem_type(&self) -> Result { + MemType::try_from(self.bits().get_bits(3..6) as u8) + } +} + +impl From for MemFlags { + fn from(attr: DescriptorAttr) -> Self { + let mut flags = Self::empty(); + if attr.contains(DescriptorAttr::READ) { + flags |= Self::READ; + } + if attr.contains(DescriptorAttr::WRITE) { + flags |= Self::WRITE; + } + if attr.contains(DescriptorAttr::EXECUTE) { + flags |= Self::EXECUTE; + } + if let Ok(MemType::Uncacheable) = attr.mem_type() { + flags |= Self::IO; + } + flags + } +} + +impl From for DescriptorAttr { + fn from(flags: MemFlags) -> Self { + if flags.is_empty() { + return Self::empty(); + } + let mut attr = Self::empty(); + if flags.contains(MemFlags::READ) { + attr |= Self::READ; + } + if flags.contains(MemFlags::WRITE) { + attr |= Self::WRITE; + } + if 
flags.contains(MemFlags::EXECUTE) { + attr |= Self::EXECUTE | Self::EXECUTE_FOR_USER; + } + if !flags.contains(MemFlags::IO) { + attr.set_mem_type(MemType::WriteBack); + } else { + attr.set_mem_type(MemType::WriteThrough); + // attr &= !Self::READ; + } + attr + } +} + +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct PageTableEntry(u64); + +impl PageTableEntry { + const PHYS_ADDR_MASK: usize = 0x000f_ffff_ffff_f000; // 12..52 + + fn memory_type(&self) -> MemType { + DescriptorAttr::from_bits_truncate(self.0) + .mem_type() + .unwrap() + } +} + +impl GenericPTE for PageTableEntry { + fn addr(&self) -> HostPhysAddr { + self.0 as usize & Self::PHYS_ADDR_MASK + } + + fn flags(&self) -> MemFlags { + DescriptorAttr::from_bits_truncate(self.0).into() + } + + fn is_unused(&self) -> bool { + self.0 == 0 + } + + fn is_present(&self) -> bool { + self.0 & 0x7 != 0 // RWX != 0 + } + + fn is_huge(&self) -> bool { + DescriptorAttr::from_bits_truncate(self.0).contains(DescriptorAttr::HUGE_PAGE) + } + + fn set_addr(&mut self, paddr: HostPhysAddr) { + self.0 = + (self.0 & !Self::PHYS_ADDR_MASK as u64) | (paddr as u64 & Self::PHYS_ADDR_MASK as u64); + } + + fn set_flags(&mut self, flags: MemFlags, is_huge: bool) { + let mut attr = DescriptorAttr::from(flags); + if is_huge { + attr |= DescriptorAttr::HUGE_PAGE; + } + self.0 = (attr.bits() & !Self::PHYS_ADDR_MASK as u64) + | (self.0 as u64 & Self::PHYS_ADDR_MASK as u64); + } + + fn set_table(&mut self, paddr: HostPhysAddr) { + let attr = DescriptorAttr::READ | DescriptorAttr::WRITE | DescriptorAttr::EXECUTE; + self.0 = (attr.bits() & !Self::PHYS_ADDR_MASK as u64) + | (paddr as u64 & Self::PHYS_ADDR_MASK as u64); + } + + fn clear(&mut self) { + self.0 = 0 + } +} + +impl fmt::Debug for PageTableEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Stage2PageTableEntry") + .field("raw", &self.0) + .field("paddr", &self.addr()) + .field("flags", &self.flags()) + .field("memory_type", &self.memory_type()) 
+ .finish() + } +} + +/// Invalidate Translations Derived from EPT. (SDM Vol. 3C, Section 30.3) +/// +/// Invalidates mappings in the translation lookaside buffers (TLBs) and +/// paging-structure caches that were derived from extended page tables (EPT). +/// (See Chapter 28, “VMX Support for Address Translation”.) Invalidation is +/// based on the INVEPT type specified in the register operand and the INVEPT +/// descriptor specified in the memory operand. +unsafe fn invs2pt(inv_type: InvS2PTType, s2ptp: u64) { + let invs2pt_desc = [s2ptp, 0]; + asm!("invept {0}, [{1}]", in(reg) inv_type as u64, in(reg) &invs2pt_desc); +} + +pub struct S2PTInstr; + +impl PagingInstr for S2PTInstr { + unsafe fn activate(root_paddr: HostPhysAddr) { + let s2ptp = S2PTPointer::from_table_phys(root_paddr).bits(); + crate::arch::vmcs::VmcsControl64::EPTP.write(s2ptp).unwrap(); + unsafe { invs2pt(InvS2PTType::SingleContext, s2ptp) }; + + // if this cpu is boot cpu and it is running + if this_cpu_data().arch_cpu.power_on && this_cpu_data().boot_cpu { + iommu::fill_dma_translation_tables(this_zone_id(), root_paddr); + } + } + + fn flush(_vaddr: Option) {} +} + +/// Information about nested page faults. +#[derive(Debug)] +pub struct Stage2PageFaultInfo { + /// Access type that caused the nested page fault. + pub access_flags: MemFlags, + /// Guest physical address that caused the nested page fault. + pub fault_guest_paddr: GuestPhysAddr, +} + +impl Stage2PageFaultInfo { + pub fn new() -> HvResult { + // SDM Vol. 3C, Section 27.2.1, Table 27-7 + let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?; + let fault_guest_paddr = VmcsReadOnly64::GUEST_PHYSICAL_ADDR.read()? 
as usize; + let mut access_flags = MemFlags::empty(); + if qualification.get_bit(0) { + access_flags |= MemFlags::READ; + } + if qualification.get_bit(1) { + access_flags |= MemFlags::WRITE; + } + if qualification.get_bit(2) { + access_flags |= MemFlags::EXECUTE; + } + Ok(Stage2PageFaultInfo { + access_flags, + fault_guest_paddr, + }) + } +} + +pub type Stage2PageTable = Level4PageTable; + +pub fn stage2_mode_detect() { + info!("Dynamical detection of stage-2 paging mode is not supported yet."); +} diff --git a/src/arch/x86_64/trap.S b/src/arch/x86_64/trap.S new file mode 100644 index 00000000..613f1a15 --- /dev/null +++ b/src/arch/x86_64/trap.S @@ -0,0 +1,75 @@ +.equ NUM_INT, 256 + +.altmacro +.macro DEF_HANDLER, i +.Ltrap_handler_\i: +.if \i == 8 || (\i >= 10 && \i <= 14) || \i == 17 + // error code pushed by CPU + push \i // interrupt vector + jmp .Ltrap_common +.else + push 0 // fill in error code in trap frame + push \i // interrupt vector + jmp .Ltrap_common +.endif +.endm + +.macro DEF_TABLE_ENTRY, i + .quad .Ltrap_handler_\i +.endm + +.section .text +_trap_handlers: +.set i, 0 +.rept NUM_INT + DEF_HANDLER %i + .set i, i + 1 +.endr + +.Ltrap_common: + push r15 + push r14 + push r13 + push r12 + push r11 + push r10 + push r9 + push r8 + push rdi + push rsi + push rbp + push rbx + push rdx + push rcx + push rax + + mov rdi, rsp + call {0} + + pop rax + pop rcx + pop rdx + pop rbx + pop rbp + pop rsi + pop rdi + pop r8 + pop r9 + pop r10 + pop r11 + pop r12 + pop r13 + pop r14 + pop r15 + + add rsp, 16 // pop vector, error_code + iretq + +.section .rodata +.global _hyp_trap_vector +_hyp_trap_vector: +.set i, 0 +.rept NUM_INT + DEF_TABLE_ENTRY %i + .set i, i + 1 +.endr \ No newline at end of file diff --git a/src/arch/x86_64/trap.rs b/src/arch/x86_64/trap.rs new file mode 100644 index 00000000..edfbeee9 --- /dev/null +++ b/src/arch/x86_64/trap.rs @@ -0,0 +1,429 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + cpu::{this_cpu_id, ArchCpu}, + cpuid::{CpuIdEax, ExtendedFeaturesEcx, FeatureInfoFlags}, + hpet, + idt::{IdtStruct, IdtVector}, + ipi, + msr::Msr::{self, *}, + s2pt::Stage2PageFaultInfo, + vmcs::*, + vmx::{VmxCrAccessInfo, VmxExitInfo, VmxExitReason, VmxInterruptInfo, VmxIoExitInfo}, + }, + device::{ + irqchip::{ + inject_vector, + pic::{ioapic::irqs, lapic::VirtLocalApic}, + }, + uart::{virt_console_io_read, virt_console_io_write, UartReg}, + }, + error::HvResult, + hypercall::HyperCall, + memory::{mmio_handle_access, MMIOAccess, MemFlags}, + percpu::{this_cpu_data, this_zone}, + zone::this_zone_id, +}; +use bit_field::BitField; +use core::mem::size_of; +use x86_64::registers::control::Cr4Flags; + +use super::{ + pci::{handle_pci_config_port_read, handle_pci_config_port_write}, + pio::{PCI_CONFIG_ADDR_PORT, PCI_CONFIG_DATA_PORT, UART_COM1_PORT}, +}; + +core::arch::global_asm!( + include_str!("trap.S"), + sym arch_handle_trap +); + +const IRQ_VECTOR_START: u8 = 0x20; +const IRQ_VECTOR_END: u8 = 0xff; + +const VM_EXIT_INSTR_LEN_CPUID: u8 = 2; +const VM_EXIT_INSTR_LEN_HLT: u8 = 1; +const VM_EXIT_INSTR_LEN_RDMSR: u8 = 2; +const VM_EXIT_INSTR_LEN_WRMSR: u8 = 2; +const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct TrapFrame { + pub usr: [u64; 15], + + // pushed by 'trap.S' + pub vector: u64, + pub error_code: u64, + + // pushed by CPU + pub rip: u64, + pub cs: u64, + pub rflags: 
u64, + pub rsp: u64, + pub ss: u64, +} + +lazy_static::lazy_static! { + static ref IDT: IdtStruct = IdtStruct::new(); +} + +pub fn install_trap_vector() { + IDT.load(); +} + +#[no_mangle] +pub fn arch_handle_trap(tf: &mut TrapFrame) { + // println!("trap {} @ {:#x}", tf.vector, tf.rip); + match tf.vector as u8 { + IRQ_VECTOR_START..=IRQ_VECTOR_END => handle_irq(tf.vector as u8), + _ => { + println!( + "Unhandled exception {} (error_code = {:#x}) @ {:#x}", + tf.vector, tf.error_code, tf.rip + ); + } + } +} + +fn handle_irq(vector: u8) { + match vector { + IdtVector::VIRT_IPI_VECTOR => { + ipi::handle_virt_ipi(); + } + IdtVector::APIC_SPURIOUS_VECTOR | IdtVector::APIC_ERROR_VECTOR => {} + _ => { + if vector >= 0x20 && this_cpu_data().arch_cpu.power_on { + inject_vector(this_cpu_id(), vector, None, false); + } + } + } + unsafe { VirtLocalApic::phys_local_apic().end_of_interrupt() }; +} + +fn handle_cpuid(arch_cpu: &mut ArchCpu) -> HvResult { + use raw_cpuid::{cpuid, CpuIdResult}; + // TODO: temporary hypervisor hack + let signature = unsafe { &*("ACRNACRNACRN".as_ptr() as *const [u32; 3]) }; + let cr4_flags = Cr4Flags::from_bits_truncate(arch_cpu.cr(4) as _); + let regs = arch_cpu.regs_mut(); + let rax: Result = (regs.rax as u32).try_into(); + let mut res: CpuIdResult = cpuid!(regs.rax, regs.rcx); + + if let Ok(function) = rax { + res = match function { + CpuIdEax::FeatureInfo => { + let mut res = cpuid!(regs.rax, regs.rcx); + let mut ecx = FeatureInfoFlags::from_bits_truncate(res.ecx as _); + + ecx.remove(FeatureInfoFlags::VMX); + // ecx.remove(FeatureInfoFlags::TSC_DEADLINE); + ecx.remove(FeatureInfoFlags::XSAVE); + + ecx.insert(FeatureInfoFlags::X2APIC); + ecx.insert(FeatureInfoFlags::HYPERVISOR); + res.ecx = ecx.bits() as _; + + let mut edx = FeatureInfoFlags::from_bits_truncate((res.edx as u64) << 32); + // edx.remove(FeatureInfoFlags::TSC); + res.edx = (edx.bits() >> 32) as _; + + res + } + CpuIdEax::StructuredExtendedFeatureInfo => { + let mut res = 
cpuid!(regs.rax, regs.rcx);
                let mut ecx = ExtendedFeaturesEcx::from_bits_truncate(res.ecx as _);
                ecx.remove(ExtendedFeaturesEcx::WAITPKG);
                res.ecx = ecx.bits() as _;

                res
            }
            CpuIdEax::ProcessorFrequencyInfo => {
                // Report the TSC frequency measured via HPET when available;
                // otherwise fall through to the hardware leaf.
                if let Some(freq_mhz) = hpet::get_tsc_freq_mhz() {
                    CpuIdResult {
                        eax: freq_mhz,
                        ebx: freq_mhz,
                        ecx: freq_mhz,
                        edx: 0,
                    }
                } else {
                    cpuid!(regs.rax, regs.rcx)
                }
            }
            CpuIdEax::HypervisorInfo => CpuIdResult {
                eax: CpuIdEax::HypervisorFeatures as u32,
                ebx: signature[0],
                ecx: signature[1],
                edx: signature[2],
            },
            CpuIdEax::HypervisorFeatures => CpuIdResult {
                eax: 0,
                ebx: 0,
                ecx: 0,
                edx: 0,
            },
            _ => cpuid!(regs.rax, regs.rcx),
        };
    }

    trace!(
        "VM exit: CPUID({:#x}, {:#x}): {:?}",
        regs.rax,
        regs.rcx,
        res
    );
    regs.rax = res.eax as _;
    regs.rbx = res.ebx as _;
    regs.rcx = res.ecx as _;
    regs.rdx = res.edx as _;

    arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_CPUID)?;
    Ok(())
}

/// Handle a VM exit caused by a guest control-register access.
///
/// CR emulation is not implemented yet; the exit qualification is decoded
/// first so the panic message identifies which CR the guest touched.
///
/// FIX(review): the empty `match cr_access_info.cr_n { 0 => {} _ => {} }` and
/// the trailing `Ok(())` that followed the unconditional `panic!` were
/// unreachable dead code and have been removed — `panic!` diverges, so the
/// function still type-checks as `HvResult`.
fn handle_cr_access(arch_cpu: &mut ArchCpu) -> HvResult {
    let cr_access_info = VmxCrAccessInfo::new()?;
    panic!(
        "VM-exit: CR{} access:\n{:#x?}",
        cr_access_info.cr_n, arch_cpu
    );
}

/// Handle a VM exit for an external (host-side) interrupt: decode the
/// interruption-information field and dispatch through the normal IRQ path.
fn handle_external_interrupt() -> HvResult {
    let int_info = VmxInterruptInfo::new()?;
    trace!("VM-exit: external interrupt: {:#x?}", int_info);
    assert!(int_info.valid);
    handle_irq(int_info.vector);
    Ok(())
}

/// Handle a VMCALL exit: forward (code = RAX, arg0 = RDI, arg1 = RSI) to the
/// hypercall layer and return the result (or the error code) in guest RAX.
fn handle_hypercall(arch_cpu: &mut ArchCpu) -> HvResult {
    let regs = arch_cpu.regs_mut();
    debug!(
        "VM exit: VMCALL({:#x}): {:x?}",
        regs.rax,
        [regs.rdi, regs.rsi]
    );
    let (code, arg0, arg1) = (regs.rax, regs.rdi, regs.rsi);
    let cpu_data = this_cpu_data();
    let result = match HyperCall::new(cpu_data).hypercall(code as _, arg0, arg1) {
        Ok(ret) => ret as _,
        Err(e) => {
            error!("hypercall error: {:#?}", e);
            e.code()
        }
    };
    debug!("HVC result = {}", result);
    regs.rax = result as _;

arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_VMCALL)?; + Ok(()) +} + +fn handle_io_instruction(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult { + let io_info = VmxIoExitInfo::new()?; + + /*info!( + "VM exit: I/O instruction @ {:#x}: {:#x?}", + exit_info.guest_rip, io_info, + );*/ + + if io_info.is_string { + error!("INS/OUTS instructions are not supported!"); + return hv_result_err!(ENOSYS); + } + if io_info.is_repeat { + error!("REP prefixed I/O instructions are not supported!"); + return hv_result_err!(ENOSYS); + } + + let mut value: u32 = 0; + if !io_info.is_in { + let rax = arch_cpu.regs().rax; + value = match io_info.access_size { + 1 => rax & 0xff, + 2 => rax & 0xffff, + 4 => rax, + _ => unreachable!(), + } as _; + + // TODO: reconstruct + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) + || PCI_CONFIG_DATA_PORT.contains(&io_info.port) + { + handle_pci_config_port_write(&io_info, value); + } else if UART_COM1_PORT.contains(&io_info.port) { + virt_console_io_write(io_info.port, value); + } else { + /* info!( + "unhandled port io write {:x} value: {:x}", + io_info.port, value + ); */ + } + } else { + if PCI_CONFIG_ADDR_PORT.contains(&io_info.port) + || PCI_CONFIG_DATA_PORT.contains(&io_info.port) + { + value = handle_pci_config_port_read(&io_info); + } else if UART_COM1_PORT.contains(&io_info.port) { + value = virt_console_io_read(io_info.port); + } else { + // info!("unhandled port io read {:x}", io_info.port); + value = 0x0; + } + let rax = &mut arch_cpu.regs_mut().rax; + // SDM Vol. 1, Section 3.4.1.1: + // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the + // destination general-purpose register. + // * 8-bit and 16-bit operands generate an 8-bit or 16-bit result. The upper 56 bits or + // 48 bits (respectively) of the destination general-purpose register are not modified + // by the operation. 
match io_info.access_size {
            1 => *rax = (*rax & !0xff) | (value & 0xff) as u64,
            2 => *rax = (*rax & !0xffff) | (value & 0xffff) as u64,
            4 => *rax = value as u64,
            _ => unreachable!(),
        }
    }

    arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?;
    Ok(())
}

/// Handle an RDMSR exit: emulate the APIC-base and virtual-LAPIC MSR ranges,
/// returning the value in EDX:EAX; unrecognized MSRs are ignored (RIP is
/// still advanced so the guest makes progress).
fn handle_msr_read(arch_cpu: &mut ArchCpu) -> HvResult {
    let rcx = arch_cpu.regs().rcx as u32;

    if let Ok(msr) = Msr::try_from(rcx) {
        let res = if msr == IA32_APIC_BASE {
            let mut apic_base = unsafe { IA32_APIC_BASE.read() };
            // info!("APIC BASE: {:x}", apic_base);
            apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC
            Ok(apic_base)
        } else if VirtLocalApic::msr_range().contains(&rcx) {
            arch_cpu.virt_lapic.rdmsr(msr)
        } else {
            hv_result_err!(ENOSYS)
        };

        if let Ok(value) = res {
            debug!("VM exit: RDMSR({:#x}) -> {:#x}", rcx, value);
            arch_cpu.regs_mut().rax = value & 0xffff_ffff;
            arch_cpu.regs_mut().rdx = value >> 32;
        } else {
            warn!("Failed to handle RDMSR({:#x}): {:?}", rcx, res);
        }
    } else {
        // warn!("Unrecognized RDMSR({:#x})", rcx);
    }

    arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_RDMSR)?;
    Ok(())
}

/// Handle a WRMSR exit: writes to the APIC base are ignored, writes to the
/// virtual-LAPIC range (and the TSC-deadline MSR) are emulated.
///
/// FIX(review): the original `Msr::try_from(rcx).unwrap()` let the guest
/// panic the hypervisor with any unrecognized MSR index. Mirror
/// `handle_msr_read` instead: skip unknown MSRs gracefully and still advance
/// the guest RIP.
fn handle_msr_write(arch_cpu: &mut ArchCpu) -> HvResult {
    let rcx = arch_cpu.regs().rcx as u32;

    if let Ok(msr) = Msr::try_from(rcx) {
        let value = (arch_cpu.regs().rax & 0xffff_ffff) | (arch_cpu.regs().rdx << 32);
        debug!("VM exit: WRMSR({:#x}) <- {:#x}", rcx, value);

        let res = if msr == IA32_APIC_BASE {
            Ok(()) // ignore
        } else if VirtLocalApic::msr_range().contains(&rcx) || msr == IA32_TSC_DEADLINE {
            arch_cpu.virt_lapic.wrmsr(msr, value)
        } else {
            hv_result_err!(ENOSYS)
        };

        if res.is_err() {
            warn!(
                "Failed to handle WRMSR({:#x}) <- {:#x}: {:?}\n{:#x?}",
                rcx, value, res, arch_cpu
            );
        }
    } else {
        // warn!("Unrecognized WRMSR({:#x})", rcx);
    }

    arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_WRMSR)?;
    Ok(())
}

/// Handle an EPT-violation exit by forwarding the faulting guest-physical
/// address to the generic MMIO dispatch layer.
fn handle_s2pt_violation(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult {
    let fault_info = Stage2PageFaultInfo::new()?;
mmio_handle_access(&mut MMIOAccess {
        address: fault_info.fault_guest_paddr,
        size: 0,
        is_write: fault_info.access_flags.contains(MemFlags::WRITE),
        value: 0,
    })?;

    Ok(())
}

/// Handle a triple-fault exit. A guest triple fault is treated as fatal:
/// dump the faulting RIP, the instruction length, and the full vCPU state.
///
/// FIX(review): the trailing `Ok(())` after the unconditional `panic!` was
/// unreachable and has been removed (`panic!` diverges, so the function still
/// type-checks as `HvResult`).
fn handle_triple_fault(arch_cpu: &mut ArchCpu, exit_info: &VmxExitInfo) -> HvResult {
    panic!(
        "VM exit: Triple fault @ {:#x}, instr length: {:x}\n {:#x?}",
        exit_info.guest_rip, exit_info.exit_instruction_length, arch_cpu
    );
    // If triple faults are ever made survivable, skip the faulting instruction:
    // arch_cpu.advance_guest_rip(exit_info.exit_instruction_length as _)?;
}

/// Top-level VM-exit dispatcher called from the VMX exit path.
///
/// Reads the exit reason from the VMCS, panics on a VM-entry failure, routes
/// each handled exit reason to its handler, and panics on any unhandled
/// reason or handler error (all of which are hypervisor bugs at this stage).
pub fn handle_vmexit(arch_cpu: &mut ArchCpu) -> HvResult {
    let exit_info = VmxExitInfo::new()?;
    debug!("VM exit: {:#x?}", exit_info);

    if exit_info.entry_failure {
        panic!("VM entry failed: {:#x?}", exit_info);
    }

    let res = match exit_info.exit_reason {
        VmxExitReason::EXTERNAL_INTERRUPT => handle_external_interrupt(),
        VmxExitReason::TRIPLE_FAULT => handle_triple_fault(arch_cpu, &exit_info),
        VmxExitReason::INTERRUPT_WINDOW => Vmcs::set_interrupt_window(false),
        VmxExitReason::CPUID => handle_cpuid(arch_cpu),
        VmxExitReason::HLT => {
            // Nothing to emulate for HLT; just step past the instruction.
            arch_cpu.advance_guest_rip(VM_EXIT_INSTR_LEN_HLT)?;
            Ok(())
        }
        VmxExitReason::VMCALL => handle_hypercall(arch_cpu),
        VmxExitReason::CR_ACCESS => handle_cr_access(arch_cpu),
        VmxExitReason::IO_INSTRUCTION => handle_io_instruction(arch_cpu, &exit_info),
        VmxExitReason::MSR_READ => handle_msr_read(arch_cpu),
        VmxExitReason::MSR_WRITE => handle_msr_write(arch_cpu),
        VmxExitReason::EPT_VIOLATION => handle_s2pt_violation(arch_cpu, &exit_info),
        _ => panic!(
            "Unhandled VM-Exit reason {:?}:\n{:#x?}",
            exit_info.exit_reason, arch_cpu
        ),
    };

    if res.is_err() {
        panic!(
            "Failed to handle VM-exit {:?}:\n{:#x?}\n{:?}",
            exit_info.exit_reason,
            arch_cpu,
            res.err()
        );
    }

    Ok(())
}
diff --git a/src/arch/x86_64/vmcs.rs b/src/arch/x86_64/vmcs.rs
new file mode 100644
index 00000000..858e2a20
--- /dev/null
+++ b/src/arch/x86_64/vmcs.rs
@@ -0,0 +1,614 @@
+// Copyright (c) 2025 Syswonder
+// hvisor is
licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +#![allow(non_camel_case_types)] +use crate::{ + arch::{ + msr::Msr, + s2pt::Stage2PageFaultInfo, + vmx::{ + VmxExitInfo, VmxInstructionError, VmxInterruptInfo, VmxInterruptionType, VmxIoExitInfo, + }, + }, + error::{HvError, HvResult}, + memory::MemFlags, +}; +use bit_field::BitField; +use x86::{ + bits64::vmx, + vmx::{vmcs::control::PrimaryControls, Result as VmResult, VmFail}, +}; + +macro_rules! vmcs_read { + ($field_enum: ident, u64) => { + impl $field_enum { + pub fn read(self) -> x86::vmx::Result { + #[cfg(target_pointer_width = "64")] + unsafe { + vmx::vmread(self as u32) + } + #[cfg(target_pointer_width = "32")] + unsafe { + let field = self as u32; + Ok(vmx::vmread(field)? + (vmx::vmread(field + 1)? << 32)) + } + } + } + }; + ($field_enum: ident, $ux: ty) => { + impl $field_enum { + pub fn read(self) -> x86::vmx::Result<$ux> { + unsafe { vmx::vmread(self as u32).map(|v| v as $ux) } + } + } + }; +} + +macro_rules! 
vmcs_write { + ($field_enum: ident, u64) => { + impl $field_enum { + pub fn write(self, value: u64) -> x86::vmx::Result<()> { + #[cfg(target_pointer_width = "64")] + unsafe { + vmx::vmwrite(self as u32, value) + } + #[cfg(target_pointer_width = "32")] + unsafe { + let field = self as u32; + vmx::vmwrite(field, value & 0xffff_ffff)?; + vmx::vmwrite(field + 1, value >> 32)?; + Ok(()) + } + } + } + }; + ($field_enum: ident, $ux: ty) => { + impl $field_enum { + pub fn write(self, value: $ux) -> x86::vmx::Result<()> { + unsafe { vmx::vmwrite(self as u32, value as u64) } + } + } + }; +} + +/// 16-Bit Control Fields. (SDM Vol. 3D, Appendix B.1.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControl16 { + /// Virtual-processor identifier (VPID). + VPID = 0x0, + /// Posted-interrupt notification vector. + POSTED_INTERRUPT_NOTIFICATION_VECTOR = 0x2, + /// EPTP index. + EPTP_INDEX = 0x4, +} +vmcs_read!(VmcsControl16, u16); +vmcs_write!(VmcsControl16, u16); + +/// 64-Bit Control Fields. (SDM Vol. 3D, Appendix B.2.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControl64 { + /// Address of I/O bitmap A (full). + IO_BITMAP_A_ADDR = 0x2000, + /// Address of I/O bitmap B (full). + IO_BITMAP_B_ADDR = 0x2002, + /// Address of MSR bitmaps (full). + MSR_BITMAPS_ADDR = 0x2004, + /// VM-exit MSR-store address (full). + VMEXIT_MSR_STORE_ADDR = 0x2006, + /// VM-exit MSR-load address (full). + VMEXIT_MSR_LOAD_ADDR = 0x2008, + /// VM-entry MSR-load address (full). + VMENTRY_MSR_LOAD_ADDR = 0x200A, + /// Executive-VMCS pointer (full). + EXECUTIVE_VMCS_PTR = 0x200C, + /// PML address (full). + PML_ADDR = 0x200E, + /// TSC offset (full). + TSC_OFFSET = 0x2010, + /// Virtual-APIC address (full). + VIRT_APIC_ADDR = 0x2012, + /// APIC-access address (full). + APIC_ACCESS_ADDR = 0x2014, + /// Posted-interrupt descriptor address (full). + POSTED_INTERRUPT_DESC_ADDR = 0x2016, + /// VM-function controls (full). + VM_FUNCTION_CONTROLS = 0x2018, + /// EPT pointer (full). 
+ EPTP = 0x201A, + /// EOI-exit bitmap 0 (full). + EOI_EXIT0 = 0x201C, + /// EOI-exit bitmap 1 (full). + EOI_EXIT1 = 0x201E, + /// EOI-exit bitmap 2 (full). + EOI_EXIT2 = 0x2020, + /// EOI-exit bitmap 3 (full). + EOI_EXIT3 = 0x2022, + /// EPTP-list address (full). + EPTP_LIST_ADDR = 0x2024, + /// VMREAD-bitmap address (full). + VMREAD_BITMAP_ADDR = 0x2026, + /// VMWRITE-bitmap address (full). + VMWRITE_BITMAP_ADDR = 0x2028, + /// Virtualization-exception information address (full). + VIRT_EXCEPTION_INFO_ADDR = 0x202A, + /// XSS-exiting bitmap (full). + XSS_EXITING_BITMAP = 0x202C, + /// ENCLS-exiting bitmap (full). + ENCLS_EXITING_BITMAP = 0x202E, + /// Sub-page-permission-table pointer (full). + SUBPAGE_PERM_TABLE_PTR = 0x2030, + /// TSC multiplier (full). + TSC_MULTIPLIER = 0x2032, +} +vmcs_read!(VmcsControl64, u64); +vmcs_write!(VmcsControl64, u64); + +/// 32-Bit Control Fields. (SDM Vol. 3D, Appendix B.3.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControl32 { + /// Pin-based VM-execution controls. + PINBASED_EXEC_CONTROLS = 0x4000, + /// Primary processor-based VM-execution controls. + PRIMARY_PROCBASED_EXEC_CONTROLS = 0x4002, + /// Exception bitmap. + EXCEPTION_BITMAP = 0x4004, + /// Page-fault error-code mask. + PAGE_FAULT_ERR_CODE_MASK = 0x4006, + /// Page-fault error-code match. + PAGE_FAULT_ERR_CODE_MATCH = 0x4008, + /// CR3-target count. + CR3_TARGET_COUNT = 0x400A, + /// VM-exit controls. + VMEXIT_CONTROLS = 0x400C, + /// VM-exit MSR-store count. + VMEXIT_MSR_STORE_COUNT = 0x400E, + /// VM-exit MSR-load count. + VMEXIT_MSR_LOAD_COUNT = 0x4010, + /// VM-entry controls. + VMENTRY_CONTROLS = 0x4012, + /// VM-entry MSR-load count. + VMENTRY_MSR_LOAD_COUNT = 0x4014, + /// VM-entry interruption-information field. + VMENTRY_INTERRUPTION_INFO_FIELD = 0x4016, + /// VM-entry exception error code. + VMENTRY_EXCEPTION_ERR_CODE = 0x4018, + /// VM-entry instruction length. + VMENTRY_INSTRUCTION_LEN = 0x401A, + /// TPR threshold. 
+ TPR_THRESHOLD = 0x401C, + /// Secondary processor-based VM-execution controls. + SECONDARY_PROCBASED_EXEC_CONTROLS = 0x401E, + /// PLE_Gap. + PLE_GAP = 0x4020, + /// PLE_Window. + PLE_WINDOW = 0x4022, +} +vmcs_read!(VmcsControl32, u32); +vmcs_write!(VmcsControl32, u32); + +/// Natural-Width Control Fields. (SDM Vol. 3D, Appendix B.4.1) +#[derive(Clone, Copy, Debug)] +pub enum VmcsControlNW { + /// CR0 guest/host mask. + CR0_GUEST_HOST_MASK = 0x6000, + /// CR4 guest/host mask. + CR4_GUEST_HOST_MASK = 0x6002, + /// CR0 read shadow. + CR0_READ_SHADOW = 0x6004, + /// CR4 read shadow. + CR4_READ_SHADOW = 0x6006, + /// CR3-target value 0. + CR3_TARGET_VALUE0 = 0x6008, + /// CR3-target value 1. + CR3_TARGET_VALUE1 = 0x600A, + /// CR3-target value 2. + CR3_TARGET_VALUE2 = 0x600C, + /// CR3-target value 3. + CR3_TARGET_VALUE3 = 0x600E, +} +vmcs_read!(VmcsControlNW, usize); +vmcs_write!(VmcsControlNW, usize); + +/// 16-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.1.2) +pub enum VmcsGuest16 { + /// Guest ES selector. + ES_SELECTOR = 0x800, + /// Guest CS selector. + CS_SELECTOR = 0x802, + /// Guest SS selector. + SS_SELECTOR = 0x804, + /// Guest DS selector. + DS_SELECTOR = 0x806, + /// Guest FS selector. + FS_SELECTOR = 0x808, + /// Guest GS selector. + GS_SELECTOR = 0x80a, + /// Guest LDTR selector. + LDTR_SELECTOR = 0x80c, + /// Guest TR selector. + TR_SELECTOR = 0x80e, + /// Guest interrupt status. + INTERRUPT_STATUS = 0x810, + /// PML index. + PML_INDEX = 0x812, +} +vmcs_read!(VmcsGuest16, u16); +vmcs_write!(VmcsGuest16, u16); + +/// 64-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.2.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsGuest64 { + /// VMCS link pointer (full). + LINK_PTR = 0x2800, + /// Guest IA32_DEBUGCTL (full). + IA32_DEBUGCTL = 0x2802, + /// Guest IA32_PAT (full). + IA32_PAT = 0x2804, + /// Guest IA32_EFER (full). + IA32_EFER = 0x2806, + /// Guest IA32_PERF_GLOBAL_CTRL (full). + IA32_PERF_GLOBAL_CTRL = 0x2808, + /// Guest PDPTE0 (full). 
+ PDPTE0 = 0x280A, + /// Guest PDPTE1 (full). + PDPTE1 = 0x280C, + /// Guest PDPTE2 (full). + PDPTE2 = 0x280E, + /// Guest PDPTE3 (full). + PDPTE3 = 0x2810, + /// Guest IA32_BNDCFGS (full). + IA32_BNDCFGS = 0x2812, + /// Guest IA32_RTIT_CTL (full). + IA32_RTIT_CTL = 0x2814, +} +vmcs_read!(VmcsGuest64, u64); +vmcs_write!(VmcsGuest64, u64); + +/// 32-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.3.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsGuest32 { + /// Guest ES limit. + ES_LIMIT = 0x4800, + /// Guest CS limit. + CS_LIMIT = 0x4802, + /// Guest SS limit. + SS_LIMIT = 0x4804, + /// Guest DS limit. + DS_LIMIT = 0x4806, + /// Guest FS limit. + FS_LIMIT = 0x4808, + /// Guest GS limit. + GS_LIMIT = 0x480A, + /// Guest LDTR limit. + LDTR_LIMIT = 0x480C, + /// Guest TR limit. + TR_LIMIT = 0x480E, + /// Guest GDTR limit. + GDTR_LIMIT = 0x4810, + /// Guest IDTR limit. + IDTR_LIMIT = 0x4812, + /// Guest ES access rights. + ES_ACCESS_RIGHTS = 0x4814, + /// Guest CS access rights. + CS_ACCESS_RIGHTS = 0x4816, + /// Guest SS access rights. + SS_ACCESS_RIGHTS = 0x4818, + /// Guest DS access rights. + DS_ACCESS_RIGHTS = 0x481A, + /// Guest FS access rights. + FS_ACCESS_RIGHTS = 0x481C, + /// Guest GS access rights. + GS_ACCESS_RIGHTS = 0x481E, + /// Guest LDTR access rights. + LDTR_ACCESS_RIGHTS = 0x4820, + /// Guest TR access rights. + TR_ACCESS_RIGHTS = 0x4822, + /// Guest interruptibility state. + INTERRUPTIBILITY_STATE = 0x4824, + /// Guest activity state. + ACTIVITY_STATE = 0x4826, + /// Guest SMBASE. + SMBASE = 0x4828, + /// Guest IA32_SYSENTER_CS. + IA32_SYSENTER_CS = 0x482A, + /// VMX-preemption timer value. + VMX_PREEMPTION_TIMER_VALUE = 0x482E, +} +vmcs_read!(VmcsGuest32, u32); +vmcs_write!(VmcsGuest32, u32); + +/// Natural-Width Guest-State Fields. (SDM Vol. 3D, Appendix B.4.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsGuestNW { + /// Guest CR0. + CR0 = 0x6800, + /// Guest CR3. + CR3 = 0x6802, + /// Guest CR4. + CR4 = 0x6804, + /// Guest ES base. 
+ ES_BASE = 0x6806, + /// Guest CS base. + CS_BASE = 0x6808, + /// Guest SS base. + SS_BASE = 0x680A, + /// Guest DS base. + DS_BASE = 0x680C, + /// Guest FS base. + FS_BASE = 0x680E, + /// Guest GS base. + GS_BASE = 0x6810, + /// Guest LDTR base. + LDTR_BASE = 0x6812, + /// Guest TR base. + TR_BASE = 0x6814, + /// Guest GDTR base. + GDTR_BASE = 0x6816, + /// Guest IDTR base. + IDTR_BASE = 0x6818, + /// Guest DR7. + DR7 = 0x681A, + /// Guest RSP. + RSP = 0x681C, + /// Guest RIP. + RIP = 0x681E, + /// Guest RFLAGS. + RFLAGS = 0x6820, + /// Guest pending debug exceptions. + PENDING_DBG_EXCEPTIONS = 0x6822, + /// Guest IA32_SYSENTER_ESP. + IA32_SYSENTER_ESP = 0x6824, + /// Guest IA32_SYSENTER_EIP. + IA32_SYSENTER_EIP = 0x6826, +} +vmcs_read!(VmcsGuestNW, usize); +vmcs_write!(VmcsGuestNW, usize); + +/// 16-Bit Host-State Fields. (SDM Vol. 3D, Appendix B.1.3) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHost16 { + /// Host ES selector. + ES_SELECTOR = 0xC00, + /// Host CS selector. + CS_SELECTOR = 0xC02, + /// Host SS selector. + SS_SELECTOR = 0xC04, + /// Host DS selector. + DS_SELECTOR = 0xC06, + /// Host FS selector. + FS_SELECTOR = 0xC08, + /// Host GS selector. + GS_SELECTOR = 0xC0A, + /// Host TR selector. + TR_SELECTOR = 0xC0C, +} +vmcs_read!(VmcsHost16, u16); +vmcs_write!(VmcsHost16, u16); + +/// 64-Bit Host-State Fields. (SDM Vol. 3D, Appendix B.2.4) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHost64 { + /// Host IA32_PAT (full). + IA32_PAT = 0x2C00, + /// Host IA32_EFER (full). + IA32_EFER = 0x2C02, + /// Host IA32_PERF_GLOBAL_CTRL (full). + IA32_PERF_GLOBAL_CTRL = 0x2C04, +} +vmcs_read!(VmcsHost64, u64); +vmcs_write!(VmcsHost64, u64); + +/// 32-Bit Host-State Field. (SDM Vol. 3D, Appendix B.3.4) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHost32 { + /// Host IA32_SYSENTER_CS. + IA32_SYSENTER_CS = 0x4C00, +} +vmcs_read!(VmcsHost32, u32); +vmcs_write!(VmcsHost32, u32); + +/// Natural-Width Host-State Fields. (SDM Vol. 
3D, Appendix B.4.4) +#[derive(Clone, Copy, Debug)] +pub enum VmcsHostNW { + /// Host CR0. + CR0 = 0x6C00, + /// Host CR3. + CR3 = 0x6C02, + /// Host CR4. + CR4 = 0x6C04, + /// Host FS base. + FS_BASE = 0x6C06, + /// Host GS base. + GS_BASE = 0x6C08, + /// Host TR base. + TR_BASE = 0x6C0A, + /// Host GDTR base. + GDTR_BASE = 0x6C0C, + /// Host IDTR base. + IDTR_BASE = 0x6C0E, + /// Host IA32_SYSENTER_ESP. + IA32_SYSENTER_ESP = 0x6C10, + /// Host IA32_SYSENTER_EIP. + IA32_SYSENTER_EIP = 0x6C12, + /// Host RSP. + RSP = 0x6C14, + /// Host RIP. + RIP = 0x6C16, +} +vmcs_read!(VmcsHostNW, usize); +vmcs_write!(VmcsHostNW, usize); + +/// 64-Bit Read-Only Data Fields. (SDM Vol. 3D, Appendix B.2.2) +#[derive(Clone, Copy, Debug)] +pub enum VmcsReadOnly64 { + /// Guest-physical address (full). + GUEST_PHYSICAL_ADDR = 0x2400, +} +vmcs_read!(VmcsReadOnly64, u64); + +/// 32-Bit Read-Only Data Fields. (SDM Vol. 3D, Appendix B.3.2) +#[derive(Clone, Copy, Debug)] +pub enum VmcsReadOnly32 { + /// VM-instruction error. + VM_INSTRUCTION_ERROR = 0x4400, + /// Exit reason. + EXIT_REASON = 0x4402, + /// VM-exit interruption information. + VMEXIT_INTERRUPTION_INFO = 0x4404, + /// VM-exit interruption error code. + VMEXIT_INTERRUPTION_ERR_CODE = 0x4406, + /// IDT-vectoring information field. + IDT_VECTORING_INFO = 0x4408, + /// IDT-vectoring error code. + IDT_VECTORING_ERR_CODE = 0x440A, + /// VM-exit instruction length. + VMEXIT_INSTRUCTION_LEN = 0x440C, + /// VM-exit instruction information. + VMEXIT_INSTRUCTION_INFO = 0x440E, +} +vmcs_read!(VmcsReadOnly32, u32); + +/// Natural-Width Read-Only Data Fields. (SDM Vol. 3D, Appendix B.4.2) +#[derive(Clone, Copy, Debug)] +pub enum VmcsReadOnlyNW { + /// Exit qualification. + EXIT_QUALIFICATION = 0x6400, + /// I/O RCX. + IO_RCX = 0x6402, + /// I/O RSI. + IO_RSI = 0x6404, + /// I/O RDI. + IO_RDI = 0x6406, + /// I/O RIP. + IO_RIP = 0x6408, + /// Guest-linear address. 
GUEST_LINEAR_ADDR = 0x640A,
}
vmcs_read!(VmcsReadOnlyNW, usize);

/// Namespace for operations on the current VMCS (load/clear, event
/// injection, and queries used by the VM-exit handlers).
pub struct Vmcs;

impl Vmcs {
    /// Make the VMCS at `paddr` current on this CPU (VMPTRLD).
    pub fn load(paddr: usize) -> VmResult<()> {
        unsafe { vmx::vmptrld(paddr as _) }
    }

    /// Flush and deactivate the VMCS at `paddr` (VMCLEAR).
    pub fn clear(paddr: usize) -> VmResult<()> {
        unsafe { vmx::vmclear(paddr as _) }
    }

    /// Whether the guest interrupts are blocked. (SDM Vol. 3C, Section 24.4.2, Table 24-3)
    ///
    /// FIX(review): propagate VMREAD failures with `?` instead of
    /// `.unwrap()`-panicking — the function already returns a `Result`. Also
    /// restores the `<bool>` return parameter (the body returns a boolean),
    /// which was stripped during extraction.
    pub fn allow_interrupt() -> HvResult<bool> {
        let rflags = VmcsGuestNW::RFLAGS.read()?;
        let block_state = VmcsGuest32::INTERRUPTIBILITY_STATE.read()?;
        Ok(
            rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0
                && block_state == 0,
        )
    }

    /// Queue `vector` for injection on the next VM entry, supplying an error
    /// code (explicit, or the one hardware reported on exit) for exception
    /// vectors that push one. (SDM Vol. 3C, Section 24.8.3)
    ///
    /// FIX(review): the VMCS error-code read now propagates failure with `?`
    /// instead of `.unwrap()` inside a closure; `Option<u32>` restores the
    /// parameter type stripped during extraction (the VMCS field is 32-bit).
    pub fn inject_interrupt(vector: u8, err_code: Option<u32>) -> HvResult {
        // SDM Vol. 3C, Section 24.8.3
        let err_code = if VmxInterruptionType::vector_has_error_code(vector) {
            match err_code {
                Some(code) => Some(code),
                None => Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?),
            }
        } else {
            None
        };
        let int_info = VmxInterruptInfo::from(vector, err_code);
        if let Some(err_code) = int_info.err_code {
            VmcsControl32::VMENTRY_EXCEPTION_ERR_CODE.write(err_code)?;
        }
        if int_info.int_type.is_soft() {
            // Software-originated events additionally need the instruction
            // length for the VM entry to emulate the delivery correctly.
            VmcsControl32::VMENTRY_INSTRUCTION_LEN
                .write(VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?)?;
        }
        VmcsControl32::VMENTRY_INTERRUPTION_INFO_FIELD.write(int_info.bits())?;
        Ok(())
    }

    /// Read the VM-instruction-error field of the current VMCS. The
    /// `<VmxInstructionError>` return parameter (stripped during extraction)
    /// matches the `.unwrap().as_str()` use in `From<VmFail> for HvError`.
    pub fn instruction_error() -> HvResult<VmxInstructionError> {
        Ok(VmcsReadOnly32::VM_INSTRUCTION_ERROR.read()?.into())
    }

    /// If enable, a VM exit occurs at the beginning of any instruction if
    /// `RFLAGS.IF` = 1 and there are no other blocking of interrupts.
    /// (see SDM, Vol.
3C, Section 24.4.2) + pub fn set_interrupt_window(enable: bool) -> HvResult { + let mut ctrl: u32 = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?; + let bits = PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); + if enable { + ctrl |= bits + } else { + ctrl &= !bits + } + VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(ctrl)?; + Ok(()) + } + + pub fn set_control( + control: VmcsControl32, + capability_msr: Msr, + old_value: u32, + set: u32, + clear: u32, + ) -> HvResult { + let cap = capability_msr.read(); + let allowed0 = cap as u32; + let allowed1 = (cap >> 32) as u32; + assert_eq!(allowed0 & allowed1, allowed0); + debug!( + "set {:?}: {:#x} (+{:#x}, -{:#x})", + control, old_value, set, clear + ); + if (set & clear) != 0 { + return hv_result_err!( + EINVAL, + format!("can not set and clear the same bit in {:?}", control) + ); + } + if (allowed1 & set) != set { + // failed if set 0-bits in allowed1 + warn!("allow1: {:x}", allowed1); + return hv_result_err!( + EINVAL, + format!("can not set bits {:#x} in {:?}", set, control) + ); + } + if (allowed0 & clear) != 0 { + // failed if clear 1-bits in allowed0 + return hv_result_err!( + EINVAL, + format!("can not clear bits {:#x} in {:?}", clear, control) + ); + } + // SDM Vol. 
3C, Section 31.5.1, Algorithm 3 + let flexible = !allowed0 & allowed1; // therse bits can be either 0 or 1 + let unknown = flexible & !(set | clear); // hypervisor untouched bits + let default = unknown & old_value; // these bits keep unchanged in old value + let fixed1 = allowed0; // these bits are fixed to 1 + control.write(fixed1 | default | set)?; + Ok(()) + } +} + +impl From for HvError { + fn from(err: VmFail) -> Self { + match err { + VmFail::VmFailValid => hv_err!(EFAULT, Vmcs::instruction_error().unwrap().as_str()), + _ => hv_err!(EFAULT, format!("VMX instruction failed: {:?}", err)), + } + } +} diff --git a/src/arch/x86_64/vmx.rs b/src/arch/x86_64/vmx.rs new file mode 100644 index 00000000..a4cbe2bf --- /dev/null +++ b/src/arch/x86_64/vmx.rs @@ -0,0 +1,453 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + cpu::ArchCpu, + msr::Msr, + s2pt::Stage2PageFaultInfo, + vmcs::{self, *}, + }, + consts::PAGE_SIZE, + error::{HvError, HvResult}, + memory::{Frame, GuestPhysAddr, HostPhysAddr, MemFlags, PhysAddr}, +}; +use bit_field::BitField; +use bitflags::{bitflags, Flags}; +use core::fmt::{Debug, Formatter, Result}; +use raw_cpuid::CpuId; +use x86::{ + bits64::vmx, + dtables, + dtables::DescriptorTablePointer, + segmentation::SegmentSelector, + vmx::{vmcs::control::*, vmcs::*, VmFail}, +}; +use x86_64::{ + registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags}, + structures::gdt, +}; + +bitflags! 
{ + pub struct FeatureControlFlags: u64 { + // Lock bit: when set, locks this MSR from being written. when clear, + // VMXON causes a #GP. + const LOCKED = 1 << 0; + // Enable VMX inside SMX operation. + const VMXON_ENABLED_INSIDE_SMX = 1 << 1; + // Enable VMX outside SMX operation. + const VMXON_ENABLED_OUTSIDE_SMX = 1 << 2; + } +} + +/// Exit Qualification for CR access. (SDM Vol. 3C, Section 27.2.1, Table 27-5) +#[derive(Debug)] +pub struct VmxCrAccessInfo { + /// Control register number (CR0/CR3/CR4). + pub cr_n: u8, + /// Access type (0 = MOV to CR; 1 = MOV from CR; 2 = CLTS; 3 = LMSW). + pub access_type: u8, + /// LMSW operand type. + pub lmsw_op_type: u8, + /// General register. + pub gpr: u8, + /// LMSW source. + pub lmsw_src: u16, +} + +impl VmxCrAccessInfo { + pub fn new() -> HvResult { + let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?; + Ok(VmxCrAccessInfo { + cr_n: qualification.get_bits(0..=3) as _, + access_type: qualification.get_bits(4..=5) as _, + lmsw_op_type: qualification.get_bit(6) as _, + gpr: qualification.get_bits(8..=11) as _, + lmsw_src: qualification.get_bits(16..=31) as _, + }) + } +} + +/// VM-Exit Informations. (SDM Vol. 3C, Section 24.9.1) +#[derive(Debug)] +pub struct VmxExitInfo { + /// VM-entry failure. (0 = true VM exit; 1 = VM-entry failure) + pub entry_failure: bool, + /// Basic exit reason. + pub exit_reason: VmxExitReason, + /// For VM exits resulting from instruction execution, this field receives + /// the length in bytes of the instruction whose execution led to the VM exit. + pub exit_instruction_length: u32, + /// Guest `RIP` where the VM exit occurs. 
+ pub guest_rip: usize, +} + +impl VmxExitInfo { + pub fn new() -> HvResult { + let full_reason = VmcsReadOnly32::EXIT_REASON.read()?; + Ok(Self { + exit_reason: full_reason + .get_bits(0..16) + .try_into() + .expect("Unknown VM-exit reason"), + entry_failure: full_reason.get_bit(31), + exit_instruction_length: VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?, + guest_rip: VmcsGuestNW::RIP.read()?, + }) + } +} + +numeric_enum_macro::numeric_enum! { +#[repr(u32)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +/// VMX basic exit reasons. (SDM Vol. 3D, Appendix C) +pub enum VmxExitReason { + EXCEPTION_NMI = 0, + EXTERNAL_INTERRUPT = 1, + TRIPLE_FAULT = 2, + INIT = 3, + SIPI = 4, + SMI = 5, + OTHER_SMI = 6, + INTERRUPT_WINDOW = 7, + NMI_WINDOW = 8, + TASK_SWITCH = 9, + CPUID = 10, + GETSEC = 11, + HLT = 12, + INVD = 13, + INVLPG = 14, + RDPMC = 15, + RDTSC = 16, + RSM = 17, + VMCALL = 18, + VMCLEAR = 19, + VMLAUNCH = 20, + VMPTRLD = 21, + VMPTRST = 22, + VMREAD = 23, + VMRESUME = 24, + VMWRITE = 25, + VMOFF = 26, + VMON = 27, + CR_ACCESS = 28, + DR_ACCESS = 29, + IO_INSTRUCTION = 30, + MSR_READ = 31, + MSR_WRITE = 32, + INVALID_GUEST_STATE = 33, + MSR_LOAD_FAIL = 34, + MWAIT_INSTRUCTION = 36, + MONITOR_TRAP_FLAG = 37, + MONITOR_INSTRUCTION = 39, + PAUSE_INSTRUCTION = 40, + MCE_DURING_VMENTRY = 41, + TPR_BELOW_THRESHOLD = 43, + APIC_ACCESS = 44, + VIRTUALIZED_EOI = 45, + GDTR_IDTR = 46, + LDTR_TR = 47, + EPT_VIOLATION = 48, + EPT_MISCONFIG = 49, + INVEPT = 50, + RDTSCP = 51, + PREEMPTION_TIMER = 52, + INVVPID = 53, + WBINVD = 54, + XSETBV = 55, + APIC_WRITE = 56, + RDRAND = 57, + INVPCID = 58, + VMFUNC = 59, + ENCLS = 60, + RDSEED = 61, + PML_FULL = 62, + XSAVES = 63, + XRSTORS = 64, + PCONFIG = 65, + SPP_EVENT = 66, + UMWAIT = 67, + TPAUSE = 68, + LOADIWKEY = 69, +} +} + +/// VM instruction error numbers. (SDM Vol. 
3C, Section 30.4) +pub struct VmxInstructionError(u32); + +impl VmxInstructionError { + pub fn as_str(&self) -> &str { + match self.0 { + 0 => "OK", + 1 => "VMCALL executed in VMX root operation", + 2 => "VMCLEAR with invalid physical address", + 3 => "VMCLEAR with VMXON pointer", + 4 => "VMLAUNCH with non-clear VMCS", + 5 => "VMRESUME with non-launched VMCS", + 6 => "VMRESUME after VMXOFF (VMXOFF and VMXON between VMLAUNCH and VMRESUME)", + 7 => "VM entry with invalid control field(s)", + 8 => "VM entry with invalid host-state field(s)", + 9 => "VMPTRLD with invalid physical address", + 10 => "VMPTRLD with VMXON pointer", + 11 => "VMPTRLD with incorrect VMCS revision identifier", + 12 => "VMREAD/VMWRITE from/to unsupported VMCS component", + 13 => "VMWRITE to read-only VMCS component", + 15 => "VMXON executed in VMX root operation", + 16 => "VM entry with invalid executive-VMCS pointer", + 17 => "VM entry with non-launched executive VMCS", + 18 => "VM entry with executive-VMCS pointer not VMXON pointer (when attempting to deactivate the dual-monitor treatment of SMIs and SMM)", + 19 => "VMCALL with non-clear VMCS (when attempting to activate the dual-monitor treatment of SMIs and SMM)", + 20 => "VMCALL with invalid VM-exit control fields", + 22 => "VMCALL with incorrect MSEG revision identifier (when attempting to activate the dual-monitor treatment of SMIs and SMM)", + 23 => "VMXOFF under dual-monitor treatment of SMIs and SMM", + 24 => "VMCALL with invalid SMM-monitor features (when attempting to activate the dual-monitor treatment of SMIs and SMM)", + 25 => "VM entry with invalid VM-execution control fields in executive VMCS (when attempting to return from SMM)", + 26 => "VM entry with events blocked by MOV SS", + 28 => "Invalid operand to INVEPT/INVVPID", + _ => "[INVALID]", + } + } +} + +impl From for VmxInstructionError { + fn from(value: u32) -> Self { + Self(value) + } +} + +impl Debug for VmxInstructionError { + fn fmt(&self, f: &mut Formatter) -> Result 
{ + write!(f, "VmxInstructionError({}, {:?})", self.0, self.as_str()) + } +} + +/// VM-Entry / VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) +#[derive(Debug)] +pub struct VmxInterruptInfo { + /// Vector of interrupt or exception. + pub vector: u8, + /// Determines details of how the injection is performed. + pub int_type: VmxInterruptionType, + /// For hardware exceptions that would have delivered an error code on the stack. + pub err_code: Option, + /// Whether the field is valid. + pub valid: bool, +} + +impl VmxInterruptInfo { + pub fn new() -> HvResult { + // SDM Vol. 3C, Section 24.9.2 + let info = VmcsReadOnly32::VMEXIT_INTERRUPTION_INFO.read()?; + Ok(VmxInterruptInfo { + vector: info.get_bits(0..8) as u8, + int_type: VmxInterruptionType::try_from(info.get_bits(8..11) as u8).unwrap(), + err_code: if info.get_bit(11) { + Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?) + } else { + None + }, + valid: info.get_bit(31), + }) + } + + /// Convert from the interrupt vector and the error code. + pub fn from(vector: u8, err_code: Option) -> Self { + Self { + vector, + int_type: VmxInterruptionType::from_vector(vector), + err_code, + valid: true, + } + } + + /// Raw bits for writing to VMCS. + pub fn bits(&self) -> u32 { + let mut bits = self.vector as u32; + bits |= (self.int_type as u32) << 8; + bits.set_bit(11, self.err_code.is_some()); + bits.set_bit(31, self.valid); + bits + } +} + +numeric_enum_macro::numeric_enum! { +#[repr(u8)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +/// The interruption type (bits 10:8) in VM-Entry Interruption-Information Field +/// and VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) +pub enum VmxInterruptionType { + /// External interrupt + External = 0, + /// Reserved + Reserved = 1, + /// Non-maskable interrupt (NMI) + NMI = 2, + /// Hardware exception (e.g,. 
#PF) + HardException = 3, + /// Software interrupt (INT n) + SoftIntr = 4, + /// Privileged software exception (INT1) + PrivSoftException = 5, + /// Software exception (INT3 or INTO) + SoftException = 6, + /// Other event + Other = 7, +} +} + +impl VmxInterruptionType { + /// Whether the exception/interrupt with `vector` has an error code. + pub const fn vector_has_error_code(vector: u8) -> bool { + use x86::irq::*; + matches!( + vector, + DOUBLE_FAULT_VECTOR + | INVALID_TSS_VECTOR + | SEGMENT_NOT_PRESENT_VECTOR + | STACK_SEGEMENT_FAULT_VECTOR + | GENERAL_PROTECTION_FAULT_VECTOR + | PAGE_FAULT_VECTOR + | ALIGNMENT_CHECK_VECTOR + ) + } + + /// Determine interruption type by the interrupt vector. + pub const fn from_vector(vector: u8) -> Self { + // SDM Vol. 3C, Section 24.8.3 + use x86::irq::*; + match vector { + DEBUG_VECTOR => Self::PrivSoftException, + NONMASKABLE_INTERRUPT_VECTOR => Self::NMI, + BREAKPOINT_VECTOR | OVERFLOW_VECTOR => Self::SoftException, + // SDM Vol. 3A, Section 6.15: All other vectors from 0 to 21 are exceptions. + 0..=VIRTUALIZATION_VECTOR => Self::HardException, + 32..=255 => Self::External, + _ => Self::Other, + } + } + + /// For software interrupt, software exception, or privileged software + /// exception, we need to set VM-Entry Instruction Length Field. + pub const fn is_soft(&self) -> bool { + matches!( + *self, + Self::SoftIntr | Self::SoftException | Self::PrivSoftException + ) + } +} + +/// Exit Qualification for I/O Instructions. (SDM Vol. 3C, Section 27.2.1, Table 27-5) +#[derive(Debug)] +pub struct VmxIoExitInfo { + /// Size of access. + pub access_size: u8, + /// Direction of the attempted access (0 = OUT, 1 = IN). + pub is_in: bool, + /// String instruction (0 = not string; 1 = string). + pub is_string: bool, + /// REP prefixed (0 = not REP; 1 = REP). + pub is_repeat: bool, + /// Port number. (as specified in DX or in an immediate operand) + pub port: u16, +} + +impl VmxIoExitInfo { + pub fn new() -> HvResult { + // SDM Vol. 
3C, Section 27.2.1, Table 27-5 + let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?; + Ok(VmxIoExitInfo { + access_size: qualification.get_bits(0..3) as u8 + 1, + is_in: qualification.get_bit(3), + is_string: qualification.get_bit(4), + is_repeat: qualification.get_bit(5), + port: qualification.get_bits(16..32) as u16, + }) + } +} + +#[derive(Debug)] +pub struct VmxRegion { + frame: Frame, +} + +impl VmxRegion { + pub fn fake_init() -> Self { + Self { + frame: unsafe { Frame::from_paddr(0) }, + } + } + + pub fn new(revision_id: u32, shadow_indicator: bool) -> Self { + let frame = Frame::new_zero().unwrap(); + unsafe { + (*(frame.start_paddr() as *mut u32)) + .set_bits(0..=30, revision_id) + .set_bit(31, shadow_indicator); + } + Self { frame } + } + + pub fn start_paddr(&self) -> PhysAddr { + self.frame.start_paddr() + } +} + +pub fn check_vmx_support() -> bool { + if let Some(feature) = CpuId::new().get_feature_info() { + feature.has_vmx() + } else { + false + } +} + +pub fn enable_vmxon() -> HvResult { + let mut ctrl_reg = Msr::IA32_FEATURE_CONTROL; + let ctrl_flag = FeatureControlFlags::from_bits_truncate(ctrl_reg.read()); + let locked = ctrl_flag.contains(FeatureControlFlags::LOCKED); + let vmxon_outside = ctrl_flag.contains(FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX); + if !locked { + unsafe { + ctrl_reg.write( + (ctrl_flag + | FeatureControlFlags::LOCKED + | FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX) + .bits(), + ) + } + } else if !vmxon_outside { + return hv_result_err!(EPERM, "VMX disabled by BIOS"); + } + Ok(()) +} + +pub unsafe fn execute_vmxon(start_paddr: u64) -> HvResult { + // enable VMX using the VMXE bit + Cr4::write(Cr4::read() | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS); + // execute VMXON + vmx::vmxon(start_paddr)?; + + Ok(()) +} + +pub fn get_vmcs_revision_id() -> u32 { + let vmx_basic_flag = Msr::IA32_VMX_BASIC.read(); + vmx_basic_flag.get_bits(0..=30) as u32 +} + +pub fn is_vmx_enabled() -> bool { + 
Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS) +} diff --git a/src/arch/x86_64/zone.rs b/src/arch/x86_64/zone.rs new file mode 100644 index 00000000..75e7c43d --- /dev/null +++ b/src/arch/x86_64/zone.rs @@ -0,0 +1,178 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{acpi, boot, msr::set_msr_bitmap, pio, pio::set_pio_bitmap, Stage2PageTable}, + config::*, + device::virtio_trampoline::mmio_virtio_handler, + error::HvResult, + memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion, MemorySet}, + pci::pcibar::{BarRegion, BarType}, + percpu::get_cpu_data, + platform::MEM_TYPE_RESERVED, + zone::Zone, +}; +use alloc::vec::Vec; + +#[repr(C)] +#[derive(Debug, Clone)] +pub struct HvArchZoneConfig { + /// base address of ioapic mmio registers, usually 0xfec00000 + pub ioapic_base: usize, + /// size of ioapic mmio registers, usually 0x1000 + pub ioapic_size: usize, + /// start gpa of vmlinux.bin, usually 0x100000 + pub kernel_entry_gpa: usize, + /// gpa of linux boot command line + pub cmdline_load_gpa: usize, + /// start gpa of setup.bin, address length no bigger than 16 bits + pub setup_load_gpa: usize, + /// If you want to use initrd, set initrd_load_gpa and initrd_size. + /// Otherwise, leave them as zero. The memory region type of + /// initrd should be set to MEM_TYPE_RESERVED. 
+ /// initrd_load_gpa is the start gpa of initrd + pub initrd_load_gpa: usize, + /// size of initrd + pub initrd_size: usize, + /// RSDP table will be copied to the memory region with this id. + /// The start gpa of this memory region should 0xe_0000 + /// and the size should be 0x2_0000. Set its type to MEM_TYPE_RAM. + pub rsdp_memory_region_id: usize, + /// Other ACPI tables will be copied to the memory region with this id. + /// no restriction on start gpa and size, but its type should be MEM_TYPE_RAM as well. + /// Usually, the DSDT table is large, so the size of this region should be large enough. + pub acpi_memory_region_id: usize, + /// If you want to use a graphical console, set screen_base to a preferred gpa + /// as the start of the framebuffer. Otherwise, leave it as zero. + /// No need to add a memory region for the framebuffer, + /// Hvisor will do the job. **IMPORTANT: screen_base should be no longer than 32 bits.** + pub screen_base: usize, +} + +impl Zone { + pub fn pt_init(&mut self, mem_regions: &[HvConfigMemoryRegion]) -> HvResult { + for mem_region in mem_regions.iter() { + let mut flags = MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE; + if mem_region.mem_type == MEM_TYPE_IO { + flags |= MemFlags::IO; + } + match mem_region.mem_type { + MEM_TYPE_RAM | MEM_TYPE_IO | MEM_TYPE_RESERVED => { + self.gpm.insert(MemoryRegion::new_with_offset_mapper( + mem_region.virtual_start as GuestPhysAddr, + mem_region.physical_start as HostPhysAddr, + mem_region.size as _, + flags, + )); + } + MEM_TYPE_VIRTIO => { + self.mmio_region_register( + mem_region.physical_start as _, + mem_region.size as _, + mmio_virtio_handler, + mem_region.physical_start as _, + ); + } + _ => { + panic!("Unsupported memory type: {}", mem_region.mem_type) + } + } + } + + // info!("VM stage 2 memory set: {:#x?}", self.gpm); + Ok(()) + } + + pub fn irq_bitmap_init(&mut self, irqs: &[u32]) {} + + /// called after cpu_set is initialized + pub fn arch_zone_pre_configuration(&mut self, 
config: &HvZoneConfig) -> HvResult { + self.cpu_set.iter().for_each(|cpuid| { + let cpu_data = get_cpu_data(cpuid); + // boot cpu + if cpuid == self.cpu_set.first_cpu().unwrap() { + cpu_data.arch_cpu.set_boot_cpu_vm_launch_regs( + config.arch_config.kernel_entry_gpa as _, + config.arch_config.setup_load_gpa as _, + ); + } + }); + + set_msr_bitmap(config.zone_id as _); + set_pio_bitmap(config.zone_id as _); + + Ok(()) + } + + pub fn arch_zone_post_configuration(&mut self, config: &HvZoneConfig) -> HvResult { + let mut msix_bar_regions: Vec = Vec::new(); + for region in self.pciroot.bar_regions.iter_mut() { + // check whether this bar is msi-x table + // if true, use msi-x table handler instead + if region.bar_type != BarType::IO { + if let Some(bdf) = acpi::is_msix_bar(region.start) { + info!("msi-x bar! hpa: {:x} bdf: {:x}", region.start, bdf); + msix_bar_regions.push(region.clone()); + + continue; + } + } + } + for region in msix_bar_regions.iter() { + self.mmio_region_register( + region.start, + region.size, + crate::memory::mmio_generic_handler, + region.start, + ); + } + + if self.id == 0 { + self.pci_bars_register(&config.pci_config); + } + + boot::BootParams::fill(&config, &mut self.gpm); + acpi::copy_to_guest_memory_region(&config, &self.cpu_set); + + Ok(()) + } +} + +impl BarRegion { + pub fn arch_set_bar_region_start(&mut self, cpu_base: usize, pci_base: usize) { + self.start = cpu_base + self.start - pci_base; + if self.bar_type != BarType::IO { + self.start = crate::memory::addr::align_down(self.start); + } + } + + pub fn arch_insert_bar_region(&self, gpm: &mut MemorySet, zone_id: usize) { + if self.bar_type != BarType::IO { + gpm.insert(MemoryRegion::new_with_offset_mapper( + self.start as GuestPhysAddr, + self.start, + self.size, + MemFlags::READ | MemFlags::WRITE | MemFlags::IO, + )) + .ok(); + } else { + pio::get_pio_bitmap(zone_id).set_range_intercept( + (self.start as u16)..((self.start + self.size) as u16), + false, + ); + } + } +} diff --git 
a/src/consts.rs b/src/consts.rs index c78adbe8..32c55260 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -32,7 +32,7 @@ pub const MAX_CPU_NUM: usize = BOARD_NCPUS; pub const MAX_ZONE_NUM: usize = 4; -pub const MAX_WAIT_TIMES: usize = 10000000; +pub const MAX_WAIT_TIMES: usize = 100000000; pub fn core_end() -> VirtAddr { __core_end as _ diff --git a/src/device/irqchip/gicv3/gits.rs b/src/device/irqchip/gicv3/gits.rs index b43c2849..e3ad3b75 100644 --- a/src/device/irqchip/gicv3/gits.rs +++ b/src/device/irqchip/gicv3/gits.rs @@ -37,7 +37,7 @@ pub const GITS_UMSIR: usize = 0x0048; // unmapped msi pub const GITS_CBASER: usize = 0x0080; // the addr of command queue pub const GITS_CWRITER: usize = 0x0088; // rw, write an command to the cmdq, write this reg to tell hw pub const GITS_CREADR: usize = 0x0090; // read-only, hardware changes it -pub const GITS_BASER: usize = 0x0100; // itt, desc +pub const GITS_BASER: usize = 0x0100; // device table, itt, desc pub const GITS_COLLECTION_BASER: usize = GITS_BASER + 0x8; pub const GITS_TRANSLATER: usize = 0x10000 + 0x0040; // to signal an interrupt, written by devices @@ -73,19 +73,25 @@ fn vicid_to_icid(vicid: u64, cpu_bitmap: u64) -> Option { // created by root linux, and make a virtual one to non root pub struct DeviceTable { baser: usize, + mask: usize, + fix_val: usize, } impl DeviceTable { fn new() -> Self { let dt_baser_reg = host_gits_base() + GITS_BASER; let dt_baser = unsafe { ptr::read_volatile(dt_baser_reg as *mut u64) }; + let mask = 0x71f000000000000; + let fix_val = dt_baser & mask; Self { baser: dt_baser as _, + mask: mask as _, + fix_val: fix_val as _, } } fn set_baser(&mut self, value: usize) { - self.baser = value; + self.baser = (value & !self.mask) | self.fix_val; } fn read_baser(&self) -> usize { @@ -95,19 +101,25 @@ impl DeviceTable { pub struct CollectionTable { baser: usize, + mask: usize, + fix_val: usize, } impl CollectionTable { fn new() -> Self { let ct_baser_reg = host_gits_base() + 
GITS_COLLECTION_BASER; let ct_baser = unsafe { ptr::read_volatile(ct_baser_reg as *mut u64) }; + let mask = 0x71f000000000000; + let fix_val = ct_baser & mask; Self { baser: ct_baser as _, + mask: mask as _, + fix_val: fix_val as _, } } fn set_baser(&mut self, value: usize) { - self.baser = value; + self.baser = (value & !self.mask) | self.fix_val; } fn read_baser(&self) -> usize { @@ -121,8 +133,8 @@ pub struct Cmdq { writer: usize, frame: Frame, - phy_base_list: [usize; MAX_ZONE_NUM], - cbaser_list: [usize; MAX_ZONE_NUM], + phy_base_list: [usize; MAX_ZONE_NUM], // the real phy addr for vm cmdq + cbaser_list: [usize; MAX_ZONE_NUM], // the v register for vm creadr_list: [usize; MAX_ZONE_NUM], cwriter_list: [usize; MAX_ZONE_NUM], cmdq_page_num: [usize; MAX_ZONE_NUM], @@ -131,7 +143,6 @@ pub struct Cmdq { impl Cmdq { fn new() -> Self { let f = Frame::new_contiguous_with_base(CMDQ_PAGES_NUM, 16).unwrap(); - info!("ITS cmdq base: 0x{:x}", f.start_paddr()); let r = Self { phy_addr: f.start_paddr(), readr: 0, @@ -154,8 +165,8 @@ impl Cmdq { val = val | (CMDQ_PAGES_NUM - 1); // 16 contigous 4KB pages let ctrl = host_gits_base() + GITS_CTRL; unsafe { - let origin_ctrl = ptr::read_volatile(ctrl as *mut u64); - ptr::write_volatile(ctrl as *mut u64, origin_ctrl & 0xfffffffffffffffeu64); // turn off, vm will turn on this ctrl + let origin_ctrl = ptr::read_volatile(ctrl as *mut u32); + ptr::write_volatile(ctrl as *mut u32, origin_ctrl & 0xfffffffeu32); // turn off, vm will turn on this ctrl ptr::write_volatile(reg as *mut u64, val as u64); ptr::write_volatile(writer as *mut u64, 0 as u64); // init cwriter } @@ -164,9 +175,15 @@ impl Cmdq { fn set_cbaser(&mut self, zone_id: usize, value: usize) { assert!(zone_id < MAX_ZONE_NUM, "Invalid zone id!"); self.cbaser_list[zone_id] = value; - self.phy_base_list[zone_id] = value & 0xffffffffff000; + let gpa_base = value & 0xffffffffff000; + unsafe { + let phy_base = match this_zone().read().gpm.page_table_query(gpa_base) { + Ok(p) => 
self.phy_base_list[zone_id] = p.0, + _ => {} + }; + } self.cmdq_page_num[zone_id] = (value & 0xff) + 1; // get the page num - info!( + debug!( "zone_id: {}, cmdq base: {:#x}, page num: {}", zone_id, self.phy_base_list[zone_id], self.cmdq_page_num[zone_id] ); @@ -182,7 +199,7 @@ impl Cmdq { if value == self.creadr_list[zone_id] { // if the off vmm gonna read is equal to the cwriter, it means that // the first write cmd is not sent to the hw, so we ignore it. - trace!("ignore first write"); + debug!("ignore first write"); } else { self.insert_cmd(zone_id, value); } @@ -205,7 +222,7 @@ impl Cmdq { self.creadr_list[zone_id] = writer; } - // it's ok to add qemu-args: -trace gicv3_gits_cmd_*, remember to remain `enable one lpi` + // it's ok to add qemu-args: -info gicv3_gits_cmd_*, remember to remain `enable one lpi` // we need changge vicid to icid here fn analyze_cmd(&self, value: [u64; 4]) -> [u64; 4] { let code = (value[0] & 0xff) as usize; @@ -223,7 +240,7 @@ impl Cmdq { new_cmd[2] &= !0xffffu64; new_cmd[2] |= icid & 0xffff; enable_one_lpi((event - 8192) as _); - info!( + debug!( "MAPI cmd, for device {:#x}, event = intid = {:#x} -> vicid {:#x} (icid {:#x})", id >> 32, event, @@ -233,10 +250,25 @@ impl Cmdq { } 0x08 => { let id = value[0] & 0xffffffff00000000; - let itt_base = (value[2] & 0x000fffffffffffff) >> 8; - trace!( + let itt_base = value[2] & 0x000fffffffffff00; // the lowest 8 bits are zeros + debug!( + "MAPD cmd, for device {:#x}, itt base {:#x}", + id >> 32, + itt_base + ); + let phys_itt_base = unsafe { + this_zone() + .read() + .gpm + .page_table_query(itt_base as _) + .unwrap() + .0 + }; + new_cmd[2] &= !0x000fffffffffff00u64; + new_cmd[2] |= phys_itt_base as u64; + debug!( "MAPD cmd, set ITT: {:#x} to device {:#x}", - itt_base, + phys_itt_base, id >> 32 ); } @@ -250,7 +282,7 @@ impl Cmdq { new_cmd[2] &= !0xffffu64; new_cmd[2] |= icid & 0xffff; enable_one_lpi((intid - 8192) as _); - info!( + debug!( "MAPTI cmd, for device {:#x}, event {:#x} -> vicid 
{:#x} (icid {:#x}) + intid {:#x}", id >> 32, event, @@ -266,33 +298,34 @@ impl Cmdq { new_cmd[2] &= !0xffffu64; new_cmd[2] |= icid & 0xffff; let rd_base = (value[2] >> 16) & 0x7ffffffff; - info!( + debug!( "MAPC cmd, vicid {:#x} (icid {:#x}) -> redist {:#x}", vicid, icid, rd_base ); } 0x05 => { - trace!("SYNC cmd"); + debug!("SYNC cmd"); } 0x04 => { - trace!("CLEAR cmd"); + debug!("CLEAR cmd"); } 0x0f => { - trace!("DISCARD cmd"); + debug!("DISCARD cmd"); } 0x03 => { - trace!("INT cmd"); + debug!("INT cmd"); } 0x0c => { - trace!("INV cmd"); + debug!("INV cmd"); } 0x0d => { - trace!("INVALL cmd"); + debug!("INVALL cmd"); } _ => { - trace!("other cmd, code: 0x{:x}", code); + debug!("other cmd, code: 0x{:x}", code); } } + new_cmd } @@ -312,7 +345,7 @@ impl Cmdq { }; let cmd_num = cmd_size / PER_CMD_BYTES; - trace!("cmd size: {:#x}, cmd num: {:#x}", cmd_size, cmd_num); + debug!("cmd size: {:#x}, cmd num: {:#x}", cmd_size, cmd_num); let mut vm_cmdq_addr = zone_addr + origin_readr; let mut real_cmdq_addr = self.phy_addr + self.readr; @@ -342,10 +375,9 @@ impl Cmdq { loop { self.readr = (ptr::read_volatile(readr as *mut u64)) as usize; // hw readr if self.readr == self.writer { - trace!( + debug!( "readr={:#x}, writer={:#x}, its cmd end", - self.readr, - self.writer + self.readr, self.writer ); break; } else { diff --git a/src/device/irqchip/gicv3/vgic.rs b/src/device/irqchip/gicv3/vgic.rs index b5662b4d..7c52c26e 100644 --- a/src/device/irqchip/gicv3/vgic.rs +++ b/src/device/irqchip/gicv3/vgic.rs @@ -338,86 +338,79 @@ pub fn vgicv3_its_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { match reg { GITS_CTRL => { mmio_perform_access(gits_base, mmio); - if mmio.is_write { - trace!("write GITS_CTRL: {:#x}", mmio.value); - } else { - trace!("read GITS_CTRL: {:#x}", mmio.value); - } } GITS_CBASER => { if mmio.is_write { - if zone_id == 0 { - mmio_perform_access(gits_base, mmio); - } set_cbaser(mmio.value, zone_id); - trace!("write GITS_CBASER: {:#x}", mmio.value); } 
else { mmio.value = read_cbaser(zone_id); - trace!("read GITS_CBASER: {:#x}", mmio.value); } } + // v_dt_addr + 0x10000000; GITS_BASER => { - if zone_id == 0 { - mmio_perform_access(gits_base, mmio); - } else { - if mmio.is_write { - set_dt_baser(mmio.value, zone_id); - } else { - mmio.value = read_dt_baser(zone_id); - } - } if mmio.is_write { - trace!("write GITS_BASER: 0x{:016x}", mmio.value); + set_dt_baser(mmio.value, zone_id); + if zone_id == 0 { + let v_dt_addr = mmio.value & 0xfff_fff_fff_000usize; + let phys_dt_trans = + unsafe { this_zone().read().gpm.page_table_query(v_dt_addr) }; + match phys_dt_trans { + Ok(p) => { + mmio.value &= !0xfff_fff_fff_000usize; + mmio.value |= p.0 as usize; + } + _ => {} + } + mmio_perform_access(gits_base, mmio); + } } else { - trace!("read GITS_BASER: 0x{:016x}", mmio.value); + mmio.value = read_dt_baser(zone_id); } } GITS_COLLECTION_BASER => { - if zone_id == 0 { - mmio_perform_access(gits_base, mmio); - } else { - if mmio.is_write { - set_ct_baser(mmio.value, zone_id); - } else { - mmio.value = read_ct_baser(zone_id); - } - } if mmio.is_write { - trace!("write GITS_COLL_BASER: 0x{:016x}", mmio.value); + set_ct_baser(mmio.value, zone_id); + if zone_id == 0 { + let v_ct_addr = mmio.value & 0xfff_fff_fff_000usize; + let phys_ct_trans = + unsafe { this_zone().read().gpm.page_table_query(v_ct_addr) }; + match phys_ct_trans { + Ok(p) => { + mmio.value &= !0xfff_fff_fff_000usize; + mmio.value |= p.0 as usize; + } + _ => {} + } + mmio_perform_access(gits_base, mmio); + } } else { - trace!("read GITS_COLL_BASER: 0x{:016x}", mmio.value); + mmio.value = read_ct_baser(zone_id); } } GITS_CWRITER => { if mmio.is_write { - trace!("write GITS_CWRITER: {:#x}", mmio.value); set_cwriter(mmio.value, zone_id); } else { mmio.value = read_cwriter(zone_id); - trace!("read GITS_CWRITER: {:#x}", mmio.value); } } GITS_CREADR => { mmio.value = read_creadr(zone_id); - trace!("read GITS_CREADER: {:#x}", mmio.value); } GITS_TYPER => { 
mmio_perform_access(gits_base, mmio); - trace!("GITS_TYPER: {:#x}", mmio.value); } _ => { mmio_perform_access(gits_base, mmio); if mmio.is_write { - trace!( + debug!( "write GITS offset: {:#x}, 0x{:016x}", - mmio.address, - mmio.value + mmio.address, mmio.value ); } else { - trace!( + debug!( "read GITS offset: {:#x}, 0x{:016x}", - mmio.address, - mmio.value + mmio.address, mmio.value ); } } diff --git a/src/device/irqchip/mod.rs b/src/device/irqchip/mod.rs index 13a2e76f..de616fd3 100644 --- a/src/device/irqchip/mod.rs +++ b/src/device/irqchip/mod.rs @@ -109,6 +109,11 @@ impl Zone { #[cfg(feature = "eic7700_sysreg")] self.virtual_syscon_mmio_init(); } + #[cfg(target_arch = "x86_64")] + { + self.ioapic_mmio_init(hv_config); + // self.pci_config_space_mmio_init(hv_config); + } } } @@ -142,5 +147,11 @@ pub fn primary_init_early() { #[cfg(target_arch = "loongarch64")] pub mod ls7a2000; +#[cfg(target_arch = "x86_64")] +pub mod pic; + #[cfg(target_arch = "loongarch64")] pub use ls7a2000::{inject_irq, percpu_init, primary_init_early, primary_init_late}; + +#[cfg(target_arch = "x86_64")] +pub use pic::{inject_irq, inject_vector, percpu_init, primary_init_early, primary_init_late}; diff --git a/src/device/irqchip/pic/ioapic.rs b/src/device/irqchip/pic/ioapic.rs new file mode 100644 index 00000000..84b6e5a2 --- /dev/null +++ b/src/device/irqchip/pic/ioapic.rs @@ -0,0 +1,233 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + acpi::{get_apic_id, get_cpu_id}, + cpu::this_cpu_id, + idt, ipi, + mmio::MMIoDevice, + zone::HvArchZoneConfig, + }, + device::irqchip::pic::inject_vector, + error::HvResult, + memory::{GuestPhysAddr, MMIOAccess}, + platform::ROOT_ZONE_IOAPIC_BASE, + zone::{this_zone_id, Zone}, +}; +use alloc::{sync::Arc, vec::Vec}; +use bit_field::BitField; +use core::{ops::Range, u32}; +use spin::{Mutex, Once}; +use x2apic::ioapic::IoApic; +use x86_64::instructions::port::Port; + +pub mod irqs { + pub const UART_COM1_IRQ: u8 = 0x4; +} + +#[allow(non_snake_case)] +pub mod IoApicReg { + pub const ID: u32 = 0x00; + pub const VERSION: u32 = 0x01; + pub const ARBITRATION: u32 = 0x02; + pub const TABLE_BASE: u32 = 0x10; +} + +const IOAPIC_MAX_REDIRECT_ENTRIES: u64 = 0x17; + +lazy_static::lazy_static! { + static ref IO_APIC: Mutex = { + unsafe { Mutex::new(IoApic::new(ROOT_ZONE_IOAPIC_BASE as _)) } + }; +} + +static VIRT_IOAPIC: Once = Once::new(); + +#[derive(Default)] +struct VirtIoApicUnlocked { + cur_reg: u32, + rte: [u64; (IOAPIC_MAX_REDIRECT_ENTRIES + 1) as usize], +} + +pub struct VirtIoApic { + inner: Vec>, +} + +impl VirtIoApic { + pub fn new(max_zones: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_zones { + let v = Mutex::new(VirtIoApicUnlocked::default()); + vs.push(v) + } + Self { inner: vs } + } + + fn read(&self, gpa: GuestPhysAddr) -> HvResult { + // info!("ioapic read! 
gpa: {:x}", gpa,); + let zone_id = this_zone_id(); + let ioapic = self.inner.get(zone_id).unwrap(); + + if gpa == 0 { + return Ok(ioapic.lock().cur_reg as _); + } + assert!(gpa == 0x10); + + let inner = ioapic.lock(); + match inner.cur_reg { + IoApicReg::ID => Ok(0), + IoApicReg::VERSION => Ok(IOAPIC_MAX_REDIRECT_ENTRIES << 16 | 0x11), // max redirect entries: 0x17, version: 0x11 + IoApicReg::ARBITRATION => Ok(0), + mut reg => { + reg -= IoApicReg::TABLE_BASE; + let index = (reg >> 1) as usize; + if let Some(entry) = inner.rte.get(index) { + if reg % 2 == 0 { + Ok((*entry).get_bits(0..=31)) + } else { + Ok((*entry).get_bits(32..=63)) + } + } else { + Ok(0) + } + } + } + } + + fn write(&self, gpa: GuestPhysAddr, value: u64, size: usize) -> HvResult { + /*info!( + "ioapic write! gpa: {:x}, value: {:x}, size: {:x}", + gpa, value, size, + );*/ + + let zone_id = this_zone_id(); + let ioapic = self.inner.get(zone_id).unwrap(); + if gpa == 0 { + ioapic.lock().cur_reg = value as _; + return Ok(()); + } + assert!(gpa == 0x10); + + let mut inner = ioapic.lock(); + match inner.cur_reg { + IoApicReg::ID | IoApicReg::VERSION | IoApicReg::ARBITRATION => {} + mut reg => { + reg -= IoApicReg::TABLE_BASE; + let index = (reg >> 1) as usize; + if let Some(entry) = inner.rte.get_mut(index) { + if reg % 2 == 0 { + entry.set_bits(0..=31, value.get_bits(0..=31)); + } else { + entry.set_bits(32..=63, value.get_bits(0..=31)); + + /*if zone_id == 0 { + // info!("1 write {:x} entry: {:x?}", index, *entry); + // only root zone modify the real I/O APIC + // unsafe { configure_gsi_from_raw(index as _, *entry) }; + }*/ + } + if zone_id == 0 { + // only root zone modify the real I/O APIC + unsafe { configure_gsi_from_raw(index as _, *entry) }; + } + } + } + } + Ok(()) + } + + fn get_irq_cpu(&self, irq: usize, zone_id: usize) -> Option { + let ioapic = self.inner.get(zone_id).unwrap(); + if let Some(entry) = ioapic.lock().rte.get(irq) { + let dest = get_cpu_id(entry.get_bits(56..=63) as usize); + 
return Some(dest); + } + None + } + + fn trigger(&self, irq: usize, allow_repeat: bool) -> HvResult { + let zone_id = this_zone_id(); + let ioapic = self.inner.get(zone_id).unwrap(); + if let Some(entry) = ioapic.lock().rte.get(irq) { + // TODO: physical & logical mode + let dest = get_cpu_id(entry.get_bits(56..=63) as usize); + let masked = entry.get_bit(16); + let vector = entry.get_bits(0..=7) as u8; + // info!("trigger hv: {:x} zone: {:x}", vector, zone_id); + if !masked && vector >= 0x20 { + inject_vector(dest, vector, None, allow_repeat); + } + } + Ok(()) + } +} + +impl Zone { + pub fn ioapic_mmio_init(&mut self, arch: &HvArchZoneConfig) { + if arch.ioapic_base == 0 || arch.ioapic_size == 0 { + return; + } + self.mmio_region_register( + arch.ioapic_base, + arch.ioapic_size, + mmio_ioapic_handler, + arch.ioapic_base, + ); + } +} + +fn mmio_ioapic_handler(mmio: &mut MMIOAccess, _: usize) -> HvResult { + if mmio.is_write { + VIRT_IOAPIC + .get() + .unwrap() + .write(mmio.address, mmio.value as _, mmio.size) + } else { + mmio.value = VIRT_IOAPIC.get().unwrap().read(mmio.address).unwrap() as _; + Ok(()) + } +} + +unsafe fn configure_gsi_from_raw(irq: u8, raw: u64) { + // info!("irq={:x} {:x}", irq, raw); + let mut io_apic = IO_APIC.lock(); + io_apic.set_table_entry(irq, core::mem::transmute(raw)); +} + +pub fn init_ioapic() { + // println!("Initializing I/O APIC..."); + unsafe { + Port::::new(0x20).write(0xff); + Port::::new(0xa0).write(0xff); + } +} + +pub fn init_virt_ioapic(max_zones: usize) { + VIRT_IOAPIC.call_once(|| VirtIoApic::new(max_zones)); +} + +pub fn ioapic_inject_irq(irq: u8, allow_repeat: bool) { + VIRT_IOAPIC.get().unwrap().trigger(irq as _, allow_repeat); +} + +pub fn get_irq_cpu(irq: usize, zone_id: usize) -> usize { + VIRT_IOAPIC + .get() + .unwrap() + .get_irq_cpu(irq, zone_id) + .unwrap() +} diff --git a/src/device/irqchip/pic/lapic.rs b/src/device/irqchip/pic/lapic.rs new file mode 100644 index 00000000..f5697c3b --- /dev/null +++ 
b/src/device/irqchip/pic/lapic.rs @@ -0,0 +1,137 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{ + cpu::{this_apic_id, this_cpu_id}, + idt::IdtVector, + ipi, + msr::Msr::{self, *}, + }, + device::irqchip::pic::pop_vector, + error::HvResult, + memory::Frame, + percpu::this_cpu_data, +}; +use bit_field::BitField; +use core::{ops::Range, u32}; +use x2apic::lapic::{LocalApic, LocalApicBuilder, TimerMode}; + +pub struct VirtLocalApic { + pub phys_lapic: LocalApic, + pub virt_timer_vector: u8, + virt_lvt_timer_bits: u32, +} + +impl VirtLocalApic { + pub fn new() -> Self { + Self { + phys_lapic: Self::new_phys_lapic( + IdtVector::APIC_TIMER_VECTOR as _, + IdtVector::APIC_ERROR_VECTOR as _, + IdtVector::APIC_SPURIOUS_VECTOR as _, + ), + virt_timer_vector: IdtVector::APIC_TIMER_VECTOR as _, + virt_lvt_timer_bits: (1 << 16) as _, // masked + } + } + + fn new_phys_lapic(timer: usize, error: usize, spurious: usize) -> LocalApic { + let mut lapic = LocalApicBuilder::new() + .timer_vector(timer) + .error_vector(error) + .spurious_vector(spurious) + .build() + .unwrap(); + unsafe { + lapic.enable(); + lapic.disable_timer(); + } + lapic + } + + pub const fn msr_range() -> Range { + 0x800..0x840 + } + + pub fn phys_local_apic<'a>() -> &'a mut LocalApic { + &mut this_cpu_data().arch_cpu.virt_lapic.phys_lapic + } + + pub fn rdmsr(&mut self, msr: Msr) -> HvResult { + match msr { + IA32_X2APIC_APICID => { + // 
info!("apicid: {:x}", this_cpu_id()); + Ok(this_apic_id() as u64) + } + IA32_X2APIC_LDR => Ok(this_apic_id() as u64), // logical apic id + IA32_X2APIC_ISR0 | IA32_X2APIC_ISR1 | IA32_X2APIC_ISR2 | IA32_X2APIC_ISR3 + | IA32_X2APIC_ISR4 | IA32_X2APIC_ISR5 | IA32_X2APIC_ISR6 | IA32_X2APIC_ISR7 => { + // info!("isr!"); + Ok(0) + } + IA32_X2APIC_IRR0 | IA32_X2APIC_IRR1 | IA32_X2APIC_IRR2 | IA32_X2APIC_IRR3 + | IA32_X2APIC_IRR4 | IA32_X2APIC_IRR5 | IA32_X2APIC_IRR6 | IA32_X2APIC_IRR7 => { + // info!("irr!"); + Ok(0) + } + IA32_X2APIC_LVT_TIMER => Ok(self.virt_lvt_timer_bits as _), + _ => hv_result_err!(ENOSYS), + } + } + + pub fn wrmsr(&mut self, msr: Msr, value: u64) -> HvResult { + match msr { + IA32_X2APIC_EOI => { + // info!("eoi"); + pop_vector(this_cpu_id()); + Ok(()) + } + IA32_X2APIC_ICR => { + // info!("ICR value: {:x}", value); + ipi::send_ipi(value); + Ok(()) + } + IA32_X2APIC_LVT_TIMER => { + self.virt_lvt_timer_bits = value as u32; + let timer = value.get_bits(0..=7) as u8; + if timer != self.virt_timer_vector { + self.virt_timer_vector = timer; + self.phys_lapic = Self::new_phys_lapic( + timer as _, + IdtVector::APIC_ERROR_VECTOR as _, + IdtVector::APIC_SPURIOUS_VECTOR as _, + ) + } + unsafe { + self.phys_lapic + .set_timer_mode(match value.get_bits(17..19) { + 0 => TimerMode::OneShot, + 1 => TimerMode::Periodic, + _ => TimerMode::TscDeadline, + }); + if value.get_bit(16) { + self.phys_lapic.disable_timer(); + } else { + self.phys_lapic.enable_timer(); + } + } + Ok(()) + } + _ => hv_result_err!(ENOSYS), + } + } +} diff --git a/src/device/irqchip/pic/mod.rs b/src/device/irqchip/pic/mod.rs new file mode 100644 index 00000000..a72b4e07 --- /dev/null +++ b/src/device/irqchip/pic/mod.rs @@ -0,0 +1,149 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +pub mod ioapic; +pub mod lapic; + +use crate::{ + arch::{acpi, cpu::this_cpu_id, idt, iommu, ipi, msr, pio, vmcs::Vmcs}, + consts::{MAX_CPU_NUM, MAX_ZONE_NUM}, + zone::Zone, +}; +use alloc::{collections::vec_deque::VecDeque, vec::Vec}; +use core::arch::asm; +use ioapic::ioapic_inject_irq; +use spin::{Mutex, Once}; + +static PENDING_VECTORS: Once = Once::new(); + +struct InnerPendingVectors { + pub queue: VecDeque<(u8, Option)>, + pub has_eoi: bool, +} + +struct PendingVectors { + inner: Vec>, +} + +impl PendingVectors { + fn new(max_cpus: usize) -> Self { + let mut vs = vec![]; + for _ in 0..max_cpus { + let v = Mutex::new(InnerPendingVectors { + queue: VecDeque::new(), + has_eoi: true, + }); + vs.push(v); + } + Self { inner: vs } + } + + fn add_vector(&self, cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + if vectors.queue.len() > 10 { + warn!("too many pending vectors! 
cnt: {:x?}", vectors.queue.len()); + } + if allow_repeat || !vectors.queue.contains(&(vector, err_code)) { + vectors.queue.push_back((vector, err_code)); + } + } + + fn check_pending_vectors(&self, cpu_id: usize) -> bool { + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + + if let Some(vector) = vectors.queue.front() { + let allow_interrupt = Vmcs::allow_interrupt().unwrap() && vectors.has_eoi; + if vector.0 < 32 || allow_interrupt { + if vectors.queue.len() > 10 { + warn!("too many pending vectors!"); + } + // if it's an exception, or an interrupt that is not blocked, inject it directly. + Vmcs::inject_interrupt(vector.0, vector.1).unwrap(); + vectors.has_eoi = false; + vectors.queue.pop_front(); + return true; + } else if vectors.has_eoi { + // interrupts are blocked, enable interrupt-window exiting. + Vmcs::set_interrupt_window(true).unwrap(); + } + } + false + } + + fn pop_vector(&self, cpu_id: usize) { + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + vectors.has_eoi = true; + } + + fn clear_vectors(&self, cpu_id: usize) { + let mut vectors = self.inner.get(cpu_id).unwrap().lock(); + vectors.queue.clear(); + } +} + +pub fn inject_vector(cpu_id: usize, vector: u8, err_code: Option, allow_repeat: bool) { + PENDING_VECTORS + .get() + .unwrap() + .add_vector(cpu_id, vector, err_code, allow_repeat); + if cpu_id != this_cpu_id() { + // wake up dest + ipi::arch_send_event(cpu_id as _, 0); + } +} + +pub fn check_pending_vectors(cpu_id: usize) -> bool { + PENDING_VECTORS.get().unwrap().check_pending_vectors(cpu_id) +} + +pub fn pop_vector(cpu_id: usize) { + PENDING_VECTORS.get().unwrap().pop_vector(cpu_id); +} + +pub fn clear_vectors(cpu_id: usize) { + PENDING_VECTORS.get().unwrap().clear_vectors(cpu_id); +} + +pub fn enable_irq() { + unsafe { asm!("sti") }; +} + +pub fn disable_irq() { + unsafe { asm!("cli") }; +} + +pub fn inject_irq(_irq: usize, allow_repeat: bool) { + ioapic_inject_irq(_irq as _, allow_repeat); +} + +pub fn percpu_init() {} 
+ +pub fn primary_init_early() { + ipi::init(MAX_CPU_NUM); + PENDING_VECTORS.call_once(|| PendingVectors::new(MAX_CPU_NUM)); + ioapic::init_ioapic(); + ioapic::init_virt_ioapic(MAX_ZONE_NUM); + msr::init_msr_bitmap_map(); + pio::init_pio_bitmap_map(); +} + +pub fn primary_init_late() {} + +impl Zone { + pub fn arch_irqchip_reset(&self) { + iommu::clear_dma_translation_tables(self.id); + } +} diff --git a/src/device/uart/mod.rs b/src/device/uart/mod.rs index 899c5110..f30f036d 100644 --- a/src/device/uart/mod.rs +++ b/src/device/uart/mod.rs @@ -46,3 +46,10 @@ pub use loongson_uart::{console_getchar, console_putchar}; mod uart_16550; #[cfg(all(feature = "uart_16550", target_arch = "aarch64"))] pub use uart_16550::{console_getchar, console_putchar}; + +#[cfg(all(feature = "uart16550a", target_arch = "x86_64"))] +mod uart16550a; +#[cfg(all(feature = "uart16550a", target_arch = "x86_64"))] +pub use uart16550a::{ + console_getchar, console_putchar, virt_console_io_read, virt_console_io_write, UartReg, +}; diff --git a/src/device/uart/uart16550a.rs b/src/device/uart/uart16550a.rs new file mode 100644 index 00000000..144b788b --- /dev/null +++ b/src/device/uart/uart16550a.rs @@ -0,0 +1,365 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Solicey + +use crate::{ + arch::{graphics::fb_putchar, pio::UART_COM1_BASE_PORT}, + device::irqchip::inject_irq, + error::HvResult, +}; +use alloc::vec::Vec; +use core::ops::Range; +use spin::Mutex; +use x86_64::instructions::port::{PortReadOnly, PortWriteOnly}; + +#[allow(non_snake_case)] +pub mod UartReg { + pub const RCVR_BUFFER: u16 = 0; + pub const XMIT_BUFFER: u16 = 0; + pub const INTR_ENABLE: u16 = 1; + pub const INTR_IDENT: u16 = 2; + pub const FIFO_CTRL: u16 = 2; + pub const LINE_CTRL: u16 = 3; + pub const MODEM_CTRL: u16 = 4; + pub const LINE_STATUS: u16 = 5; + pub const MODEM_STATUS: u16 = 6; + pub const SCRATCH: u16 = 7; +} + +const UART_CLOCK_FACTOR: usize = 16; +const OSC_FREQ: usize = 1_843_200; + +lazy_static::lazy_static! { + static ref COM1: Mutex = { + let mut uart = Uart16550a::new(UART_COM1_BASE_PORT); + uart.init(115200); + Mutex::new(uart) + }; + + static ref VIRT_COM1: VirtUart16550a = { + let uart = VirtUart16550a::new(UART_COM1_BASE_PORT); + uart + }; +} + +bitflags::bitflags! { + struct InterruptEnableFlags: u8 { + const ENABLE_RCVR_DATA_AVAIL_INTR = 1 << 0; + const ENABLE_XMIT_HOLD_REG_EMPTY_INTR = 1 << 1; + const _ = !0; + } + + struct InterruptIdentFlags: u8 { + const NO_INTR_IS_PENDING = 1 << 0; + const XMIT_HOLD_REG_EMPTY = 0x2; + const RCVR_DATA_AVAIL = 0x4; + const FIFO_ENABLED_16550_MODE = 0xc0; + const _ = !0; + } + + struct LineControlFlags: u8 { + const WORD_LENGTH_SELECT_8_BITS = 0x3; + const DIVISOR_LATCH_ACCESS_BIT = 1 << 7; + const _ = !0; + } + + struct LineStatusFlags: u8 { + const RCVR_DATA_READY = 1; + const XMIT_HOLD_REG_EMPTY = 1 << 5; + const XMIT_EMPTY = 1 << 6; + const _ = !0; + } +} + +/// FIFO queue for caching bytes read. 
+struct Fifo { + buf: [u8; CAP], + head: usize, + num: usize, +} + +impl Fifo { + const fn new() -> Self { + Self { + buf: [0; CAP], + head: 0, + num: 0, + } + } + + fn is_empty(&self) -> bool { + self.num == 0 + } + + fn is_full(&self) -> bool { + self.num == CAP + } + + fn push(&mut self, value: u8) { + assert!(self.num < CAP); + self.buf[(self.head + self.num) % CAP] = value; + self.num += 1; + } + + fn pop(&mut self) -> u8 { + assert!(self.num > 0); + let ret = self.buf[self.head]; + self.head += 1; + self.head %= CAP; + self.num -= 1; + ret + } +} + +struct Uart16550a { + rhr: PortReadOnly, // receive holding + thr: PortWriteOnly, // transmit holding + ier: PortWriteOnly, // interrupt enable + fcr: PortWriteOnly, // fifo control + lcr: PortWriteOnly, // line control + mcr: PortWriteOnly, // modem control + lsr: PortReadOnly, // line status +} + +impl Uart16550a { + const fn new(base_port: u16) -> Self { + Self { + rhr: PortReadOnly::new(base_port + UartReg::RCVR_BUFFER), + thr: PortWriteOnly::new(base_port + UartReg::XMIT_BUFFER), + ier: PortWriteOnly::new(base_port + UartReg::INTR_ENABLE), + fcr: PortWriteOnly::new(base_port + UartReg::FIFO_CTRL), + lcr: PortWriteOnly::new(base_port + UartReg::LINE_CTRL), + mcr: PortWriteOnly::new(base_port + UartReg::MODEM_CTRL), + lsr: PortReadOnly::new(base_port + UartReg::LINE_STATUS), + } + } + + fn init(&mut self, baud_rate: usize) { + unsafe { + // enable read available interrupts + self.ier + .write(InterruptEnableFlags::ENABLE_RCVR_DATA_AVAIL_INTR.bits()); + + // enable DLAB, set baud rate + let divisor = OSC_FREQ / (baud_rate * UART_CLOCK_FACTOR); + self.lcr + .write(LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits()); + self.thr.write((divisor & 0xff) as u8); + self.ier.write((divisor >> 8) as u8); + + // disable DLAB, set word length to 8 bits + self.lcr + .write(LineControlFlags::WORD_LENGTH_SELECT_8_BITS.bits()); + + // enable fifo, clear tx/rx queues + // set interrupt level to 14 bytes + self.fcr.write(0xC7); + 
+ // data terminal ready, request to send + // enable option 2 output (used as interrupt line for CPU) + self.mcr.write(0x0B); + } + } + + fn putchar(&mut self, c: u8) { + unsafe { + while self.lsr.read() & LineStatusFlags::XMIT_HOLD_REG_EMPTY.bits() == 0 {} + self.thr.write(c); + } + } + + fn getchar(&mut self) -> Option { + unsafe { + if self.lsr.read() & LineStatusFlags::RCVR_DATA_READY.bits() != 0 { + Some(self.rhr.read()) + } else { + None + } + } + } +} + +pub struct VirtUart16550aUnlocked { + iir: u8, + ier: u8, + lcr: u8, + lsr: u8, + fifo: Fifo<64>, +} + +impl VirtUart16550aUnlocked { + fn new() -> Self { + Self { + iir: 0, + ier: 0, + lcr: 0, + lsr: (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(), + fifo: Fifo::new(), + } + } + + fn update_irq(&mut self) { + let mut iir: u8 = 0; + + if self.ier & InterruptEnableFlags::ENABLE_RCVR_DATA_AVAIL_INTR.bits() != 0 + && self.lsr & LineStatusFlags::RCVR_DATA_READY.bits() != 0 + { + iir |= InterruptIdentFlags::RCVR_DATA_AVAIL.bits(); + } + + if self.ier & InterruptEnableFlags::ENABLE_XMIT_HOLD_REG_EMPTY_INTR.bits() != 0 + && self.lsr & LineStatusFlags::XMIT_HOLD_REG_EMPTY.bits() != 0 + { + iir |= InterruptIdentFlags::XMIT_HOLD_REG_EMPTY.bits(); + } + + if iir == 0 { + self.iir = InterruptIdentFlags::NO_INTR_IS_PENDING.bits(); + } else { + self.iir = iir; + // use COM1 irq + inject_irq(0x4, false); + } + } +} + +pub struct VirtUart16550a { + base_port: u16, + port_range: Vec>, + uart: Mutex, +} + +impl VirtUart16550a { + pub fn new(base_port: u16) -> Self { + Self { + base_port, + port_range: vec![base_port..base_port + 8], + uart: Mutex::new(VirtUart16550aUnlocked::new()), + } + } + + fn port_range(&self) -> &Vec> { + &self.port_range + } + + fn read(&self, port: u16) -> HvResult { + let mut uart = self.uart.lock(); + + let ret = match port - self.base_port { + UartReg::RCVR_BUFFER => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + 1 // dll + } else { + // 
read a byte from FIFO + if uart.fifo.is_empty() { + 0 + } else { + uart.fifo.pop() + } + } + } + UartReg::INTR_ENABLE => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + 0 // dlm + } else { + uart.ier + } + } + UartReg::INTR_IDENT => { + // info!("IIR read, {:x}", uart.iir); + uart.iir | InterruptIdentFlags::FIFO_ENABLED_16550_MODE.bits() + } + UartReg::LINE_CTRL => uart.lcr, + UartReg::LINE_STATUS => { + // check if the physical serial port has an available byte, and push it to FIFO. + if !uart.fifo.is_full() { + if let Some(c) = console_getchar() { + uart.fifo.push(c); + } + } + if !uart.fifo.is_empty() { + uart.lsr |= LineStatusFlags::RCVR_DATA_READY.bits(); + } else { + uart.lsr &= (!LineStatusFlags::RCVR_DATA_READY).bits(); + } + uart.lsr + } + UartReg::MODEM_CTRL | UartReg::MODEM_STATUS | UartReg::SCRATCH => { + debug!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented + 0 + } + _ => unreachable!(), + }; + + uart.update_irq(); + Ok(ret as u32) + } + + fn write(&self, port: u16, value: u32) -> HvResult { + let mut uart = self.uart.lock(); + let value: u8 = value as u8; + + match port - self.base_port { + UartReg::XMIT_BUFFER => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + // dll + } else { + uart.lsr |= + (LineStatusFlags::XMIT_HOLD_REG_EMPTY | LineStatusFlags::XMIT_EMPTY).bits(); + if value != 0xff { + console_putchar(value as u8); + } + } + } + UartReg::INTR_ENABLE => { + if uart.lcr & LineControlFlags::DIVISOR_LATCH_ACCESS_BIT.bits() != 0 { + // dlm + } else { + // info!("ier: {:x}", uart.ier); + uart.ier = value & 0x0f; + } + } + UartReg::LINE_CTRL => { + uart.lcr = value; + } + UartReg::FIFO_CTRL | UartReg::MODEM_CTRL | UartReg::SCRATCH => { + debug!("Unimplemented serial port I/O write: {:#x}", port); + } + UartReg::LINE_STATUS => {} // ignore + _ => unreachable!(), + } + + uart.update_irq(); + Ok(()) + } +} + +pub fn console_putchar(c: u8) { + COM1.lock().putchar(c); + 
#[cfg(all(feature = "graphics"))] + fb_putchar(c, 0xffffffff, 0); +} + +pub fn console_getchar() -> Option { + COM1.lock().getchar() +} + +pub fn virt_console_io_read(port: u16) -> u32 { + VIRT_COM1.read(port).unwrap() +} + +pub fn virt_console_io_write(port: u16, value: u32) { + VIRT_COM1.write(port, value).unwrap() +} diff --git a/src/device/virtio_trampoline.rs b/src/device/virtio_trampoline.rs index 0f570599..ebd851b7 100644 --- a/src/device/virtio_trampoline.rs +++ b/src/device/virtio_trampoline.rs @@ -13,7 +13,7 @@ // // Authors: // -use crate::arch::cpu::this_cpu_id; +use crate::arch::cpu::{get_target_cpu, this_cpu_id}; use crate::consts::MAX_CPU_NUM; use crate::consts::MAX_WAIT_TIMES; use crate::device::irqchip::inject_irq; @@ -41,10 +41,12 @@ pub const MAX_REQ: u32 = 32; pub const MAX_DEVS: usize = 8; // Attention: The max virtio-dev number for vm is 8 (loongarch64 needs 3 consoles and 3 disks for zgclab project). pub const MAX_CPUS: usize = 32; -#[cfg(not(target_arch = "riscv64"))] +#[cfg(all(not(target_arch = "riscv64"), not(target_arch = "x86_64")))] pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 32 + 0x20; #[cfg(target_arch = "riscv64")] pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 0x20; +#[cfg(target_arch = "x86_64")] +pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 0x6; /// non root zone's virtio request handler pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { @@ -85,29 +87,38 @@ pub fn mmio_virtio_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { #[cfg(not(target_arch = "loongarch64"))] if dev.need_wakeup() { debug!("need wakeup, sending ipi to wake up virtio device"); - let root_cpu = root_zone().read().cpu_set.first_cpu().unwrap(); - send_event(root_cpu, SGI_IPI_ID as _, IPI_EVENT_WAKEUP_VIRTIO_DEVICE); + send_event( + get_target_cpu(IRQ_WAKEUP_VIRTIO_DEVICE, 0), + SGI_IPI_ID as _, + IPI_EVENT_WAKEUP_VIRTIO_DEVICE, + ); } drop(dev); let mut count: usize = 0; // if it is cfg request, current cpu should be blocked until 
gets the result if need_interrupt == 0 { // when virtio backend finish the req, it will add 1 to cfg_flag. - while cfg_flags[cpu_id] == old_cfg_flag { + while unsafe { core::ptr::read_volatile(&cfg_flags[cpu_id]) } == old_cfg_flag { // fence(Ordering::Acquire); count += 1; if count == MAX_WAIT_TIMES { - warn!("virtio backend is too slow, please check it!"); + warn!( + "virtio backend is too slow, please check it! addr: {:x} is_write: {:x?}", + mmio.address, mmio.is_write + ); fence(Ordering::Acquire); } if count == MAX_WAIT_TIMES * 10 { - error!("virtio backend may have some problem, please check it!"); + error!( + "virtio backend may have some problem, please check it! addr: {:x} is_write: {:x?}", + mmio.address, mmio.is_write + ); count = 0; } } if !mmio.is_write { // ensure cfg value is right. - mmio.value = cfg_values[cpu_id] as _; + mmio.value = unsafe { core::ptr::read_volatile(&cfg_values[cpu_id]) as _ }; // debug!("non root receives value: {:#x?}", mmio.value); } } diff --git a/src/event.rs b/src/event.rs index c7c2243e..e084a219 100644 --- a/src/event.rs +++ b/src/event.rs @@ -115,13 +115,12 @@ pub fn clear_events(cpu: usize) { } pub fn check_events() -> bool { - trace!("check_events"); let cpu_data = this_cpu_data(); let event = fetch_event(cpu_data.id); match event { Some(IPI_EVENT_WAKEUP) => { - info!("cpu {} wakeup", cpu_data.id); cpu_data.arch_cpu.run(); + false } Some(IPI_EVENT_SHUTDOWN) => { cpu_data.arch_cpu.idle(); diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index b8407f4e..5065b324 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -16,6 +16,7 @@ #![allow(dead_code)] #![allow(unreachable_patterns)] +use crate::arch::cpu::get_target_cpu; use crate::config::{HvZoneConfig, CONFIG_MAGIC_VERSION}; use crate::consts::{INVALID_ADDRESS, MAX_CPU_NUM, MAX_WAIT_TIMES, PAGE_SIZE}; use crate::device::virtio_trampoline::{MAX_DEVS, MAX_REQ, VIRTIO_BRIDGE, VIRTIO_IRQS}; @@ -37,6 +38,7 @@ numeric_enum! 
{ pub enum HyperCallCode { HvVirtioInit = 0, HvVirtioInjectIrq = 1, + HvVirtioGetIrq = 86, HvZoneStart = 2, HvZoneShutdown = 3, HvZoneList = 4, @@ -74,6 +76,7 @@ impl<'a> HyperCall<'a> { match code { HyperCallCode::HvVirtioInit => self.hv_virtio_init(arg0), HyperCallCode::HvVirtioInjectIrq => self.hv_virtio_inject_irq(), + HyperCallCode::HvVirtioGetIrq => self.hv_virtio_get_irq(arg0 as *mut u32), HyperCallCode::HvZoneStart => { self.hv_zone_start(&*(arg0 as *const HvZoneConfig), arg1) } @@ -110,7 +113,7 @@ impl<'a> HyperCall<'a> { return hv_result_err!(EPERM, "Init virtio over non-root zones: unsupported!"); } - let shared_region_addr_pa = self.translate_ipa_to_hva(shared_region_addr) as usize; + let shared_region_addr_pa = self.hv_get_real_pa(shared_region_addr) as usize; assert!(shared_region_addr_pa % PAGE_SIZE == 0); // let offset = shared_region_addr_pa & (PAGE_SIZE - 1); @@ -127,6 +130,7 @@ impl<'a> HyperCall<'a> { .lock() .set_base_addr(shared_region_addr_pa as _); info!("hvisor device region base is {:#x?}", shared_region_addr_pa); + HyperCallResult::Ok(0) } @@ -148,7 +152,7 @@ impl<'a> HyperCall<'a> { let irq_id = region.res_list[res_front].irq_id as u64; let target_zone = region.res_list[res_front].target_zone; let target_cpu = match find_zone(target_zone as _) { - Some(zone) => zone.read().cpu_set.first_cpu().unwrap(), + Some(zone) => get_target_cpu(irq_id as _, target_zone as _), _ => { fence(Ordering::SeqCst); region.res_front = (region.res_front + 1) & (MAX_REQ - 1); diff --git a/src/logging.rs b/src/logging.rs index f153ffcc..65f2370d 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -94,6 +94,28 @@ enum ColorCode { BrightWhite = 97, } +fn color_code_to_bgra(code: &ColorCode) -> u32 { + match code { + ColorCode::Black => 0, + ColorCode::Red => 0x0000aaff, + ColorCode::Green => 0x00aa00ff, + ColorCode::Yellow => 0x0055aaff, + ColorCode::Blue => 0xaa0000ff, + ColorCode::Magenta => 0xaa00aaff, + ColorCode::Cyan => 0xaaaa00ff, + ColorCode::White => 
0xaaaaaaff, + ColorCode::BrightBlack => 0x555555ff, + ColorCode::BrightRed => 0x5555ffff, + ColorCode::BrightGreen => 0x55ff55ff, + ColorCode::BrightYellow => 0x55ffffff, + ColorCode::BrightBlue => 0xff5555ff, + ColorCode::BrightMagenta => 0xff55ffff, + ColorCode::BrightCyan => 0xffff55ff, + ColorCode::BrightWhite => 0xffffffff, + _ => 0, + } +} + pub fn init() { static LOGGER: SimpleLogger = SimpleLogger; log::set_logger(&LOGGER).unwrap(); @@ -109,6 +131,50 @@ pub fn init() { struct SimpleLogger; +impl SimpleLogger { + #[cfg(feature = "graphics")] + fn print( + &self, + level: Level, + line: u32, + target: &str, + cpu_id: usize, + level_color: ColorCode, + args_color: ColorCode, + record: &Record, + ) { + println!( + "[{:<5} {}] ({}:{}) {}", + level, + cpu_id, + target, + line, + record.args() + ); + } + + #[cfg(not(feature = "graphics"))] + fn print( + &self, + level: Level, + line: u32, + target: &str, + cpu_id: usize, + level_color: ColorCode, + args_color: ColorCode, + record: &Record, + ) { + print(with_color!( + ColorCode::White, + "[{} {}] {} {}\n", + with_color!(level_color, "{:<5}", level), + with_color!(ColorCode::White, "{}", cpu_id), + with_color!(ColorCode::White, "({}:{})", target, line), + with_color!(args_color, "{}", record.args()), + )); + } +} + impl Log for SimpleLogger { fn enabled(&self, _metadata: &Metadata) -> bool { true @@ -137,14 +203,8 @@ impl Log for SimpleLogger { Level::Debug => ColorCode::Cyan, Level::Trace => ColorCode::BrightBlack, }; - print(with_color!( - ColorCode::White, - "[{} {}] {} {}\n", - with_color!(level_color, "{:<5}", level), - with_color!(ColorCode::White, "{}", cpu_id), - with_color!(ColorCode::White, "({}:{})", target, line), - with_color!(args_color, "{}", record.args()), - )); + + self.print(level, line, target, cpu_id, level_color, args_color, record); } fn flush(&self) {} diff --git a/src/main.rs b/src/main.rs index 10f9ae27..b11ac8c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,6 +28,7 @@ #![no_main] 
#![feature(asm_const)] #![feature(naked_functions)] +#![feature(concat_idents)] // #![feature(core_panic)] // #![deny(warnings, missing_docs)] #![feature(proc_macro_hygiene)] @@ -66,7 +67,7 @@ mod pci; mod tests; use crate::arch::iommu::iommu_init; -use crate::arch::mm::arch_setup_parange; +use crate::arch::mm::{arch_post_heap_init, arch_setup_parange}; use crate::consts::{hv_end, mem_pool_start, MAX_CPU_NUM}; use arch::{cpu::cpu_start, entry::arch_entry}; use config::root_zone_config; @@ -176,6 +177,7 @@ fn rust_main(cpuid: usize, host_dtb: usize) { is_primary = true; memory::heap::init(); memory::heap::test(); + arch_post_heap_init(host_dtb); } let cpu = PerCpu::new(cpuid); diff --git a/src/memory/frame.rs b/src/memory/frame.rs index 030b3e55..b7a64f85 100644 --- a/src/memory/frame.rs +++ b/src/memory/frame.rs @@ -23,6 +23,7 @@ use spin::Mutex; use super::addr::{align_down, align_up, is_aligned, PhysAddr}; use crate::consts::PAGE_SIZE; use crate::error::HvResult; +use crate::memory::addr::virt_to_phys; // Support max 1M * 4096 = 1GB memory. 
type FrameAlloc = bitmap_allocator::BitAlloc1M; @@ -277,7 +278,9 @@ pub fn init() { let mem_pool_start = crate::consts::mem_pool_start(); let mem_pool_end = align_down(crate::consts::hv_end()); let mem_pool_size = mem_pool_end - mem_pool_start; - FRAME_ALLOCATOR.lock().init(mem_pool_start, mem_pool_size); + FRAME_ALLOCATOR + .lock() + .init(virt_to_phys(mem_pool_start), mem_pool_size); info!( "Frame allocator initialization finished: {:#x?}", diff --git a/src/memory/mm.rs b/src/memory/mm.rs index 10196366..54a7c7a8 100644 --- a/src/memory/mm.rs +++ b/src/memory/mm.rs @@ -77,6 +77,8 @@ where pt: PT::new(pt_level), #[cfg(target_arch = "loongarch64")] pt: PT::new(), + #[cfg(target_arch = "x86_64")] + pt: PT::new(), } } diff --git a/src/memory/mmio.rs b/src/memory/mmio.rs index fe9c64d6..5c88ac31 100644 --- a/src/memory/mmio.rs +++ b/src/memory/mmio.rs @@ -90,6 +90,13 @@ pub fn mmio_handle_access(mmio: &mut MMIOAccess) -> HvResult { match res { Some((region, handler, arg)) => { mmio.address -= region.start; + + // x86_64 requires instruction emulation for mmio access + #[cfg(target_arch = "x86_64")] + if mmio.size == 0 { + return crate::arch::mmio::instruction_emulator(&handler, mmio, arg); + } + match handler(mmio, arg) { Ok(_) => Ok(()), Err(e) => { diff --git a/src/pci/pci.rs b/src/pci/pci.rs index ac0d5ec7..5ab53bbc 100644 --- a/src/pci/pci.rs +++ b/src/pci/pci.rs @@ -13,8 +13,6 @@ // // Authors: // -use core::{panic, ptr, usize}; - use crate::config::{HvPciConfig, CONFIG_MAX_PCI_DEV}; use crate::memory::addr::align_down; use crate::memory::mmio_perform_access; @@ -29,6 +27,8 @@ use crate::{ zone::Zone, }; use alloc::vec::Vec; +use core::ptr::{read_volatile, write_volatile}; +use core::{panic, ptr, usize}; use super::bridge::BridgeConfig; use super::endpoint::EndpointConfig; @@ -46,7 +46,7 @@ pub struct PciRoot { bridges: Vec, alloc_devs: Vec, // include host bridge phantom_devs: Vec, - bar_regions: Vec, + pub bar_regions: Vec, } impl PciRoot { pub fn new() -> Self 
{ @@ -120,11 +120,11 @@ impl PciRoot { for bar_id in 0..NUM_BAR_REGS_TYPE0 { unsafe { let reg_ptr = (cfg_base + offsets[bar_id]) as *mut u32; - let origin_val = *reg_ptr; - *reg_ptr = 0xffffffffu32; - let new_val = *reg_ptr; + let origin_val = read_volatile(reg_ptr); + write_volatile(reg_ptr, 0xffffffffu32); + let new_val = read_volatile(reg_ptr); ep.bars_init(bar_id, origin_val, new_val); - *reg_ptr = origin_val; + write_volatile(reg_ptr, origin_val); } } } @@ -137,11 +137,11 @@ impl PciRoot { for bar_id in 0..NUM_BAR_REGS_TYPE1 { unsafe { let reg_ptr = (cfg_base + offsets[bar_id]) as *mut u32; - let origin_val = *reg_ptr; - *reg_ptr = 0xffffffffu32; - let new_val = *reg_ptr; + let origin_val = read_volatile(reg_ptr); + write_volatile(reg_ptr, 0xffffffffu32); + let new_val = read_volatile(reg_ptr); bridge.bars_init(bar_id, origin_val, new_val); - *reg_ptr = origin_val; + write_volatile(reg_ptr, origin_val); } } } @@ -185,8 +185,17 @@ impl Zone { alloc_pci_devs[idx] & 0b111 ); self.pciroot.alloc_devs.push(alloc_pci_devs[idx] as _); + #[cfg(any( + all(feature = "iommu", target_arch = "aarch64"), + target_arch = "x86_64" + ))] if alloc_pci_devs[idx] != 0 { - iommu_add_device(self.id, alloc_pci_devs[idx] as _); + let iommu_pt_addr = if self.iommu_pt.is_some() { + self.iommu_pt.as_ref().unwrap().root_paddr() + } else { + 0 + }; + iommu_add_device(self.id, alloc_pci_devs[idx] as _, iommu_pt_addr); } } @@ -332,7 +341,7 @@ impl Zone { self.pciroot.generate_vdevs(); } - fn pci_bars_register(&mut self, pci_config: &HvPciConfig) { + pub fn pci_bars_register(&mut self, pci_config: &HvPciConfig) { for region in self.pciroot.bar_regions.iter_mut() { let (cpu_base, pci_base) = match region.bar_type { BarType::IO => (pci_config.io_base as usize, pci_config.pci_io_base as usize), @@ -347,22 +356,14 @@ impl Zone { _ => panic!("Unknown BAR type!"), }; - region.start = cpu_base + region.start - pci_base; - region.start = align_down(region.start); + 
region.arch_set_bar_region_start(cpu_base, pci_base); info!( - "pci bar region: type: {:?}, base: {:#x}, size:{:#x}", + "pci bar region: type: {:?}, base: {:#x}, size: {:#x}", region.bar_type, region.start, region.size ); - self.gpm - .insert(MemoryRegion::new_with_offset_mapper( - region.start as GuestPhysAddr, - region.start, - region.size, - MemFlags::READ | MemFlags::WRITE | MemFlags::IO, - )) - .ok(); + region.arch_insert_bar_region(&mut self.gpm, self.id); } } } @@ -410,7 +411,11 @@ pub fn mmio_pci_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { mmio.value = header_val as _; return Ok(()); } else { + #[cfg(not(target_arch = "x86_64"))] panic!("invalid access to empty device {:x}:{:x}.{:x}, addr: {:#x}, reg_addr: {:#x}!", bdf >> 8, (bdf >> 3) & 0b11111, bdf & 0b111, mmio.address, reg_addr); + // in x86, linux will probe for pci devices automatically + #[cfg(target_arch = "x86_64")] + return Ok(()); } } else { // device exists, so we try to get the phantom device diff --git a/src/pci/pcibar.rs b/src/pci/pcibar.rs index fd42bbc8..67530982 100644 --- a/src/pci/pcibar.rs +++ b/src/pci/pcibar.rs @@ -27,7 +27,7 @@ pub struct BarRegion { pub bar_type: BarType, } -#[derive(Default, Debug, Copy, Clone)] +#[derive(Default, Debug, Copy, Clone, PartialEq)] pub enum BarType { Mem32, Mem64, diff --git a/src/percpu.rs b/src/percpu.rs index b9e460af..e731a106 100644 --- a/src/percpu.rs +++ b/src/percpu.rs @@ -40,14 +40,15 @@ pub struct PerCpu { impl PerCpu { pub fn new<'a>(cpu_id: usize) -> &'static mut PerCpu { - let vaddr = PER_CPU_ARRAY_PTR as VirtAddr + cpu_id as usize * PER_CPU_SIZE; + let arch_cpu = ArchCpu::new(cpu_id); + let vaddr = PER_CPU_ARRAY_PTR as VirtAddr + arch_cpu.cpuid as usize * PER_CPU_SIZE; let ret = vaddr as *mut Self; unsafe { ret.write_volatile(PerCpu { - id: cpu_id, + id: arch_cpu.cpuid, cpu_on_entry: INVALID_ADDRESS, dtb_ipa: INVALID_ADDRESS, - arch_cpu: ArchCpu::new(cpu_id), + arch_cpu, zone: None, ctrl_lock: Mutex::new(()), boot_cpu: 
false, diff --git a/src/zone.rs b/src/zone.rs index 5e6483e1..58bf6ec4 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -39,6 +39,7 @@ pub struct Zone { pub irq_bitmap: [u32; 1024 / 32], pub gpm: MemorySet, pub pciroot: PciRoot, + pub iommu_pt: Option>, pub is_err: bool, } @@ -53,6 +54,11 @@ impl Zone { mmio: Vec::new(), irq_bitmap: [0; 1024 / 32], pciroot: PciRoot::new(), + iommu_pt: if cfg!(feature = "iommu") { + Some(new_s2_memory_set()) + } else { + None + }, is_err: false, } } @@ -204,10 +210,33 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { zone.pt_init(config.memory_regions()).unwrap(); zone.mmio_init(&config.arch_config); - zone.arch_zone_configuration(config)?; + let mut cpu_num = 0; + for cpu_id in config.cpus().iter() { + if let Some(zone) = get_cpu_data(*cpu_id as _).zone.clone() { + return hv_result_err!( + EBUSY, + format!( + "Failed to create zone: cpu {} already belongs to zone {}", + cpu_id, + zone.read().id + ) + ); + } + zone.cpu_set.set_bit(*cpu_id as _); + cpu_num += 1; + } + zone.cpu_num = cpu_num; + info!("zone cpu_set: {:#b}", zone.cpu_set.bitmap); + let cpu_set = zone.cpu_set; + + zone.arch_zone_pre_configuration(config)?; // #[cfg(target_arch = "aarch64")] // zone.ivc_init(config.ivc_config()); + #[cfg(all(feature = "iommu", target_arch = "aarch64"))] + zone.iommu_pt_init(config.memory_regions(), &config.arch_config) + .unwrap(); + /* loongarch page table emergency */ /* Kai: Maybe unnecessary but i can't boot vms on my 3A6000 PC without this function. 
*/ // #[cfg(target_arch = "loongarch64")] @@ -222,24 +251,8 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { &config.alloc_pci_devs, ); - let mut cpu_num = 0; - - for cpu_id in config.cpus().iter() { - if let Some(zone) = get_cpu_data(*cpu_id as _).zone.clone() { - return hv_result_err!( - EBUSY, - format!( - "Failed to create zone: cpu {} already belongs to zone {}", - cpu_id, - zone.read().id - ) - ); - } - zone.cpu_set.set_bit(*cpu_id as _); - cpu_num += 1; - } + zone.arch_zone_post_configuration(config)?; - zone.cpu_num = cpu_num; // Initialize the virtual interrupt controller, it needs zone.cpu_num zone.virqc_init(config); @@ -254,8 +267,6 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult>> { dtb_ipa = region.virtual_start + config.dtb_load_paddr - region.physical_start; } } - info!("zone cpu_set: {:#b}", zone.cpu_set.bitmap); - let cpu_set = zone.cpu_set; let new_zone_pointer = Arc::new(RwLock::new(zone)); {