diff --git a/ci/main/app-unsound/.cargo b/ci/main/app-unsound/.cargo new file mode 120000 index 0000000..e7f95c9 --- /dev/null +++ b/ci/main/app-unsound/.cargo @@ -0,0 +1 @@ +../../memory-layout/.cargo \ No newline at end of file diff --git a/ci/main/app-unsound/Cargo.toml b/ci/main/app-unsound/Cargo.toml new file mode 100644 index 0000000..ba73731 --- /dev/null +++ b/ci/main/app-unsound/Cargo.toml @@ -0,0 +1,7 @@ +[package] +edition = "2024" +name = "app" +version = "0.1.0" + +[dependencies] +rt = { path = "../rt-unsound" } diff --git a/ci/main/app-unsound/src/main.rs b/ci/main/app-unsound/src/main.rs new file mode 100644 index 0000000..9bfdc54 --- /dev/null +++ b/ci/main/app-unsound/src/main.rs @@ -0,0 +1,23 @@ +#![no_main] +#![no_std] + +use core::{arch::asm, ptr}; + +use rt::entry; + +entry!(main); + +static mut DATA: i32 = 1; + +#[allow(static_mut_refs)] +fn main() -> ! { + unsafe { + // check that DATA is properly initialized + if ptr::read_volatile(&DATA) != 1 { + // this makes QEMU crash + asm!("BKPT"); + } + } + + loop {} +} diff --git a/ci/main/rt-unsound/Cargo.toml b/ci/main/rt-unsound/Cargo.toml new file mode 120000 index 0000000..6d53c37 --- /dev/null +++ b/ci/main/rt-unsound/Cargo.toml @@ -0,0 +1 @@ +../rt/Cargo.toml \ No newline at end of file diff --git a/ci/main/rt-unsound/build.rs b/ci/main/rt-unsound/build.rs new file mode 120000 index 0000000..75e67a8 --- /dev/null +++ b/ci/main/rt-unsound/build.rs @@ -0,0 +1 @@ +../rt/build.rs \ No newline at end of file diff --git a/ci/main/rt-unsound/link.x b/ci/main/rt-unsound/link.x new file mode 100644 index 0000000..f0c8b11 --- /dev/null +++ b/ci/main/rt-unsound/link.x @@ -0,0 +1,56 @@ +/* Memory layout of the LM3S6965 microcontroller */ +/* 1K = 1 KiBi = 1024 bytes */ +MEMORY +{ + FLASH : ORIGIN = 0x00000000, LENGTH = 256K + RAM : ORIGIN = 0x20000000, LENGTH = 64K +} + +/* The entry point is the reset handler */ +ENTRY(Reset); + +EXTERN(RESET_VECTOR); + +SECTIONS +{ + .vector_table ORIGIN(FLASH) : + { + 
/* First entry: initial Stack Pointer value */ + LONG(ORIGIN(RAM) + LENGTH(RAM)); + + /* Second entry: reset vector */ + KEEP(*(.vector_table.reset_vector)); + } > FLASH + + .text : + { + *(.text .text.*); + } > FLASH + + /* CHANGED! */ + .rodata : + { + *(.rodata .rodata.*); + } > FLASH + + .bss : + { + _sbss = .; + *(.bss .bss.*); + _ebss = .; + } > RAM + + .data : AT(ADDR(.rodata) + SIZEOF(.rodata)) + { + _sdata = .; + *(.data .data.*); + _edata = .; + } > RAM + + _sidata = LOADADDR(.data); + + /DISCARD/ : + { + *(.ARM.exidx .ARM.exidx.*); + } +} diff --git a/ci/main/rt-unsound/src/lib.rs b/ci/main/rt-unsound/src/lib.rs new file mode 100644 index 0000000..78f7e70 --- /dev/null +++ b/ci/main/rt-unsound/src/lib.rs @@ -0,0 +1,55 @@ +#![no_std] + +use core::panic::PanicInfo; +use core::ptr; + +#[unsafe(no_mangle)] +#[allow(static_mut_refs)] +pub unsafe extern "C" fn Reset() -> ! { + // NEW! + // Initialize RAM + unsafe extern "C" { + static mut _sbss: u8; + static mut _ebss: u8; + + static mut _sdata: u8; + static mut _edata: u8; + static _sidata: u8; + } + + let count = unsafe { &_ebss as *const u8 as usize - &_sbss as *const u8 as usize }; + unsafe { ptr::write_bytes(&mut _sbss as *mut u8, 0, count) }; + + let count = unsafe { &_edata as *const u8 as usize - &_sdata as *const u8 as usize }; + unsafe { ptr::copy_nonoverlapping(&_sidata as *const u8, &mut _sdata as *mut u8, count) }; + + // Call user entry point + unsafe extern "Rust" { + safe fn main() -> !; + } + + main() +} + +// The reset vector, a pointer into the reset handler +#[unsafe(link_section = ".vector_table.reset_vector")] +#[unsafe(no_mangle)] +pub static RESET_VECTOR: unsafe extern "C" fn() -> ! = Reset; + +#[panic_handler] +fn panic(_panic: &PanicInfo<'_>) -> ! { + loop {} +} + +#[macro_export] +macro_rules! entry { + ($path:path) => { + #[unsafe(export_name = "main")] + pub unsafe fn __main() -> ! { + // type check the given path + let f: fn() -> ! 
= $path; + + f() + } + }; +} diff --git a/ci/main/rt2/src/lib.rs b/ci/main/rt2/src/lib.rs index 78f7e70..2ce6024 100644 --- a/ci/main/rt2/src/lib.rs +++ b/ci/main/rt2/src/lib.rs @@ -1,34 +1,59 @@ #![no_std] use core::panic::PanicInfo; -use core::ptr; -#[unsafe(no_mangle)] -#[allow(static_mut_refs)] -pub unsafe extern "C" fn Reset() -> ! { - // NEW! - // Initialize RAM - unsafe extern "C" { - static mut _sbss: u8; - static mut _ebss: u8; - - static mut _sdata: u8; - static mut _edata: u8; - static _sidata: u8; - } - - let count = unsafe { &_ebss as *const u8 as usize - &_sbss as *const u8 as usize }; - unsafe { ptr::write_bytes(&mut _sbss as *mut u8, 0, count) }; - - let count = unsafe { &_edata as *const u8 as usize - &_sdata as *const u8 as usize }; - unsafe { ptr::copy_nonoverlapping(&_sidata as *const u8, &mut _sdata as *mut u8, count) }; - - // Call user entry point - unsafe extern "Rust" { - safe fn main() -> !; - } - - main() +use core::arch::global_asm; + +global_asm!( + ".text + + .syntax unified + .global _sbss + .global _ebss + + .global _sdata + .global _edata + .global _sidata + + .global main + .global Reset + + .type Reset,%function + .thumb_func + Reset: + + _init_bss: + movs r2, #0 + ldr r0, =_sbss + ldr r1, =_ebss + + 1: + cmp r1, r0 + beq _init_data + strb r2, [r0] + add r0, #1 + b 1b + + _init_data: + ldr r0, =_sdata + ldr r1, =_edata + ldr r2, =_sidata + + 1: + cmp r0, r1 + beq _main_trampoline + ldrb r3, [r2] + strb r3, [r0] + add r0, #1 + add r2, #1 + b 1b + _main_trampoline: + ldr r0, =main + bx r0" +); + +unsafe extern "C" { + pub safe fn Reset() -> !; } // The reset vector, a pointer into the reset handler diff --git a/ci/script.sh b/ci/script.sh index acd3e55..7422c31 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -92,6 +92,12 @@ main() { edition_check popd + pushd app-unsound + cargo build + qemu_check target/thumbv7m-none-eabi/debug/app + edition_check + popd + popd diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 61f47fa..329e65f 
100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -4,6 +4,7 @@ - [The smallest `#![no_std]` program](./smallest-no-std.md) - [Memory layout](./memory-layout.md) - [A `main` interface](./main.md) + - [Why don't we initialize `.data` and `.bss` using Rust](./sections-in-rust.md) - [Exception handling](./exceptions.md) - [Assembly on stable](./asm.md) - [Logging with symbols](./logging.md) diff --git a/src/main.md b/src/main.md index 744bad3..a29a813 100644 --- a/src/main.md +++ b/src/main.md @@ -192,7 +192,7 @@ Let's go into the details of these changes: ``` We associate symbols to the start and end addresses of the `.bss` and `.data` sections, which we'll -later use from Rust code. +later use to initialize them. ``` text {{#include ../ci/main/rt2/link.x:43}} @@ -210,18 +210,24 @@ memory (Flash); the LMA is where in Flash those initial values are stored. Finally, we associate a symbol to the LMA of `.data`. -On the Rust side, we zero the `.bss` section and initialize the `.data` section. We can reference -the symbols we created in the linker script from the Rust code. The *addresses*[^1] of these symbols are +Using our initialization code, we zero the `.bss` section and initialize the `.data` section. We can reference +the symbols we created in the linker script from the code. The *addresses*[^1] of these symbols are the boundaries of the `.bss` and `.data` sections. -The updated reset handler is shown below: +We could write the `.bss` and `.data` initialization code in pure Rust. In fact, earlier +versions of this book did so. However, several soundness questions have been raised over time, +and it is no longer considered good practice to initialize them in Rust code. See the +[Why don't we initialize .data and .bss using Rust](./sections-in-rust.md) section of the book for more details. +We will write the initialization code using the `global_asm!` macro to define our reset handler. 
+ +The updated reset handler, now written in `Thumb-2` assembly, is shown below: ``` console -$ head -n33 ../rt/src/lib.rs +$ head -n53 ../rt/src/lib.rs ``` ``` rust -{{#include ../ci/main/rt2/src/lib.rs:1:32}} +{{#include ../ci/main/rt2/src/lib.rs:1:53}} ``` Now end users can directly and indirectly make use of `static` variables without running into diff --git a/src/sections-in-rust.md b/src/sections-in-rust.md new file mode 100644 index 0000000..d001274 --- /dev/null +++ b/src/sections-in-rust.md @@ -0,0 +1,100 @@ +# Why don't we initialize `.data` and `.bss` using Rust + +Earlier versions of this book initialized the `.data` and `.bss` sections using Rust code. +This has proven to have questionable soundness, and the recommended method of +performing the initialization of these sections nowadays relies on assembly. + +This chapter discusses the reasons that led to the decision of various crates like +[cortex-m-rt](https://crates.io/crates/cortex-m-rt) and [riscv-rt](https://crates.io/crates/riscv-rt) +to migrate to performing assembly initialization of these sections. There are +[a](https://github.com/rust-embedded/cortex-m-rt/issues/300) +[decent](https://github.com/rust-embedded/embedonomicon/issues/69) +[number](https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/The.20least.20incorrect.20init.20code.20.3A\)) +[of](https://github.com/rust-lang/unsafe-code-guidelines/issues/259) +[threads](https://github.com/rust-embedded/wg/issues/771) +where the soundness of such code has been questioned. We will summarize +them in this chapter. + +The original code used for global data initialization in Rust in this book is listed +as follows: + +``` rust +{{#include ../ci/main/rt-unsound/src/lib.rs:1:32}} +``` + +Five `extern "C"` variables are declared to reference specific memory locations. +Our linker script defines each symbol, so we do not need to worry about their +exact placement. 
+ +## Pointer provenance + +To initialize the `.bss` section, we take the address of the `_sbss` `u8` variable, +which points to the start of the `.bss` section. Then we write an arbitrary +amount of data to its location. `_sbss` is declared as a `u8` variable, and +the pointer provenance rules only allow us to write an amount of data that fits +within the allocation of our `_sbss` variable. Despite that, we are writing past +the single byte (as far as Rust is aware, a single byte is allocated at this +address) up until we hit the location of `_ebss`. + +There is a separate issue in which we actually have an `_ebss` variable that is +pointing one byte outside of the `.bss` section. In specific implementations, +accessing this byte might not even be possible if the `.bss` section exhausted +the available memory. Ideally `_ebss` needs to be declared as a ZST. And by +extension, because the `.bss` section can be empty, `_sbss` should also be a +ZST, because in this case `_sbss` would also fall outside of the region reserved +for the `.bss`. + +## Aliasing + +Another potential problem with the code above is aliasing. Consider our linker +script. + +``` text +{{#include ../ci/main/rt-unsound/link.x:36:48}} +``` + +The following situations can occur: +- `_sbss` might be located at the same address as the first variable in the `.bss` +section, assuming that the section is not empty. +- `_ebss` will be located at the same address as `_sdata`, and by extension, it +will also be located at the same address as the first variable in the `.data` +section. +- If the `.bss` section is empty, both `_sbss` and `_ebss` will alias each other. +- If the `.data` section is empty, both `_sdata` and `_edata` will alias each other. + +Rust does not allow more than one variable to be located at the same address +(with ZSTs being a key exception). 
But even if it did, we are using these variables +to write to the whole global memory area, which effectively is mutably aliasing all +global data defined in the program. + +## Abstract machine initialization + +Another question is whether it is safe to enter any Rust code before the Rust +abstract machine has been fully initialized. Can we rely on Rust not using any +of the global memory while it is not yet initialized? The answer to this question +is not clear (or does not seem clear to the author of the section at the time of +this writing). + +## More potential provenance issues + +A clever reader might have seen how we compute the offset between `_ebss` and `_sbss` and thought, +couldn't we instead use the [`offset_from`](https://doc.rust-lang.org/std/primitive.pointer.html#method.offset_from) +method of a pointer? + +The problem with this approach, however, is that, as we mentioned above, `_ebss` +and `_sbss` belong to different allocations, so they do not share the same pointer +provenance. This is true even if they both are aliased and happen to fall at the +same address (i.e. when the `.bss` section is empty). + +Running Miri on this [Rust Playground Snippet](https://play.rust-lang.org/?version=stable&mode=release&edition=2024&gist=3225a585752704d9c58b1842e0fc5307) +shows the undefined behavior. + +## Ok, but it works, doesn't it? + +Yes. While the code provided at the beginning of this chapter does produce the +right behavior as of Rust 1.89, the problem is that **we cannot rely on this behavior +being preserved in future releases**, or even on the optimizer not doing something +funky in the future. + +That is why, overall, the recommendation of this book is to **not** perform this initialization +using Rust code.