From 28ba46cad9bf62716c6b7c48d7bacba8582d3633 Mon Sep 17 00:00:00 2001 From: Javier Alvarez Date: Fri, 19 Sep 2025 00:12:49 +0200 Subject: [PATCH 1/3] Initialize data and bss sections in assembly The embedonomicon currently suggests initializing the bss and data sections in Rust code. However, since this was written, there has been extensive discussion about the soundness of the runtime initialization code. In particular, the following questions have been raised: - Whether it is sound to write outside the bounds of the data pointed to by pointers whose provenance comes from a static variable. - Whether it is sound to mutably alias all static memory and write to it during initialization. - Whether it is unsound to enter the Rust abstract machine without the static variables being initialized. - Whether pointers obtained from different static allocations (such as `_sbss` and `_ebss`) can be compared. Note that the code in embedonomicon does not suffer from this issue, since pointers are converted to `usize` values such that provenance no longer applies by the time they are subtracted to determine the region size. This was not the case in multiple runtime crates. The general consensus in the ecosystem has been to move away from performing the initialization of the static memory regions in Rust. In order to avoid creating a false sense of security while reading the embedonomicon, these issues should be mentioned in the book, and the code written in assembly to reflect the current best-practice approach to platform initialization code.
References: - https://github.com/rust-embedded/cortex-m-rt/issues/300 - https://github.com/rust-embedded/embedonomicon/issues/69 - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/The.20least.20incorrect.20init.20code.20.3A) - https://github.com/rust-lang/unsafe-code-guidelines/issues/259 - https://github.com/rust-embedded/cortex-m-rt/pull/301 --- ci/main/rt2/src/lib.rs | 79 +++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/ci/main/rt2/src/lib.rs b/ci/main/rt2/src/lib.rs index 78f7e70..2ce6024 100644 --- a/ci/main/rt2/src/lib.rs +++ b/ci/main/rt2/src/lib.rs @@ -1,34 +1,59 @@ #![no_std] use core::panic::PanicInfo; -use core::ptr; -#[unsafe(no_mangle)] -#[allow(static_mut_refs)] -pub unsafe extern "C" fn Reset() -> ! { - // NEW! - // Initialize RAM - unsafe extern "C" { - static mut _sbss: u8; - static mut _ebss: u8; - - static mut _sdata: u8; - static mut _edata: u8; - static _sidata: u8; - } - - let count = unsafe { &_ebss as *const u8 as usize - &_sbss as *const u8 as usize }; - unsafe { ptr::write_bytes(&mut _sbss as *mut u8, 0, count) }; - - let count = unsafe { &_edata as *const u8 as usize - &_sdata as *const u8 as usize }; - unsafe { ptr::copy_nonoverlapping(&_sidata as *const u8, &mut _sdata as *mut u8, count) }; - - // Call user entry point - unsafe extern "Rust" { - safe fn main() -> !; - } - - main() +use core::arch::global_asm; + +global_asm!( + ".text + + .syntax unified + .global _sbss + .global _ebss + + .global _sdata + .global _edata + .global _sidata + + .global main + .global Reset + + .type Reset,%function + .thumb_func + Reset: + + _init_bss: + movs r2, #0 + ldr r0, =_sbss + ldr r1, =_ebss + + 1: + cmp r1, r0 + beq _init_data + strb r2, [r0] + add r0, #1 + b 1b + + _init_data: + ldr r0, =_sdata + ldr r1, =_edata + ldr r2, =_sidata + + 1: + cmp r0, r1 + beq _main_trampoline + ldrb r3, [r2] + strb r3, [r0] + add r0, #1 + add r2, #1 + b 1b + 
_main_trampoline: + ldr r0, =main + bx r0" +); + +unsafe extern "C" { + pub safe fn Reset() -> !; } // The reset vector, a pointer into the reset handler From 1eea2ed4827552a5a7579e058dc2bc144a5b24dd Mon Sep 17 00:00:00 2001 From: Javier Alvarez Date: Sat, 11 Oct 2025 22:17:07 +0200 Subject: [PATCH 2/3] Add rust section initialization example code --- ci/main/app-unsound/.cargo | 1 + ci/main/app-unsound/Cargo.toml | 7 +++++ ci/main/app-unsound/src/main.rs | 23 ++++++++++++++ ci/main/rt-unsound/Cargo.toml | 1 + ci/main/rt-unsound/build.rs | 1 + ci/main/rt-unsound/link.x | 56 +++++++++++++++++++++++++++++++++ ci/main/rt-unsound/src/lib.rs | 55 ++++++++++++++++++++++++++++++++ ci/script.sh | 6 ++++ 8 files changed, 150 insertions(+) create mode 120000 ci/main/app-unsound/.cargo create mode 100644 ci/main/app-unsound/Cargo.toml create mode 100644 ci/main/app-unsound/src/main.rs create mode 120000 ci/main/rt-unsound/Cargo.toml create mode 120000 ci/main/rt-unsound/build.rs create mode 100644 ci/main/rt-unsound/link.x create mode 100644 ci/main/rt-unsound/src/lib.rs diff --git a/ci/main/app-unsound/.cargo b/ci/main/app-unsound/.cargo new file mode 120000 index 0000000..e7f95c9 --- /dev/null +++ b/ci/main/app-unsound/.cargo @@ -0,0 +1 @@ +../../memory-layout/.cargo \ No newline at end of file diff --git a/ci/main/app-unsound/Cargo.toml b/ci/main/app-unsound/Cargo.toml new file mode 100644 index 0000000..ba73731 --- /dev/null +++ b/ci/main/app-unsound/Cargo.toml @@ -0,0 +1,7 @@ +[package] +edition = "2024" +name = "app" +version = "0.1.0" + +[dependencies] +rt = { path = "../rt-unsound" } diff --git a/ci/main/app-unsound/src/main.rs b/ci/main/app-unsound/src/main.rs new file mode 100644 index 0000000..9bfdc54 --- /dev/null +++ b/ci/main/app-unsound/src/main.rs @@ -0,0 +1,23 @@ +#![no_main] +#![no_std] + +use core::{arch::asm, ptr}; + +use rt::entry; + +entry!(main); + +static mut DATA: i32 = 1; + +#[allow(static_mut_refs)] +fn main() -> ! 
{ + unsafe { + // check that DATA is properly initialized + if ptr::read_volatile(&DATA) != 1 { + // this makes QEMU crash + asm!("BKPT"); + } + } + + loop {} +} diff --git a/ci/main/rt-unsound/Cargo.toml b/ci/main/rt-unsound/Cargo.toml new file mode 120000 index 0000000..6d53c37 --- /dev/null +++ b/ci/main/rt-unsound/Cargo.toml @@ -0,0 +1 @@ +../rt/Cargo.toml \ No newline at end of file diff --git a/ci/main/rt-unsound/build.rs b/ci/main/rt-unsound/build.rs new file mode 120000 index 0000000..75e67a8 --- /dev/null +++ b/ci/main/rt-unsound/build.rs @@ -0,0 +1 @@ +../rt/build.rs \ No newline at end of file diff --git a/ci/main/rt-unsound/link.x b/ci/main/rt-unsound/link.x new file mode 100644 index 0000000..f0c8b11 --- /dev/null +++ b/ci/main/rt-unsound/link.x @@ -0,0 +1,56 @@ +/* Memory layout of the LM3S6965 microcontroller */ +/* 1K = 1 KiBi = 1024 bytes */ +MEMORY +{ + FLASH : ORIGIN = 0x00000000, LENGTH = 256K + RAM : ORIGIN = 0x20000000, LENGTH = 64K +} + +/* The entry point is the reset handler */ +ENTRY(Reset); + +EXTERN(RESET_VECTOR); + +SECTIONS +{ + .vector_table ORIGIN(FLASH) : + { + /* First entry: initial Stack Pointer value */ + LONG(ORIGIN(RAM) + LENGTH(RAM)); + + /* Second entry: reset vector */ + KEEP(*(.vector_table.reset_vector)); + } > FLASH + + .text : + { + *(.text .text.*); + } > FLASH + + /* CHANGED! 
*/ + .rodata : + { + *(.rodata .rodata.*); + } > FLASH + + .bss : + { + _sbss = .; + *(.bss .bss.*); + _ebss = .; + } > RAM + + .data : AT(ADDR(.rodata) + SIZEOF(.rodata)) + { + _sdata = .; + *(.data .data.*); + _edata = .; + } > RAM + + _sidata = LOADADDR(.data); + + /DISCARD/ : + { + *(.ARM.exidx .ARM.exidx.*); + } +} diff --git a/ci/main/rt-unsound/src/lib.rs b/ci/main/rt-unsound/src/lib.rs new file mode 100644 index 0000000..78f7e70 --- /dev/null +++ b/ci/main/rt-unsound/src/lib.rs @@ -0,0 +1,55 @@ +#![no_std] + +use core::panic::PanicInfo; +use core::ptr; + +#[unsafe(no_mangle)] +#[allow(static_mut_refs)] +pub unsafe extern "C" fn Reset() -> ! { + // NEW! + // Initialize RAM + unsafe extern "C" { + static mut _sbss: u8; + static mut _ebss: u8; + + static mut _sdata: u8; + static mut _edata: u8; + static _sidata: u8; + } + + let count = unsafe { &_ebss as *const u8 as usize - &_sbss as *const u8 as usize }; + unsafe { ptr::write_bytes(&mut _sbss as *mut u8, 0, count) }; + + let count = unsafe { &_edata as *const u8 as usize - &_sdata as *const u8 as usize }; + unsafe { ptr::copy_nonoverlapping(&_sidata as *const u8, &mut _sdata as *mut u8, count) }; + + // Call user entry point + unsafe extern "Rust" { + safe fn main() -> !; + } + + main() +} + +// The reset vector, a pointer into the reset handler +#[unsafe(link_section = ".vector_table.reset_vector")] +#[unsafe(no_mangle)] +pub static RESET_VECTOR: unsafe extern "C" fn() -> ! = Reset; + +#[panic_handler] +fn panic(_panic: &PanicInfo<'_>) -> ! { + loop {} +} + +#[macro_export] +macro_rules! entry { + ($path:path) => { + #[unsafe(export_name = "main")] + pub unsafe fn __main() -> ! { + // type check the given path + let f: fn() -> ! 
= $path; + + f() + } + }; +} diff --git a/ci/script.sh b/ci/script.sh index acd3e55..7422c31 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -92,6 +92,12 @@ main() { edition_check popd + pushd app-unsound + cargo build + qemu_check target/thumbv7m-none-eabi/debug/app + edition_check + popd + popd From 1693511f601da8b5d62275eeb4986b46ab1e862f Mon Sep 17 00:00:00 2001 From: Javier Alvarez Date: Sat, 11 Oct 2025 22:17:34 +0200 Subject: [PATCH 3/3] Update book to discuss assembly globals init And why we do not do it in Rust --- src/SUMMARY.md | 1 + src/main.md | 18 +++++--- src/sections-in-rust.md | 100 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 6 deletions(-) create mode 100644 src/sections-in-rust.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 61f47fa..329e65f 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -4,6 +4,7 @@ - [The smallest `#![no_std]` program](./smallest-no-std.md) - [Memory layout](./memory-layout.md) - [A `main` interface](./main.md) + - [Why don't we initialize `.data` and `.bss` using Rust](./sections-in-rust.md) - [Exception handling](./exceptions.md) - [Assembly on stable](./asm.md) - [Logging with symbols](./logging.md) diff --git a/src/main.md b/src/main.md index 744bad3..a29a813 100644 --- a/src/main.md +++ b/src/main.md @@ -192,7 +192,7 @@ Let's go into the details of these changes: ``` We associate symbols to the start and end addresses of the `.bss` and `.data` sections, which we'll -later use from Rust code. +later use to initialize them. ``` text {{#include ../ci/main/rt2/link.x:43}} @@ -210,18 +210,24 @@ memory (Flash); the LMA is where in Flash those initial values are stored. Finally, we associate a symbol to the LMA of `.data`. -On the Rust side, we zero the `.bss` section and initialize the `.data` section. We can reference -the symbols we created in the linker script from the Rust code. 
The *addresses*[^1] of these symbols are +Using our initialization code, we zero the `.bss` section and initialize the `.data` section. We can reference +the symbols we created in the linker script from the code. The *addresses*[^1] of these symbols are the boundaries of the `.bss` and `.data` sections. -The updated reset handler is shown below: +We could write the `.bss` and `.data` initialization code in pure Rust. In fact, earlier +versions of this book did so. However, several soundness questions have been raised over time, +and it is no longer considered good practice to initialize them in Rust code. See the +[Why don't we initialize .data and .bss using Rust](./sections-in-rust.md) section of the book for more details. +We will write the initialization code using the `global_asm!` macro to define our reset handler. + +The updated reset handler, now written in `Thumb-2` assembly, is shown below: ``` console -$ head -n33 ../rt/src/lib.rs +$ head -n53 ../rt/src/lib.rs ``` ``` rust -{{#include ../ci/main/rt2/src/lib.rs:1:32}} +{{#include ../ci/main/rt2/src/lib.rs:1:53}} ``` Now end users can directly and indirectly make use of `static` variables without running into diff --git a/src/sections-in-rust.md b/src/sections-in-rust.md new file mode 100644 index 0000000..d001274 --- /dev/null +++ b/src/sections-in-rust.md @@ -0,0 +1,100 @@ +# Why don't we initialize `.data` and `.bss` using Rust + +Earlier versions of this book initialized the `.data` and `.bss` sections using Rust code. +This has proven to have questionable soundness, and the recommended method of +performing the initialization of these sections nowadays relies on assembly. + +This chapter discusses the reasons that led to the decision of various crates like +[cortex-m-rt](https://crates.io/crates/cortex-m-rt) and [riscv-rt](https://crates.io/crates/riscv-rt) +to migrate to initializing these sections in assembly. 
There are +[a](https://github.com/rust-embedded/cortex-m-rt/issues/300) +[decent](https://github.com/rust-embedded/embedonomicon/issues/69) +[number](https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/The.20least.20incorrect.20init.20code.20.3A\)) +[of](https://github.com/rust-lang/unsafe-code-guidelines/issues/259) +[threads](https://github.com/rust-embedded/wg/issues/771) +where the soundness of such code has been questioned. We will summarize +them in this chapter. + +The original code used for global data initialization in Rust in this book is listed +as follows: + +``` rust +{{#include ../ci/main/rt-unsound/src/lib.rs:1:32}} +``` + +Five `extern "C"` variables are declared to reference specific memory locations. +Our linker script defines each symbol, so we do not need to worry about their +exact placement. + +## Pointer provenance + +To initialize the `.bss` section, we take the address of the `_sbss` `u8` variable, +which points to the start of the `.bss` section. Then we write an arbitrary +amount of data to its location. `_sbss` is declared as a `u8` variable, and +the pointer provenance rules only allow us to write an amount of data that fits +within the allocation of our `_sbss` variable. Despite that, we are writing past +the single byte (as far as Rust is aware, a single byte is allocated at this +address) up until we hit the location of `_ebss`. + +There is a separate issue in which we actually have an `_ebss` variable that is +pointing one byte outside of the `.bss` section. In specific implementations, +accessing this byte might not even be possible if the `.bss` section exhausted +the available memory. Ideally `_ebss` needs to be declared as a ZST. And by +extension, because the `.bss` section can be empty, `_sbss` should also be a +ZST, because in this case `_sbss` would also fall outside of the region reserved +for the `.bss`. 
+ +## Aliasing + +Another potential problem with the code above is aliasing. Consider our linker +script. + +``` text +{{#include ../ci/main/rt-unsound/link.x:36:48}} +``` + +The following situations can occur: +- `_sbss` might be located at the same address as the first variable in the `.bss` +section, assuming that the section is not empty. +- `_ebss` will be located at the same address as `_sdata`, and by extension, it +will also be located at the same address as the first variable in the `.data` +section. +- If the `.bss` section is empty, `_sbss` and `_ebss` will alias each other. +- If the `.data` section is empty, `_sdata` and `_edata` will alias each other. + +Rust does not allow more than one variable to be located at the same address +(with ZSTs being a key exception). But even if it did, we are using these variables +to write the whole global memory area, which effectively is mutably aliasing all +global data defined in the program. + +## Abstract machine initialization + +Another question is whether it is safe to enter any Rust code before the Rust +abstract machine has been fully initialized. Can we rely on Rust not using any +of the global memory while it is not yet initialized? The answer to this question +is not clear (or does not seem clear to the author of the section at the time of +this writing). + +## More potential provenance issues + +A clever reader might have seen how we compute the offset between `_ebss` and `_sbss` and thought, +couldn't we instead use the [`offset_from`](https://doc.rust-lang.org/std/primitive.pointer.html#method.offset_from) +method of a pointer? + +The problem with this approach, however, is that, as we mentioned above, `_ebss` +and `_sbss` belong to different allocations, so they do not share the same pointer +provenance. This is true even if they both are aliased and happen to fall at the +same address (i.e. when the `.bss` section is empty). 
+ +Running Miri on this [Rust Playground Snippet](https://play.rust-lang.org/?version=stable&mode=release&edition=2024&gist=3225a585752704d9c58b1842e0fc5307) +shows the undefined behavior. + +## Ok, but it works, doesn't it? + +Yes. While the code provided at the beginning of this chapter does produce the +right behavior as of Rust 1.89, the problem is that **we cannot rely on this behavior +being preserved in future releases**, or on the optimizer not doing something +funky in the future. + +That is why, overall, the recommendation of this book is to **not** perform this initialization +using Rust code.