diff --git a/asm/inline.rs b/asm/inline.rs
index 9150c9c3..5e2fbc10 100644
--- a/asm/inline.rs
+++ b/asm/inline.rs
@@ -52,13 +52,20 @@ pub unsafe fn __cpsie() {
 
 #[inline(always)]
 pub unsafe fn __delay(cyc: u32) {
-    // Use local labels to avoid R_ARM_THM_JUMP8 relocations which fail on thumbv6m.
+    // The loop will normally take 3 to 4 CPU cycles per iteration, but superscalar cores
+    // (eg. Cortex-M7) can potentially do it in 2, so we use that as the lower bound, since delaying
+    // for more cycles is okay.
+    // Add 1 so the counter is never 0 on entry: `subs` would wrap it to `u32::MAX` and the loop
+    // would spin for ~2^32 iterations instead of returning promptly.
+    let real_cyc = 1 + cyc / 2;
     asm!(
+        // Use local labels to avoid R_ARM_THM_JUMP8 relocations which fail on thumbv6m.
         "1:",
-        "nop",
         "subs {}, #1",
         "bne 1b",
-        in(reg) cyc
+        // The loop decrements the register, so it must be declared as modified (`inout`) rather
+        // than as a read-only input; the final value is discarded.
+        inout(reg) real_cyc => _
     );
 }
 
diff --git a/bin/thumbv6m-none-eabi-lto.a b/bin/thumbv6m-none-eabi-lto.a
index 1b0e77a4..70b4f3f3 100644
Binary files a/bin/thumbv6m-none-eabi-lto.a and b/bin/thumbv6m-none-eabi-lto.a differ
diff --git a/bin/thumbv6m-none-eabi.a b/bin/thumbv6m-none-eabi.a
index 6835c5b6..6b66561e 100644
Binary files a/bin/thumbv6m-none-eabi.a and b/bin/thumbv6m-none-eabi.a differ
diff --git a/bin/thumbv7em-none-eabi-lto.a b/bin/thumbv7em-none-eabi-lto.a
index 0a42c47f..b2ae124b 100644
Binary files a/bin/thumbv7em-none-eabi-lto.a and b/bin/thumbv7em-none-eabi-lto.a differ
diff --git a/bin/thumbv7em-none-eabi.a b/bin/thumbv7em-none-eabi.a
index aeef3ab2..e414f399 100644
Binary files a/bin/thumbv7em-none-eabi.a and b/bin/thumbv7em-none-eabi.a differ
diff --git a/bin/thumbv7em-none-eabihf-lto.a b/bin/thumbv7em-none-eabihf-lto.a
index 6d5e7cf6..073bfebf 100644
Binary files a/bin/thumbv7em-none-eabihf-lto.a and b/bin/thumbv7em-none-eabihf-lto.a differ
diff --git a/bin/thumbv7em-none-eabihf.a b/bin/thumbv7em-none-eabihf.a
index dd4f80d9..c13d9a7f 100644
Binary files a/bin/thumbv7em-none-eabihf.a and b/bin/thumbv7em-none-eabihf.a differ
diff --git a/bin/thumbv7m-none-eabi-lto.a b/bin/thumbv7m-none-eabi-lto.a
index 66b6be35..a3fcb494 100644
Binary files a/bin/thumbv7m-none-eabi-lto.a and b/bin/thumbv7m-none-eabi-lto.a differ
diff --git a/bin/thumbv7m-none-eabi.a b/bin/thumbv7m-none-eabi.a
index cdae52f6..ce8057db 100644
Binary files a/bin/thumbv7m-none-eabi.a and b/bin/thumbv7m-none-eabi.a differ
diff --git a/bin/thumbv8m.base-none-eabi-lto.a b/bin/thumbv8m.base-none-eabi-lto.a
index 59c65104..ff66b5ae 100644
Binary files a/bin/thumbv8m.base-none-eabi-lto.a and b/bin/thumbv8m.base-none-eabi-lto.a differ
diff --git a/bin/thumbv8m.base-none-eabi.a b/bin/thumbv8m.base-none-eabi.a
index a300490b..a5418d8c 100644
Binary files a/bin/thumbv8m.base-none-eabi.a and b/bin/thumbv8m.base-none-eabi.a differ
diff --git a/bin/thumbv8m.main-none-eabi-lto.a b/bin/thumbv8m.main-none-eabi-lto.a
index 43065266..5b92a95e 100644
Binary files a/bin/thumbv8m.main-none-eabi-lto.a and b/bin/thumbv8m.main-none-eabi-lto.a differ
diff --git a/bin/thumbv8m.main-none-eabi.a b/bin/thumbv8m.main-none-eabi.a
index 5f0b282a..035c821d 100644
Binary files a/bin/thumbv8m.main-none-eabi.a and b/bin/thumbv8m.main-none-eabi.a differ
diff --git a/bin/thumbv8m.main-none-eabihf-lto.a b/bin/thumbv8m.main-none-eabihf-lto.a
index cc627612..30c6ccc2 100644
Binary files a/bin/thumbv8m.main-none-eabihf-lto.a and b/bin/thumbv8m.main-none-eabihf-lto.a differ
diff --git a/bin/thumbv8m.main-none-eabihf.a b/bin/thumbv8m.main-none-eabihf.a
index cd688ac2..9f6241e6 100644
Binary files a/bin/thumbv8m.main-none-eabihf.a and b/bin/thumbv8m.main-none-eabihf.a differ
diff --git a/src/asm.rs b/src/asm.rs
index 297198b9..4dc1ab07 100644
--- a/src/asm.rs
+++ b/src/asm.rs
@@ -15,7 +15,7 @@ pub fn bkpt() {
     call_asm!(__bkpt());
 }
 
-/// Blocks the program for *at least* `n` instruction cycles
+/// Blocks the program for *at least* `cycles` CPU cycles.
 ///
 /// This is implemented in assembly so its execution time is independent of the optimization
 /// level, however it is dependent on the specific architecture and core configuration.
@@ -25,10 +25,8 @@ pub fn bkpt() {
 /// timer-less initialization of peripherals if and only if accurate timing is not essential. In
 /// any other case please use a more accurate method to produce a delay.
 #[inline]
-pub fn delay(n: u32) {
-    // NOTE(divide by 4) is easier to compute than `/ 3` because it's just a shift (`>> 2`).
-    let real_cyc = n / 4 + 1;
-    call_asm!(__delay(real_cyc: u32));
+pub fn delay(cycles: u32) {
+    call_asm!(__delay(cycles: u32));
 }
 
 /// A no-operation. Useful to prevent delay loops from being optimized away.