Skip to content

Commit b583aa2

Browse files
authored
Unrolled build for #144938
Rollup merge of #144938 - tgross35:more-outline-atomics, r=davidtwco Enable `outline-atomics` by default on more AArch64 platforms The baseline Armv8.0 ISA doesn't have atomics instructions, but in practice most hardware is at least Armv8.1-A (2014), which includes single-instruction atomics as part of the LSE feature. As a performance optimization for these cases, GCC and LLVM have the `-moutline-atomics` flag to turn atomic operations into calls to symbols like `__aarch64_cas1_acq`. These can do runtime feature detection and use the LSE instructions if available, falling back to more portable load-exclusive/store-exclusive loops. Since the recent 3b50253 ("compiler-builtins: plumb LSE support for aarch64 on linux") our builtins support this LSE optimization, and since 6936bb9 ("Dynamically enable LSE for aarch64 rust provided intrinsics"), std will set the flag as part of its startup code. The first commit in this PR configures this to work on all platforms built with `outline-atomics`, not just Linux. Thus, enable `outline-atomics` by default on Android, OpenBSD, Windows, and Fuchsia platforms that don't have LSE in the baseline. The feature is already enabled on Linux. Platform-specific details are included in each commit message. The current implementation can still be accessed by setting `-Ctarget-feature=-outline-atomics`. Setting `-Ctarget-feature=+lse` or a relevant CPU will use the single-instruction atomics without the call overhead. https://rust.godbolt.org/z/dsdrzszoe Link: https://learn.arm.com/learning-paths/servers-and-cloud-computing/lse/intro/ Original Clang outline-atomics benchmarks: https://reviews.llvm.org/D91157#2435844 try-job: aarch64-msvc-* try-job: arm-android try-job: dist-android try-job: dist-aarch64-llvm-mingw try-job: dist-aarch64-msvc try-job: dist-various-* try-job: test-various
2 parents c61a3a4 + 66c150c commit b583aa2

File tree

8 files changed

+52
-16
lines changed

8 files changed

+52
-16
lines changed

compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ pub(crate) fn target() -> Target {
2121
max_atomic_width: Some(128),
2222
// As documented in https://developer.android.com/ndk/guides/cpu-features.html
2323
// the neon (ASIMD) and FP must exist on all android aarch64 targets.
24-
features: "+v8a,+neon".into(),
24+
features: "+v8a,+neon,+outline-atomics".into(),
2525
// the AAPCS64 expects use of non-leaf frame pointers per
2626
// https://github.com/ARM-software/abi-aa/blob/4492d1570eb70c8fd146623e0db65b2d241f12e7/aapcs64/aapcs64.rst#the-frame-pointer
2727
// and we tend to encounter interesting bugs in AArch64 unwinding code if we do not

compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::spec::{Arch, Cc, FramePointer, LinkerFlavor, Lld, Target, TargetMetad
33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_gnullvm::opts();
55
base.max_atomic_width = Some(128);
6-
base.features = "+v8a,+neon".into();
6+
base.features = "+v8a,+neon,+outline-atomics".into();
77
base.linker = Some("aarch64-w64-mingw32-clang".into());
88
base.add_pre_link_args(LinkerFlavor::Gnu(Cc::No, Lld::No), &["-m", "arm64pe"]);
99

compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::spec::{Arch, FramePointer, Target, TargetMetadata, base};
33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_msvc::opts();
55
base.max_atomic_width = Some(128);
6-
base.features = "+v8a,+neon".into();
6+
base.features = "+v8a,+neon,+outline-atomics".into();
77

88
// Microsoft recommends enabling frame pointers on Arm64 Windows.
99
// From https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#integer-registers

compiler/rustc_target/src/spec/targets/aarch64_unknown_fuchsia.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::spec::{
55
pub(crate) fn target() -> Target {
66
let mut base = base::fuchsia::opts();
77
base.cpu = "generic".into();
8-
base.features = "+v8a,+crc,+aes,+sha2,+neon".into();
8+
base.features = "+v8a,+crc,+aes,+sha2,+neon,+outline-atomics".into();
99
base.max_atomic_width = Some(128);
1010
base.stack_probes = StackProbeType::Inline;
1111
base.supported_sanitizers = SanitizerSet::ADDRESS

compiler/rustc_target/src/spec/targets/aarch64_unknown_openbsd.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub(crate) fn target() -> Target {
1313
data_layout: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(),
1414
arch: Arch::AArch64,
1515
options: TargetOptions {
16-
features: "+v8a".into(),
16+
features: "+v8a,+outline-atomics".into(),
1717
max_atomic_width: Some(128),
1818
stack_probes: StackProbeType::Inline,
1919
..base::openbsd::opts()

library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs renamed to library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ macro_rules! compare_and_swap {
196196
"cbnz w17, 0b",
197197
"1:",
198198
"ret",
199-
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
199+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
200200
}
201201
}
202202
}
@@ -228,7 +228,7 @@ macro_rules! compare_and_swap_i128 {
228228
"cbnz w15, 0b",
229229
"1:",
230230
"ret",
231-
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
231+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
232232
}
233233
}
234234
}
@@ -256,7 +256,7 @@ macro_rules! swap {
256256
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
257257
"cbnz w17, 0b",
258258
"ret",
259-
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
259+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
260260
}
261261
}
262262
}
@@ -286,7 +286,7 @@ macro_rules! fetch_op {
286286
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
287287
"cbnz w15, 0b",
288288
"ret",
289-
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
289+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
290290
}
291291
}
292292
}

library/compiler-builtins/compiler-builtins/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ pub mod arm;
5555
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
5656
pub mod aarch64;
5757

58-
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
59-
pub mod aarch64_linux;
58+
#[cfg(all(target_arch = "aarch64", target_feature = "outline-atomics"))]
59+
pub mod aarch64_outline_atomics;
6060

6161
#[cfg(all(
6262
kernel_user_helpers,

library/std/src/sys/configure_builtins.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,49 @@
1-
/// Hook into .init_array to enable LSE atomic operations at startup, if
2-
/// supported.
3-
#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "compiler-builtins-c")))]
1+
/// Enable LSE atomic operations at startup, if supported.
2+
///
3+
/// Linker sections are based on what [`ctor`] does, with priorities to run slightly before user
4+
/// code:
5+
///
6+
/// - Apple uses the section `__mod_init_func`; `mod_init_funcs` is needed to set
7+
/// `S_MOD_INIT_FUNC_POINTERS`. There doesn't seem to be a way to indicate priorities.
8+
/// - Windows uses `.CRT$XCT`, which is run before user constructors (these should use `.CRT$XCU`).
9+
/// - ELF uses `.init_array` with a priority of 90, which runs before our `ARGV_INIT_ARRAY`
10+
/// initializer (priority 99). Both are within the 0-100 implementation-reserved range, per docs
11+
/// for the [`prio-ctor-dtor`] warning, and this matches compiler-rt's `CONSTRUCTOR_PRIORITY`.
12+
///
13+
/// To save startup time, the initializer is only run if outline atomic routines from
14+
/// compiler-builtins may be used. If LSE is known to be available then the calls are never
15+
/// emitted, and if we build the C intrinsics then it has its own initializer using the symbol
16+
/// `__aarch64_have_lse_atomics`.
17+
///
18+
/// Initialization is done in a global constructor so we get the same behavior regardless of
19+
/// whether Rust's `init` is used, or if we are in a `dylib` or `no_main` situation (as opposed
20+
/// to doing it as part of pre-main startup). This also matches C implementations.
21+
///
22+
/// Ideally `core` would have something similar, but detecting the CPU features requires the
23+
/// auxiliary vector from the OS. We do the initialization in `std` rather than as part of
24+
/// `compiler-builtins` because a builtins->std dependency isn't possible, and inlining parts of
25+
/// `std-detect` would be much messier.
26+
///
27+
/// [`ctor`]: https://github.com/mmastrac/rust-ctor/blob/63382b833ddcbfb8b064f4e86bfa1ed4026ff356/shared/src/macros/mod.rs#L522-L534
28+
/// [`prio-ctor-dtor`]: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
29+
#[cfg(all(
30+
target_arch = "aarch64",
31+
target_feature = "outline-atomics",
32+
not(target_feature = "lse"),
33+
not(feature = "compiler-builtins-c"),
34+
))]
435
#[used]
5-
#[unsafe(link_section = ".init_array.90")]
36+
#[cfg_attr(target_vendor = "apple", unsafe(link_section = "__DATA,__mod_init_func,mod_init_funcs"))]
37+
#[cfg_attr(target_os = "windows", unsafe(link_section = ".CRT$XCT"))]
38+
#[cfg_attr(
39+
not(any(target_vendor = "apple", target_os = "windows")),
40+
unsafe(link_section = ".init_array.90")
41+
)]
642
static RUST_LSE_INIT: extern "C" fn() = {
743
extern "C" fn init_lse() {
844
use crate::arch;
945

10-
// This is provided by compiler-builtins::aarch64_linux.
46+
// This is provided by compiler-builtins::aarch64_outline_atomics.
1147
unsafe extern "C" {
1248
fn __rust_enable_lse();
1349
}

0 commit comments

Comments
 (0)