From 43484472eba0c60c9a1d41c07fd835111d242c80 Mon Sep 17 00:00:00 2001 From: George Tokmaji Date: Sun, 2 Nov 2025 19:35:36 +0100 Subject: [PATCH 01/13] Mangle symbols with a mangled name close to PDB limits with v0 instead of legacy mangling to avoid linker errors --- compiler/rustc_symbol_mangling/src/lib.rs | 26 ++++++++++++++++++++++- compiler/rustc_target/src/spec/mod.rs | 4 ++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index d97ee95652530..0ea20ae4dc604 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -291,7 +291,31 @@ fn compute_symbol_name<'tcx>( export::compute_hash_of_export_fn(tcx, instance) ), false => match mangling_version { - SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate), + SymbolManglingVersion::Legacy => { + let mangled_name = legacy::mangle(tcx, instance, instantiating_crate); + + let mangled_name_too_long = { + // The PDB debug info format cannot store mangled symbol names for which its + // internal record exceeds u16::MAX bytes, a limit multiple Rust projects have been + // hitting due to the verbosity of legacy name manglng. Depending on the linker version + // in use, such symbol names can lead to linker crashes or incomprehensible linker error + // about a limit being hit. + // Mangle those symbols with v0 mangling instead, which gives us more room to breathe + // as v0 mangling is more compact. + // Empirical testing has shown the limit for the symbol name to be 65521 bytes; use + // 65000 bytes to leave some room for prefixes / suffixes as well as unknown scenarios + // with a different limit. 
+ const MAX_SYMBOL_LENGTH: usize = 65000; + + tcx.sess.target.uses_pdb_debuginfo() && mangled_name.len() > MAX_SYMBOL_LENGTH + }; + + if mangled_name_too_long { + v0::mangle(tcx, instance, instantiating_crate, false) + } else { + mangled_name + } + } SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate, false), SymbolManglingVersion::Hashed => { hashed::mangle(tcx, instance, instantiating_crate, || { diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index b49e7fc9cff66..95c0299bfeb7c 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -2397,6 +2397,10 @@ impl TargetOptions { // XCOFF and MachO don't support COMDAT. !self.is_like_aix && !self.is_like_darwin } + + pub fn uses_pdb_debuginfo(&self) -> bool { + self.debuginfo_kind == DebuginfoKind::Pdb + } } impl TargetOptions { From 21ee1cf6bbf7da1cf22475b7df6e518ba943ea47 Mon Sep 17 00:00:00 2001 From: Fulgen301 Date: Tue, 4 Nov 2025 21:04:35 +0100 Subject: [PATCH 02/13] Fix typo Co-authored-by: teor --- compiler/rustc_symbol_mangling/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index 0ea20ae4dc604..85b77ab2a5f01 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -297,7 +297,7 @@ fn compute_symbol_name<'tcx>( let mangled_name_too_long = { // The PDB debug info format cannot store mangled symbol names for which its // internal record exceeds u16::MAX bytes, a limit multiple Rust projects have been - // hitting due to the verbosity of legacy name manglng. Depending on the linker version + // hitting due to the verbosity of legacy name mangling. Depending on the linker version // in use, such symbol names can lead to linker crashes or incomprehensible linker error // about a limit being hit. 
// Mangle those symbols with v0 mangling instead, which gives us more room to breathe From 1f7a3427f250d1fdc3fb3a059bd8e47b6aedfd1e Mon Sep 17 00:00:00 2001 From: Makai Date: Sat, 1 Nov 2025 15:51:46 +0800 Subject: [PATCH 03/13] rustc_public: fix some issues --- compiler/rustc_public/src/compiler_interface.rs | 2 ++ compiler/rustc_public/src/unstable/mod.rs | 3 +++ 2 files changed, 5 insertions(+) diff --git a/compiler/rustc_public/src/compiler_interface.rs b/compiler/rustc_public/src/compiler_interface.rs index b17d31f2b91ab..e01ffa2309049 100644 --- a/compiler/rustc_public/src/compiler_interface.rs +++ b/compiler/rustc_public/src/compiler_interface.rs @@ -835,6 +835,8 @@ impl<'tcx> CompilerInterface<'tcx> { // A thread local variable that stores a pointer to [`CompilerInterface`]. scoped_tls::scoped_thread_local!(static TLV: Cell<*const ()>); +// remove this cfg when we have a stable driver. +#[cfg(feature = "rustc_internal")] pub(crate) fn run<'tcx, F, T>(interface: &CompilerInterface<'tcx>, f: F) -> Result where F: FnOnce() -> T, diff --git a/compiler/rustc_public/src/unstable/mod.rs b/compiler/rustc_public/src/unstable/mod.rs index 72b14cfa072ae..2b69fb5408cf9 100644 --- a/compiler/rustc_public/src/unstable/mod.rs +++ b/compiler/rustc_public/src/unstable/mod.rs @@ -22,6 +22,7 @@ mod internal_cx; /// /// This trait is only for [`RustcInternal`]. Any other other access to rustc's internals /// should go through [`rustc_public_bridge::context::CompilerCtxt`]. +#[cfg_attr(not(feature = "rustc_internal"), allow(unreachable_pub))] pub trait InternalCx<'tcx>: Copy + Clone { fn tcx(self) -> TyCtxt<'tcx>; @@ -59,6 +60,7 @@ pub trait InternalCx<'tcx>: Copy + Clone { /// between internal MIR and rustc_public's IR constructs. /// However, they should be used seldom and they have no influence in this crate semver. 
#[doc(hidden)] +#[cfg_attr(not(feature = "rustc_internal"), allow(unreachable_pub))] pub trait Stable<'tcx>: PointeeSized { /// The stable representation of the type implementing Stable. type T; @@ -78,6 +80,7 @@ pub trait Stable<'tcx>: PointeeSized { /// between internal MIR and rustc_public's IR constructs. /// They should be used seldom as they have no stability guarantees. #[doc(hidden)] +#[cfg_attr(not(feature = "rustc_internal"), allow(unreachable_pub))] pub trait RustcInternal { type T<'tcx>; fn internal<'tcx>( From ab169ed983cb773dd861d5e3b96d24566c1ea699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Sun, 9 Nov 2025 16:01:59 +0100 Subject: [PATCH 04/13] Build gnullvm toolchains on Windows natively --- src/bootstrap/src/core/build_steps/dist.rs | 28 +++------ .../dist-aarch64-windows-gnullvm/Dockerfile | 35 ----------- .../dist-x86_64-windows-gnullvm/Dockerfile | 37 ------------ .../install-llvm-mingw.sh | 13 ---- src/ci/github-actions/jobs.yml | 28 +++++++-- src/ci/scripts/install-mingw.sh | 59 +++++++++++++++---- src/etc/installer/msi/rust.wxs | 20 +++++++ 7 files changed, 97 insertions(+), 123 deletions(-) delete mode 100644 src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile delete mode 100644 src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile delete mode 100755 src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh diff --git a/src/bootstrap/src/core/build_steps/dist.rs b/src/bootstrap/src/core/build_steps/dist.rs index 4fdcdb2051f55..074ebe8c1317a 100644 --- a/src/bootstrap/src/core/build_steps/dist.rs +++ b/src/bootstrap/src/core/build_steps/dist.rs @@ -338,7 +338,7 @@ fn runtime_dll_dist(rust_root: &Path, target: TargetSelection, builder: &Builder return; } - let (bin_path, libs_path) = get_cc_search_dirs(target, builder); + let (bin_path, _) = get_cc_search_dirs(target, builder); let mut rustc_dlls = vec![]; // windows-gnu and windows-gnullvm require different runtime libs @@ 
-354,15 +354,6 @@ fn runtime_dll_dist(rust_root: &Path, target: TargetSelection, builder: &Builder } else { panic!("Vendoring of runtime DLLs for `{target}` is not supported`"); } - // FIXME(#144656): Remove this whole `let ...` - let bin_path = if target.ends_with("windows-gnullvm") && builder.host_target != target { - bin_path - .into_iter() - .chain(libs_path.iter().map(|path| path.with_file_name("bin"))) - .collect() - } else { - bin_path - }; let rustc_dlls = find_files(&rustc_dlls, &bin_path); // Copy runtime dlls next to rustc.exe @@ -1721,7 +1712,7 @@ impl Step for Extended { tarballs.push(builder.ensure(Rustc { target_compiler })); tarballs.push(builder.ensure(Std { build_compiler, target }).expect("missing std")); - if target.is_windows_gnu() { + if target.is_windows_gnu() || target.is_windows_gnullvm() { tarballs.push(builder.ensure(Mingw { target }).expect("missing mingw")); } @@ -1868,8 +1859,7 @@ impl Step for Extended { cmd.run(builder); } - // FIXME(mati865): `gnullvm` here is temporary, remove it once it can host itself - if target.is_windows() && !target.contains("gnullvm") { + if target.is_windows() { let exe = tmp.join("exe"); let _ = fs::remove_dir_all(&exe); @@ -1907,7 +1897,7 @@ impl Step for Extended { prepare(tool); } } - if target.is_windows_gnu() { + if target.is_windows_gnu() || target.is_windows_gnullvm() { prepare("rust-mingw"); } @@ -2072,7 +2062,7 @@ impl Step for Extended { .arg("-t") .arg(etc.join("msi/remove-duplicates.xsl")) .run(builder); - if target.is_windows_gnu() { + if target.is_windows_gnu() || target.is_windows_gnullvm() { command(&heat) .current_dir(&exe) .arg("dir") @@ -2121,7 +2111,7 @@ impl Step for Extended { if built_tools.contains("miri") { cmd.arg("-dMiriDir=miri"); } - if target.is_windows_gnu() { + if target.is_windows_gnu() || target.is_windows_gnullvm() { cmd.arg("-dGccDir=rust-mingw"); } cmd.run(builder); @@ -2149,7 +2139,7 @@ impl Step for Extended { } candle("AnalysisGroup.wxs".as_ref()); - if 
target.is_windows_gnu() { + if target.is_windows_gnu() || target.is_windows_gnullvm() { candle("GccGroup.wxs".as_ref()); } @@ -2192,7 +2182,7 @@ impl Step for Extended { cmd.arg("DocsGroup.wixobj"); } - if target.is_windows_gnu() { + if target.is_windows_gnu() || target.is_windows_gnullvm() { cmd.arg("GccGroup.wixobj"); } // ICE57 wrongly complains about the shortcuts @@ -2231,7 +2221,7 @@ fn add_env( .env("CFG_BUILD", target.triple) .env("CFG_CHANNEL", &builder.config.channel); - if target.contains("windows-gnullvm") { + if target.is_windows_gnullvm() { cmd.env("CFG_MINGW", "1").env("CFG_ABI", "LLVM"); } else if target.is_windows_gnu() { cmd.env("CFG_MINGW", "1").env("CFG_ABI", "GNU"); diff --git a/src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile b/src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile deleted file mode 100644 index 0bb51af817ab2..0000000000000 --- a/src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -FROM ubuntu:24.04 - -WORKDIR /build - -ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - cmake \ - curl \ - g++ \ - git \ - make \ - ninja-build \ - python3 \ - xz-utils - -ENV ARCH=aarch64 -COPY host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh /build -RUN ./install-llvm-mingw.sh - -COPY scripts/sccache.sh /scripts/ -RUN sh /scripts/sccache.sh - -ENV CC_aarch64_pc_windows_gnullvm=aarch64-w64-mingw32-clang \ - CXX_aarch64_pc_windows_gnullvm=aarch64-w64-mingw32-clang++ - -ENV HOST=aarch64-pc-windows-gnullvm - -ENV RUST_CONFIGURE_ARGS \ - --enable-full-tools \ - --enable-profiler \ - --enable-sanitizers \ - --disable-docs - -ENV SCRIPT python3 ../x.py dist --host $HOST --target $HOST diff --git a/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile b/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile deleted file mode 100644 index da0c065c8547e..0000000000000 --- 
a/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -FROM ubuntu:24.04 - -WORKDIR /build - -ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - cmake \ - curl \ - g++ \ - git \ - make \ - ninja-build \ - python3 \ - xz-utils - -ENV ARCH='i686 x86_64' -COPY host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh /build -RUN ./install-llvm-mingw.sh - -COPY scripts/sccache.sh /scripts/ -RUN sh /scripts/sccache.sh - -ENV CC_i686_pc_windows_gnullvm=i686-w64-mingw32-clang \ - CC_x86_64_pc_windows_gnullvm=x86_64-w64-mingw32-clang \ - CXX_x86_64_pc_windows_gnullvm=x86_64-w64-mingw32-clang++ - -ENV HOST=x86_64-pc-windows-gnullvm -ENV TARGETS=i686-pc-windows-gnullvm,x86_64-pc-windows-gnullvm - -ENV RUST_CONFIGURE_ARGS \ - --enable-full-tools \ - --enable-profiler \ - --enable-sanitizers \ - --disable-docs - -ENV SCRIPT python3 ../x.py dist --host $HOST --target $TARGETS diff --git a/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh b/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh deleted file mode 100755 index 0ea5dae3ffbd9..0000000000000 --- a/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -release_date=20250613 -archive=llvm-mingw-${release_date}-ucrt-ubuntu-22.04-x86_64.tar.xz -curl -L https://github.com/mstorsjo/llvm-mingw/releases/download/${release_date}/${archive} | \ -tar --extract --xz --strip 1 --directory /usr/local - -# https://github.com/mstorsjo/llvm-mingw/issues/493 -for arch in $ARCH; do - ln -s $arch-w64-windows-gnu.cfg /usr/local/bin/$arch-pc-windows-gnu.cfg -done diff --git a/src/ci/github-actions/jobs.yml b/src/ci/github-actions/jobs.yml index 33de0b09d94d1..ee10e36e1c37c 100644 --- a/src/ci/github-actions/jobs.yml +++ b/src/ci/github-actions/jobs.yml @@ -247,12 +247,6 @@ auto: - name: 
dist-s390x-linux <<: *job-linux-4c - - name: dist-aarch64-windows-gnullvm - <<: *job-linux-4c - - - name: dist-x86_64-windows-gnullvm - <<: *job-linux-4c - - name: dist-various-1 <<: *job-linux-4c @@ -689,6 +683,28 @@ auto: CODEGEN_BACKENDS: llvm,cranelift <<: *job-windows + - name: dist-aarch64-llvm-mingw + env: + SCRIPT: python x.py dist bootstrap --include-default-paths + RUST_CONFIGURE_ARGS: >- + --build=aarch64-pc-windows-gnullvm + --enable-full-tools + --enable-profiler + DIST_REQUIRE_ALL_TOOLS: 1 + CODEGEN_BACKENDS: llvm,cranelift + <<: *job-windows-aarch64 + + - name: dist-x86_64-llvm-mingw + env: + SCRIPT: python x.py dist bootstrap --include-default-paths + RUST_CONFIGURE_ARGS: >- + --build=x86_64-pc-windows-gnullvm + --enable-full-tools + --enable-profiler + DIST_REQUIRE_ALL_TOOLS: 1 + CODEGEN_BACKENDS: llvm,cranelift + <<: *job-windows + - name: dist-x86_64-msvc-alt env: RUST_CONFIGURE_ARGS: --build=x86_64-pc-windows-msvc --enable-extended --enable-profiler diff --git a/src/ci/scripts/install-mingw.sh b/src/ci/scripts/install-mingw.sh index ed87628659b41..17bedaa7b8266 100755 --- a/src/ci/scripts/install-mingw.sh +++ b/src/ci/scripts/install-mingw.sh @@ -8,22 +8,37 @@ source "$(cd "$(dirname "$0")" && pwd)/../shared.sh" MINGW_ARCHIVE_32="i686-14.1.0-release-posix-dwarf-msvcrt-rt_v12-rev0.7z" MINGW_ARCHIVE_64="x86_64-14.1.0-release-posix-seh-msvcrt-rt_v12-rev0.7z" +LLVM_MINGW_ARCHIVE_AARCH64="llvm-mingw-20251104-ucrt-aarch64.zip" +LLVM_MINGW_ARCHIVE_X86_64="llvm-mingw-20251104-ucrt-x86_64.zip" if isWindows && isKnownToBeMingwBuild; then case "${CI_JOB_NAME}" in + *aarch64-llvm*) + mingw_dir="clangarm64" + mingw_archive="${LLVM_MINGW_ARCHIVE_AARCH64}" + arch="aarch64" + # Rustup defaults to AArch64 MSVC which has a hard time building Ring crate + # for citool. MSVC jobs install special Clang build to solve that, but here + # it would be an overkill. So we just use toolchain that doesn't have this + # issue. 
+ rustup default stable-aarch64-pc-windows-gnullvm + ;; + *x86_64-llvm*) + mingw_dir="clang64" + mingw_archive="${LLVM_MINGW_ARCHIVE_X86_64}" + arch="x86_64" + ;; *i686*) - bits=32 + mingw_dir="mingw32" mingw_archive="${MINGW_ARCHIVE_32}" ;; *x86_64*) - bits=64 + mingw_dir="mingw64" mingw_archive="${MINGW_ARCHIVE_64}" ;; *aarch64*) - # aarch64 is a cross-compiled target. Use the x86_64 - # mingw, since that's the host architecture. - bits=64 - mingw_archive="${MINGW_ARCHIVE_64}" + echo "AArch64 Windows is not supported by GNU tools" + exit 1 ;; *) echo "src/ci/scripts/install-mingw.sh can't detect the builder's architecture" @@ -38,14 +53,32 @@ if isWindows && isKnownToBeMingwBuild; then msys2Path="c:/msys64" ciCommandAddPath "${msys2Path}/usr/bin" - mingw_dir="mingw${bits}" + case "${mingw_archive}" in + *.7z) + curl -o mingw.7z "${MIRRORS_BASE}/${mingw_archive}" + 7z x -y mingw.7z > /dev/null + ;; + *.zip) + curl -o mingw.zip "${MIRRORS_BASE}/${mingw_archive}" + unzip -q mingw.zip + mv llvm-mingw-20251104-ucrt-$arch $mingw_dir + # Temporary workaround: https://github.com/mstorsjo/llvm-mingw/issues/493 + mkdir -p $mingw_dir/bin + ln -s $arch-w64-windows-gnu.cfg $mingw_dir/bin/$arch-pc-windows-gnu.cfg + ;; + *) + echo "Unrecognized archive type" + exit 1 + ;; + esac - curl -o mingw.7z "${MIRRORS_BASE}/${mingw_archive}" - 7z x -y mingw.7z > /dev/null ciCommandAddPath "$(cygpath -m "$(pwd)/${mingw_dir}/bin")" - # Initialize mingw for the user. - # This should be done by github but isn't for some reason. - # (see https://github.com/actions/runner-images/issues/12600) - /c/msys64/usr/bin/bash -lc ' ' + # MSYS2 is not installed on AArch64 runners + if [[ "${CI_JOB_NAME}" != *aarch64-llvm* ]]; then + # Initialize mingw for the user. + # This should be done by github but isn't for some reason. 
+ # (see https://github.com/actions/runner-images/issues/12600) + /c/msys64/usr/bin/bash -lc ' ' + fi fi diff --git a/src/etc/installer/msi/rust.wxs b/src/etc/installer/msi/rust.wxs index 64cceccc97582..43a436f1e6950 100644 --- a/src/etc/installer/msi/rust.wxs +++ b/src/etc/installer/msi/rust.wxs @@ -38,6 +38,16 @@ + + + + + + + + + + @@ -61,6 +71,16 @@ + + + + + + + + + + From 17230eb5bba2b311f5048aea5073895572746637 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Fri, 14 Nov 2025 18:51:07 +0000 Subject: [PATCH 05/13] rustc_target: aarch64: Remove deprecated FEAT_TME ARM has withdrawn FEAT_TME https://developer.arm.com/documentation/102105/lb-05/ LLVM has dropped support for it recently as a result. --- compiler/rustc_codegen_llvm/src/llvm_util.rs | 2 ++ library/std/tests/run-time-detect.rs | 1 - library/std_detect/src/detect/os/aarch64.rs | 1 - library/std_detect/tests/cpu-detection.rs | 2 -- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index b498448417f58..687238968e97d 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -243,6 +243,8 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option Some(LLVMFeature::new("fullfp16")), // Filter out features that are not supported by the current LLVM version "fpmr" => None, // only existed in 18 + // Withdrawn by ARM; removed from LLVM in 22 + "tme" if major >= 22 => None, s => Some(LLVMFeature::new(s)), } } diff --git a/library/std/tests/run-time-detect.rs b/library/std/tests/run-time-detect.rs index be2980f73268f..5506fd469732d 100644 --- a/library/std/tests/run-time-detect.rs +++ b/library/std/tests/run-time-detect.rs @@ -106,7 +106,6 @@ fn aarch64_linux() { println!("sve2: {}", is_aarch64_feature_detected!("sve2")); println!("sve2p1: {}", is_aarch64_feature_detected!("sve2p1")); println!("sve: {}", 
is_aarch64_feature_detected!("sve")); - println!("tme: {}", is_aarch64_feature_detected!("tme")); println!("wfxt: {}", is_aarch64_feature_detected!("wfxt")); // tidy-alphabetical-end } diff --git a/library/std_detect/src/detect/os/aarch64.rs b/library/std_detect/src/detect/os/aarch64.rs index c2c754ccf8db2..3232e435d524c 100644 --- a/library/std_detect/src/detect/os/aarch64.rs +++ b/library/std_detect/src/detect/os/aarch64.rs @@ -84,7 +84,6 @@ pub(crate) fn parse_system_registers( // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); - enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 2); enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); diff --git a/library/std_detect/tests/cpu-detection.rs b/library/std_detect/tests/cpu-detection.rs index e653889c3788a..196abfdb7c4dd 100644 --- a/library/std_detect/tests/cpu-detection.rs +++ b/library/std_detect/tests/cpu-detection.rs @@ -85,7 +85,6 @@ fn aarch64_linux() { println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2")); println!("rcpc3: {}", is_aarch64_feature_detected!("rcpc3")); println!("dotprod: {}", is_aarch64_feature_detected!("dotprod")); - println!("tme: {}", is_aarch64_feature_detected!("tme")); println!("fhm: {}", is_aarch64_feature_detected!("fhm")); println!("dit: {}", is_aarch64_feature_detected!("dit")); println!("flagm: {}", is_aarch64_feature_detected!("flagm")); @@ -175,7 +174,6 @@ fn aarch64_bsd() { println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); - println!("tme: {:?}", is_aarch64_feature_detected!("tme")); println!("paca: {:?}", is_aarch64_feature_detected!("paca")); println!("pacg: {:?}", is_aarch64_feature_detected!("pacg")); println!("aes: {:?}", is_aarch64_feature_detected!("aes")); From 
fd1852f5f338e38eda48a2616372758cd449d684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Tue, 25 Nov 2025 12:31:19 +0100 Subject: [PATCH 06/13] Mark riscv64gc-unknown-linux-musl as tier 2 target --- .../src/spec/targets/riscv64gc_unknown_linux_musl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_target/src/spec/targets/riscv64gc_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/targets/riscv64gc_unknown_linux_musl.rs index f5d647d0fc545..6eba0994deb44 100644 --- a/compiler/rustc_target/src/spec/targets/riscv64gc_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/targets/riscv64gc_unknown_linux_musl.rs @@ -7,7 +7,7 @@ pub(crate) fn target() -> Target { llvm_target: "riscv64-unknown-linux-musl".into(), metadata: TargetMetadata { description: Some("RISC-V Linux (kernel 4.20, musl 1.2.5)".into()), - tier: Some(3), + tier: Some(2), host_tools: Some(false), std: Some(true), }, From 231a3a241bbf91771533d3c3b35e868a50ee6d15 Mon Sep 17 00:00:00 2001 From: lapla Date: Tue, 25 Nov 2025 17:53:48 +0900 Subject: [PATCH 07/13] Deny const auto traits --- compiler/rustc_ast_passes/messages.ftl | 3 +++ compiler/rustc_ast_passes/src/ast_validation.rs | 8 ++++++++ compiler/rustc_ast_passes/src/errors.rs | 8 ++++++++ tests/ui/traits/const-traits/const-auto-trait.rs | 6 ++++++ tests/ui/traits/const-traits/const-auto-trait.stderr | 10 ++++++++++ 5 files changed, 35 insertions(+) create mode 100644 tests/ui/traits/const-traits/const-auto-trait.rs create mode 100644 tests/ui/traits/const-traits/const-auto-trait.stderr diff --git a/compiler/rustc_ast_passes/messages.ftl b/compiler/rustc_ast_passes/messages.ftl index 9bdbcf6ab9071..f03c7dd5b9d59 100644 --- a/compiler/rustc_ast_passes/messages.ftl +++ b/compiler/rustc_ast_passes/messages.ftl @@ -89,6 +89,9 @@ ast_passes_const_and_coroutine = functions cannot be both `const` and `{$corouti .coroutine = `{$coroutine_kind}` because of this .label = {""} 
+ast_passes_const_auto_trait = auto traits cannot be const + .help = remove the `const` keyword + ast_passes_const_bound_trait_object = const trait bounds are not allowed in trait object types ast_passes_const_without_body = diff --git a/compiler/rustc_ast_passes/src/ast_validation.rs b/compiler/rustc_ast_passes/src/ast_validation.rs index 163dbc3350ba2..e57f8da26769b 100644 --- a/compiler/rustc_ast_passes/src/ast_validation.rs +++ b/compiler/rustc_ast_passes/src/ast_validation.rs @@ -820,6 +820,12 @@ impl<'a> AstValidator<'a> { self.dcx().emit_err(errors::ModuleNonAscii { span: ident.span, name: ident.name }); } + fn deny_const_auto_traits(&self, constness: Const) { + if let Const::Yes(span) = constness { + self.dcx().emit_err(errors::ConstAutoTrait { span }); + } + } + fn deny_generic_params(&self, generics: &Generics, ident_span: Span) { if !generics.params.is_empty() { self.dcx() @@ -1257,6 +1263,8 @@ impl<'a> Visitor<'a> for AstValidator<'a> { }) => { self.visit_attrs_vis_ident(&item.attrs, &item.vis, ident); if *is_auto == IsAuto::Yes { + // For why we reject `const auto trait`, see rust-lang/rust#149285. + self.deny_const_auto_traits(*constness); // Auto traits cannot have generics, super traits nor contain items. 
self.deny_generic_params(generics, ident.span); self.deny_super_traits(bounds, ident.span); diff --git a/compiler/rustc_ast_passes/src/errors.rs b/compiler/rustc_ast_passes/src/errors.rs index 02e6ecfbaee74..c700ae517140c 100644 --- a/compiler/rustc_ast_passes/src/errors.rs +++ b/compiler/rustc_ast_passes/src/errors.rs @@ -429,6 +429,14 @@ pub(crate) struct AutoTraitItems { pub ident: Span, } +#[derive(Diagnostic)] +#[diag(ast_passes_const_auto_trait)] +#[help] +pub(crate) struct ConstAutoTrait { + #[primary_span] + pub span: Span, +} + #[derive(Diagnostic)] #[diag(ast_passes_generic_before_constraints)] pub(crate) struct ArgsBeforeConstraint { diff --git a/tests/ui/traits/const-traits/const-auto-trait.rs b/tests/ui/traits/const-traits/const-auto-trait.rs new file mode 100644 index 0000000000000..06558df4623f9 --- /dev/null +++ b/tests/ui/traits/const-traits/const-auto-trait.rs @@ -0,0 +1,6 @@ +#![feature(auto_traits, const_trait_impl)] + +const auto trait Marker {} +//~^ ERROR: auto traits cannot be const + +fn main() {} diff --git a/tests/ui/traits/const-traits/const-auto-trait.stderr b/tests/ui/traits/const-traits/const-auto-trait.stderr new file mode 100644 index 0000000000000..cb8ff8001ba09 --- /dev/null +++ b/tests/ui/traits/const-traits/const-auto-trait.stderr @@ -0,0 +1,10 @@ +error: auto traits cannot be const + --> $DIR/const-auto-trait.rs:3:1 + | +LL | const auto trait Marker {} + | ^^^^^ + | + = help: remove the `const` keyword + +error: aborting due to 1 previous error + From 3326fbd1f4670e0270cd92734cf468e90e2ccc0c Mon Sep 17 00:00:00 2001 From: lapla Date: Tue, 25 Nov 2025 17:54:16 +0900 Subject: [PATCH 08/13] Check existing tests to only verify const auto traits parsing --- tests/ui/traits/const-traits/parse-const-unsafe-trait.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ui/traits/const-traits/parse-const-unsafe-trait.rs b/tests/ui/traits/const-traits/parse-const-unsafe-trait.rs index 3d62405d9ae20..58a3b5a4cc61f 100644 --- 
a/tests/ui/traits/const-traits/parse-const-unsafe-trait.rs +++ b/tests/ui/traits/const-traits/parse-const-unsafe-trait.rs @@ -1,5 +1,6 @@ // Test that `const unsafe trait` and `const unsafe auto trait` works. +//@ compile-flags: -Zparse-crate-root-only //@ check-pass #![feature(const_trait_impl)] From fedbccd3200099f7c4c1d28aa97ed6ce5e6aa71e Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 21 Nov 2025 10:15:19 +0000 Subject: [PATCH 09/13] Use rust rather than LLVM target features in the target spec This works better with non-LLVM codegen backends. --- compiler/rustc_codegen_gcc/src/gcc_util.rs | 15 +++-- compiler/rustc_codegen_llvm/src/llvm_util.rs | 55 ++++++++++--------- .../rustc_codegen_ssa/src/target_features.rs | 46 ++++++++++++++-- compiler/rustc_interface/src/util.rs | 7 ++- compiler/rustc_target/src/spec/mod.rs | 8 +-- .../src/spec/targets/aarch64_apple_ios.rs | 2 +- .../spec/targets/aarch64_apple_ios_macabi.rs | 2 +- .../src/spec/targets/aarch64_apple_ios_sim.rs | 2 +- .../src/spec/targets/aarch64_apple_tvos.rs | 2 +- .../spec/targets/aarch64_apple_tvos_sim.rs | 2 +- .../spec/targets/aarch64_apple_visionos.rs | 2 +- .../targets/aarch64_apple_visionos_sim.rs | 2 +- .../src/spec/targets/aarch64_apple_watchos.rs | 2 +- .../spec/targets/aarch64_apple_watchos_sim.rs | 2 +- .../spec/targets/aarch64_be_unknown_hermit.rs | 2 +- .../aarch64_be_unknown_none_softfloat.rs | 2 +- .../spec/targets/aarch64_kmc_solid_asp3.rs | 2 +- .../src/spec/targets/aarch64_linux_android.rs | 2 +- .../targets/aarch64_pc_windows_gnullvm.rs | 2 +- .../spec/targets/aarch64_pc_windows_msvc.rs | 2 +- .../spec/targets/aarch64_unknown_hermit.rs | 2 +- .../src/spec/targets/aarch64_unknown_none.rs | 2 +- .../targets/aarch64_unknown_none_softfloat.rs | 2 +- .../src/spec/targets/aarch64_unknown_nuttx.rs | 2 +- .../src/spec/targets/aarch64_unknown_teeos.rs | 2 +- .../spec/targets/aarch64_unknown_trusty.rs | 2 +- .../spec/targets/arm64_32_apple_watchos.rs 
| 2 +- .../src/spec/targets/arm64e_apple_ios.rs | 2 +- .../src/spec/targets/arm64e_apple_tvos.rs | 2 +- .../spec/targets/arm64ec_pc_windows_msvc.rs | 2 +- .../targets/x86_64_fortanix_unknown_sgx.rs | 2 +- .../src/spec/targets/x86_64_pc_windows_gnu.rs | 2 +- .../spec/targets/x86_64_pc_windows_gnullvm.rs | 2 +- .../spec/targets/x86_64_pc_windows_msvc.rs | 2 +- .../spec/targets/x86_64_unknown_fuchsia.rs | 2 +- .../src/spec/targets/x86_64_unknown_hermit.rs | 2 +- .../spec/targets/x86_64_uwp_windows_gnu.rs | 2 +- .../spec/targets/x86_64_uwp_windows_msvc.rs | 2 +- .../src/spec/targets/x86_64h_apple_darwin.rs | 2 +- 39 files changed, 120 insertions(+), 79 deletions(-) diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs index e4e2dfdd5643b..330b5ff6828d5 100644 --- a/compiler/rustc_codegen_gcc/src/gcc_util.rs +++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs @@ -33,11 +33,7 @@ pub(crate) fn global_gcc_features(sess: &Session) -> Vec { // should be taken in cases like these. let mut features = vec![]; - // Features implied by an implicit or explicit `--target`. - features.extend(sess.target.features.split(',').filter(|v| !v.is_empty()).map(String::from)); - - // -Ctarget-features - target_features::flag_to_backend_features(sess, |feature, enable| { + let mut extend_backend_features = |feature: &str, enable: bool| { // We run through `to_gcc_features` when // passing requests down to GCC. This means that all in-language // features also work on the command line instead of having two @@ -48,7 +44,13 @@ pub(crate) fn global_gcc_features(sess: &Session) -> Vec { .flat_map(|feat| to_gcc_features(sess, feat).into_iter()) .map(|feature| if !enable { format!("-{}", feature) } else { feature.to_string() }), ); - }); + }; + + // Features implied by an implicit or explicit `--target`. 
+ target_features::target_spec_to_backend_features(sess, &mut extend_backend_features); + + // -Ctarget-features + target_features::flag_to_backend_features(sess, extend_backend_features); gcc_features_by_flags(sess, &mut features); @@ -66,6 +68,7 @@ pub fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> (&Arch::X86 | &Arch::X86_64, "rdrand") => smallvec!["rdrnd"], (&Arch::X86 | &Arch::X86_64, "bmi1") => smallvec!["bmi"], (&Arch::X86 | &Arch::X86_64, "cmpxchg16b") => smallvec!["cx16"], + (&Arch::X86 | &Arch::X86_64, "lahfsahf") => smallvec!["sahf"], (&Arch::X86 | &Arch::X86_64, "avx512vaes") => smallvec!["vaes"], (&Arch::X86 | &Arch::X86_64, "avx512gfni") => smallvec!["gfni"], (&Arch::X86 | &Arch::X86_64, "avx512vpclmulqdq") => smallvec!["vpclmulqdq"], diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index b498448417f58..56f5836acfc34 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -626,6 +626,10 @@ pub(crate) fn target_cpu(sess: &Session) -> &str { /// The target features for compiler flags other than `-Ctarget-features`. fn llvm_features_by_flags(sess: &Session, features: &mut Vec) { + if wants_wasm_eh(sess) && sess.panic_strategy() == PanicStrategy::Unwind { + features.push("+exception-handling".into()); + } + target_features::retpoline_features_by_flags(sess, features); // -Zfixed-x18 @@ -691,36 +695,35 @@ pub(crate) fn global_llvm_features(sess: &Session, only_base_features: bool) -> Some(_) | None => {} }; - // Features implied by an implicit or explicit `--target`. - features.extend(sess.target.features.split(',').filter(|v| !v.is_empty()).map(String::from)); + let mut extend_backend_features = |feature: &str, enable: bool| { + let enable_disable = if enable { '+' } else { '-' }; + // We run through `to_llvm_features` when + // passing requests down to LLVM. 
This means that all in-language + // features also work on the command line instead of having two + // different names when the LLVM name and the Rust name differ. + let Some(llvm_feature) = to_llvm_features(sess, feature) else { return }; + + features.extend( + std::iter::once(format!("{}{}", enable_disable, llvm_feature.llvm_feature_name)).chain( + llvm_feature.dependencies.into_iter().filter_map(move |feat| { + match (enable, feat) { + (_, TargetFeatureFoldStrength::Both(f)) + | (true, TargetFeatureFoldStrength::EnableOnly(f)) => { + Some(format!("{enable_disable}{f}")) + } + _ => None, + } + }), + ), + ); + }; - if wants_wasm_eh(sess) && sess.panic_strategy() == PanicStrategy::Unwind { - features.push("+exception-handling".into()); - } + // Features implied by an implicit or explicit `--target`. + target_features::target_spec_to_backend_features(sess, &mut extend_backend_features); // -Ctarget-features if !only_base_features { - target_features::flag_to_backend_features(sess, |feature, enable| { - let enable_disable = if enable { '+' } else { '-' }; - // We run through `to_llvm_features` when - // passing requests down to LLVM. This means that all in-language - // features also work on the command line instead of having two - // different names when the LLVM name and the Rust name differ. - let Some(llvm_feature) = to_llvm_features(sess, feature) else { return }; - - features.extend( - std::iter::once(format!("{}{}", enable_disable, llvm_feature.llvm_feature_name)) - .chain(llvm_feature.dependencies.into_iter().filter_map(move |feat| { - match (enable, feat) { - (_, TargetFeatureFoldStrength::Both(f)) - | (true, TargetFeatureFoldStrength::EnableOnly(f)) => { - Some(format!("{enable_disable}{f}")) - } - _ => None, - } - })), - ) - }); + target_features::flag_to_backend_features(sess, extend_backend_features); } // We add this in the "base target" so that these show up in `sess.unstable_target_features`. 
diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index 3267eb1a15c73..e9209657984e0 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -139,11 +139,12 @@ pub(crate) fn check_target_feature_trait_unsafe(tcx: TyCtxt<'_>, id: LocalDefId, } } -/// Parse the value of `-Ctarget-feature`, also expanding implied features, -/// and call the closure for each (expanded) Rust feature. If the list contains +/// Parse the value of the target spec `features` field or `-Ctarget-feature`, also expanding +/// implied features, and call the closure for each (expanded) Rust feature. If the list contains /// a syntactically invalid item (not starting with `+`/`-`), the error callback is invoked. -fn parse_rust_feature_flag<'a>( +fn parse_rust_feature_list<'a>( sess: &'a Session, + features: &'a str, err_callback: impl Fn(&'a str), mut callback: impl FnMut( /* base_feature */ &'a str, @@ -154,7 +155,7 @@ fn parse_rust_feature_flag<'a>( // A cache for the backwards implication map. let mut inverse_implied_features: Option>> = None; - for feature in sess.opts.cg.target_feature.split(',') { + for feature in features.split(',') { if let Some(base_feature) = feature.strip_prefix('+') { // Skip features that are not target features, but rustc features. if RUSTC_SPECIFIC_FEATURES.contains(&base_feature) { @@ -244,8 +245,9 @@ pub fn cfg_target_feature<'a, const N: usize>( let mut enabled_disabled_features = FxHashMap::default(); // Add enabled and remove disabled features. - parse_rust_feature_flag( + parse_rust_feature_list( sess, + &sess.opts.cg.target_feature, /* err_callback */ |feature| { sess.dcx().emit_warn(errors::UnknownCTargetFeaturePrefix { feature }); @@ -366,6 +368,37 @@ pub fn check_tied_features( None } +/// Translates the target spec `features` field into a backend target feature list. 
+/// +/// `extend_backend_features` extends the set of backend features (assumed to be in mutable state +/// accessible by that closure) to enable/disable the given Rust feature name. +pub fn target_spec_to_backend_features<'a>( + sess: &'a Session, + mut extend_backend_features: impl FnMut(&'a str, /* enable */ bool), +) { + // Compute implied features + let mut rust_features = vec![]; + parse_rust_feature_list( + sess, + &sess.target.features, + /* err_callback */ + |feature| { + panic!("Target spec contains invalid feature {feature}"); + }, + |_base_feature, new_features, enable| { + // FIXME emit an error for unknown features like cfg_target_feature would for -Ctarget-feature + rust_features.extend( + UnordSet::from(new_features).to_sorted_stable_ord().iter().map(|&&s| (enable, s)), + ); + }, + ); + + // Add this to the backend features. + for (enable, feature) in rust_features { + extend_backend_features(feature, enable); + } +} + /// Translates the `-Ctarget-feature` flag into a backend target feature list. /// /// `extend_backend_features` extends the set of backend features (assumed to be in mutable state @@ -376,8 +409,9 @@ pub fn flag_to_backend_features<'a>( ) { // Compute implied features let mut rust_features = vec![]; - parse_rust_feature_flag( + parse_rust_feature_list( sess, + &sess.opts.cg.target_feature, /* err_callback */ |_feature| { // Errors are already emitted in `cfg_target_feature`; avoid duplicates. 
diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs index 2e100a6215c09..de4175c3511e7 100644 --- a/compiler/rustc_interface/src/util.rs +++ b/compiler/rustc_interface/src/util.rs @@ -364,15 +364,16 @@ impl CodegenBackend for DummyCodegenBackend { } fn target_config(&self, sess: &Session) -> TargetConfig { + let abi_required_features = sess.target.abi_required_features(); let (target_features, unstable_target_features) = cfg_target_feature::<0>( sess, |_feature| Default::default(), |feature| { // This is a standin for the list of features a backend is expected to enable. // It would be better to parse target.features instead and handle implied features, - // but target.features is a list of LLVM target features, not Rust target features. - // The dummy backend doesn't know the mapping between LLVM and Rust target features. - sess.target.abi_required_features().required.contains(&feature) + // but target.features doesn't contain features that are enabled by default for an + // architecture or target cpu. + abi_required_features.required.contains(&feature) }, ); diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index 1a71e344276a8..2779fb376616a 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -2246,10 +2246,10 @@ pub struct TargetOptions { /// Whether a cpu needs to be explicitly set. /// Set to true if there is no default cpu. Defaults to false. pub need_explicit_cpu: bool, - /// Default target features to pass to LLVM. These features overwrite - /// `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`. - /// Corresponds to `llc -mattr=$features`. - /// Note that these are LLVM feature names, not Rust feature names! + /// Default (Rust) target features to enable for this target. These features + /// overwrite `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`. 
+ /// Corresponds to `llc -mattr=$llvm_features` where `$llvm_features` is the + /// result of mapping the Rust features in this field to LLVM features. /// /// Generally it is a bad idea to use negative target features because they often interact very /// poorly with how `-Ctarget-cpu` works. Instead, try to use a lower "base CPU" and enable the diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs index e6c64add9ecb2..4b8356cc71ca6 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+apple-a7".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_macabi.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_macabi.rs index 28d22e022c4e8..10d2dafb8db2b 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_macabi.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_macabi.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a12".into(), + features: "+neon,+apple-a12".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::LEAK | SanitizerSet::THREAD, ..opts diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_sim.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_sim.rs index 3b6a075588a85..351f8ef9eb9a5 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_sim.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios_sim.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - 
features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+apple-a7".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs index b4e60f769b3bc..0916864ac4cc9 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+apple-a7".into(), max_atomic_width: Some(128), ..opts }, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos_sim.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos_sim.rs index 0f2f73c50d0b2..2e18abdef3991 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos_sim.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos_sim.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+apple-a7".into(), max_atomic_width: Some(128), ..opts }, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs index b7ddd58fbb323..b771b3ff10b99 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a16".into(), + features: "+neon,+apple-a16".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD, ..opts diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos_sim.rs 
b/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos_sim.rs index 9ad313f149222..d4b4d94115b40 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos_sim.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos_sim.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a16".into(), + features: "+neon,+apple-a16".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD, ..opts diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs index f0a5a616a6d2d..9f0a24ddd334d 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+v8a,+neon,+fp-armv8,+apple-a7".into(), + features: "+v8a,+neon,+apple-a7".into(), max_atomic_width: Some(128), dynamic_linking: false, position_independent_executables: true, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos_sim.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos_sim.rs index db73687a715fc..fabf01d641c37 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos_sim.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos_sim.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+apple-a7".into(), max_atomic_width: Some(128), ..opts }, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_hermit.rs b/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_hermit.rs index 78ae0f55bb42e..7b63261330732 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_hermit.rs +++ 
b/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_hermit.rs @@ -15,7 +15,7 @@ pub(crate) fn target() -> Target { arch: Arch::AArch64, data_layout: "E-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(), options: TargetOptions { - features: "+v8a,+strict-align,+neon,+fp-armv8".into(), + features: "+v8a,+strict-align,+neon".into(), max_atomic_width: Some(128), stack_probes: StackProbeType::Inline, endian: Endian::Big, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_none_softfloat.rs b/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_none_softfloat.rs index 3b899b13d6d00..07512c01dc4a8 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_none_softfloat.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_be_unknown_none_softfloat.rs @@ -17,7 +17,7 @@ pub(crate) fn target() -> Target { abi: Abi::SoftFloat, linker_flavor: LinkerFlavor::Gnu(Cc::No, Lld::Yes), linker: Some("rust-lld".into()), - features: "+v8a,+strict-align,-neon,-fp-armv8".into(), + features: "+v8a,+strict-align,-neon".into(), relocation_model: RelocModel::Static, disable_redzone: true, max_atomic_width: Some(128), diff --git a/compiler/rustc_target/src/spec/targets/aarch64_kmc_solid_asp3.rs b/compiler/rustc_target/src/spec/targets/aarch64_kmc_solid_asp3.rs index 77ed9a6b82c1a..b7709bd01340a 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_kmc_solid_asp3.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_kmc_solid_asp3.rs @@ -15,7 +15,7 @@ pub(crate) fn target() -> Target { arch: Arch::AArch64, options: TargetOptions { linker: Some("aarch64-kmc-elf-gcc".into()), - features: "+v8a,+neon,+fp-armv8".into(), + features: "+v8a,+neon".into(), relocation_model: RelocModel::Static, disable_redzone: true, max_atomic_width: Some(128), diff --git a/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs b/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs index 
2bee1cf70b45d..3b158c13521ea 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs @@ -21,7 +21,7 @@ pub(crate) fn target() -> Target { max_atomic_width: Some(128), // As documented in https://developer.android.com/ndk/guides/cpu-features.html // the neon (ASIMD) and FP must exist on all android aarch64 targets. - features: "+v8a,+neon,+fp-armv8".into(), + features: "+v8a,+neon".into(), // the AAPCS64 expects use of non-leaf frame pointers per // https://github.com/ARM-software/abi-aa/blob/4492d1570eb70c8fd146623e0db65b2d241f12e7/aapcs64/aapcs64.rst#the-frame-pointer // and we tend to encounter interesting bugs in AArch64 unwinding code if we do not diff --git a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs index 7b3a234cd9e8e..fc1aa7241c6c9 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, FramePointer, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_gnullvm::opts(); base.max_atomic_width = Some(128); - base.features = "+v8a,+neon,+fp-armv8".into(); + base.features = "+v8a,+neon".into(); base.linker = Some("aarch64-w64-mingw32-clang".into()); // Microsoft recommends enabling frame pointers on Arm64 Windows. 
diff --git a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs index 3453f1d6101b9..0d06bec21f4a4 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, FramePointer, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_msvc::opts(); base.max_atomic_width = Some(128); - base.features = "+v8a,+neon,+fp-armv8".into(); + base.features = "+v8a,+neon".into(); // Microsoft recommends enabling frame pointers on Arm64 Windows. // From https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#integer-registers diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_hermit.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_hermit.rs index 2cf5c2519d562..4ecbb73f7a4ec 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_hermit.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_hermit.rs @@ -13,7 +13,7 @@ pub(crate) fn target() -> Target { arch: Arch::AArch64, data_layout: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(), options: TargetOptions { - features: "+v8a,+strict-align,+neon,+fp-armv8".into(), + features: "+v8a,+strict-align,+neon".into(), max_atomic_width: Some(128), stack_probes: StackProbeType::Inline, ..base::hermit::opts() diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_none.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_none.rs index ffb136366acc6..b6bc261cd0dfb 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_none.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_none.rs @@ -20,7 +20,7 @@ pub(crate) fn target() -> Target { LinkerFlavor::Gnu(Cc::No, Lld::No), &["--fix-cortex-a53-843419"], ), - features: 
"+v8a,+strict-align,+neon,+fp-armv8".into(), + features: "+v8a,+strict-align,+neon".into(), supported_sanitizers: SanitizerSet::KCFI | SanitizerSet::KERNELADDRESS, relocation_model: RelocModel::Static, disable_redzone: true, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_none_softfloat.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_none_softfloat.rs index c0fd021497b65..8e1484774cdbd 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_none_softfloat.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_none_softfloat.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { abi: Abi::SoftFloat, linker_flavor: LinkerFlavor::Gnu(Cc::No, Lld::Yes), linker: Some("rust-lld".into()), - features: "+v8a,+strict-align,-neon,-fp-armv8".into(), + features: "+v8a,+strict-align,-neon".into(), relocation_model: RelocModel::Static, disable_redzone: true, max_atomic_width: Some(128), diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_nuttx.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_nuttx.rs index b8618050074d3..19b7ebe036747 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_nuttx.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_nuttx.rs @@ -20,7 +20,7 @@ pub(crate) fn target() -> Target { LinkerFlavor::Gnu(Cc::No, Lld::No), &["--fix-cortex-a53-843419"], ), - features: "+v8a,+strict-align,+neon,+fp-armv8".into(), + features: "+v8a,+strict-align,+neon".into(), supported_sanitizers: SanitizerSet::KCFI | SanitizerSet::KERNELADDRESS, relocation_model: RelocModel::Static, disable_redzone: true, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_teeos.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_teeos.rs index 009f027ca24f5..b8f1bf1ca1a98 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_teeos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_teeos.rs @@ -2,7 +2,7 @@ use crate::spec::{Arch, 
StackProbeType, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::teeos::opts(); - base.features = "+strict-align,+neon,+fp-armv8".into(); + base.features = "+strict-align,+neon".into(); base.max_atomic_width = Some(128); base.stack_probes = StackProbeType::Inline; diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_trusty.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_trusty.rs index 7dfa29787c86f..bacc99c64f161 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_trusty.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_trusty.rs @@ -18,7 +18,7 @@ pub(crate) fn target() -> Target { data_layout: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(), arch: Arch::AArch64, options: TargetOptions { - features: "+neon,+fp-armv8,+reserve-x18".into(), + features: "+neon,+reserve-x18".into(), executables: true, max_atomic_width: Some(128), panic_strategy: PanicStrategy::Abort, diff --git a/compiler/rustc_target/src/spec/targets/arm64_32_apple_watchos.rs b/compiler/rustc_target/src/spec/targets/arm64_32_apple_watchos.rs index dab8cd157d2bd..a9fe6bf0efd25 100644 --- a/compiler/rustc_target/src/spec/targets/arm64_32_apple_watchos.rs +++ b/compiler/rustc_target/src/spec/targets/arm64_32_apple_watchos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32".into(), arch, options: TargetOptions { - features: "+v8a,+neon,+fp-armv8,+apple-a7".into(), + features: "+v8a,+neon,+apple-a7".into(), max_atomic_width: Some(128), dynamic_linking: false, position_independent_executables: true, diff --git a/compiler/rustc_target/src/spec/targets/arm64e_apple_ios.rs b/compiler/rustc_target/src/spec/targets/arm64e_apple_ios.rs index 4ee4a16ab7476..396f0c347a086 100644 --- a/compiler/rustc_target/src/spec/targets/arm64e_apple_ios.rs +++ 
b/compiler/rustc_target/src/spec/targets/arm64e_apple_ios.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a12,+v8.3a,+pauth".into(), + features: "+neon,+apple-a12,+v8.3a,+paca,+pacg".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD, ..opts diff --git a/compiler/rustc_target/src/spec/targets/arm64e_apple_tvos.rs b/compiler/rustc_target/src/spec/targets/arm64e_apple_tvos.rs index c09591be95f96..3cd120567a33f 100644 --- a/compiler/rustc_target/src/spec/targets/arm64e_apple_tvos.rs +++ b/compiler/rustc_target/src/spec/targets/arm64e_apple_tvos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a12,+v8.3a,+pauth".into(), + features: "+neon,+apple-a12,+v8.3a,+paca,+pacg".into(), max_atomic_width: Some(128), ..opts }, diff --git a/compiler/rustc_target/src/spec/targets/arm64ec_pc_windows_msvc.rs b/compiler/rustc_target/src/spec/targets/arm64ec_pc_windows_msvc.rs index aa31f53bf0fcb..b333f2891626c 100644 --- a/compiler/rustc_target/src/spec/targets/arm64ec_pc_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/targets/arm64ec_pc_windows_msvc.rs @@ -5,7 +5,7 @@ use crate::spec::{ pub(crate) fn target() -> Target { let mut base = base::windows_msvc::opts(); base.max_atomic_width = Some(128); - base.features = "+v8a,+neon,+fp-armv8".into(); + base.features = "+v8a,+neon".into(); add_link_args( &mut base.late_link_args, LinkerFlavor::Msvc(Lld::No), diff --git a/compiler/rustc_target/src/spec/targets/x86_64_fortanix_unknown_sgx.rs b/compiler/rustc_target/src/spec/targets/x86_64_fortanix_unknown_sgx.rs index 290087d8cef66..cd074a468fd60 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_fortanix_unknown_sgx.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_fortanix_unknown_sgx.rs @@ -66,7 +66,7 @@ pub(crate) fn target() -> Target { max_atomic_width: 
Some(64), cpu: "x86-64".into(), plt_by_default: false, - features: "+rdrnd,+rdseed,+lvi-cfi,+lvi-load-hardening".into(), + features: "+rdrand,+rdseed,+lvi-cfi,+lvi-load-hardening".into(), llvm_args: cvs!["--x86-experimental-lvi-inline-asm-hardening"], position_independent_executables: true, pre_link_args, diff --git a/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnu.rs b/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnu.rs index ba0882b8693b1..995bfd07071e4 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnu.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnu.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, Cc, LinkerFlavor, Lld, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_gnu::opts(); base.cpu = "x86-64".into(); - base.features = "+cx16,+sse3,+sahf".into(); + base.features = "+cmpxchg16b,+sse3,+lahfsahf".into(); base.plt_by_default = false; // Use high-entropy 64 bit address space for ASLR base.add_pre_link_args( diff --git a/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnullvm.rs b/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnullvm.rs index 0606d4508badc..de3c960bced9c 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnullvm.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_gnullvm.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, Cc, LinkerFlavor, Lld, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_gnullvm::opts(); base.cpu = "x86-64".into(); - base.features = "+cx16,+sse3,+sahf".into(); + base.features = "+cmpxchg16b,+sse3,+lahfsahf".into(); base.plt_by_default = false; base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m64"]); base.max_atomic_width = Some(128); diff --git a/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_msvc.rs b/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_msvc.rs index 1eecec8e6bde7..20b6e0701bffa 
100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_pc_windows_msvc.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, SanitizerSet, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_msvc::opts(); base.cpu = "x86-64".into(); - base.features = "+cx16,+sse3,+sahf".into(); + base.features = "+cmpxchg16b,+sse3,+lahfsahf".into(); base.plt_by_default = false; base.max_atomic_width = Some(128); base.supported_sanitizers = SanitizerSet::ADDRESS; diff --git a/compiler/rustc_target/src/spec/targets/x86_64_unknown_fuchsia.rs b/compiler/rustc_target/src/spec/targets/x86_64_unknown_fuchsia.rs index 0d92097ad456e..dbff5e30828a9 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_unknown_fuchsia.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_unknown_fuchsia.rs @@ -6,7 +6,7 @@ pub(crate) fn target() -> Target { base.plt_by_default = false; // See https://fuchsia.dev/fuchsia-src/contribute/governance/rfcs/0073_x86_64_platform_requirement, // which corresponds to x86-64-v2. 
- base.features = "+cx16,+sahf,+popcnt,+sse3,+sse4.1,+sse4.2,+ssse3".into(); + base.features = "+cmpxchg16b,+lahfsahf,+popcnt,+sse3,+sse4.1,+sse4.2,+ssse3".into(); base.max_atomic_width = Some(128); base.stack_probes = StackProbeType::Inline; base.supported_sanitizers = SanitizerSet::ADDRESS | SanitizerSet::CFI | SanitizerSet::LEAK; diff --git a/compiler/rustc_target/src/spec/targets/x86_64_unknown_hermit.rs b/compiler/rustc_target/src/spec/targets/x86_64_unknown_hermit.rs index ee191ac95ae71..d6630261e2343 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_unknown_hermit.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_unknown_hermit.rs @@ -15,7 +15,7 @@ pub(crate) fn target() -> Target { "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128".into(), options: TargetOptions { cpu: "x86-64".into(), - features: "+rdrnd,+rdseed".into(), + features: "+rdrand,+rdseed".into(), plt_by_default: false, max_atomic_width: Some(64), stack_probes: StackProbeType::Inline, diff --git a/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_gnu.rs b/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_gnu.rs index 96d1bd2764c8e..9d53ce0e1c803 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_gnu.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_gnu.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, Cc, LinkerFlavor, Lld, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_uwp_gnu::opts(); base.cpu = "x86-64".into(); - base.features = "+cx16,+sse3,+sahf".into(); + base.features = "+cmpxchg16b,+sse3,+lahfsahf".into(); base.plt_by_default = false; // Use high-entropy 64 bit address space for ASLR base.add_pre_link_args( diff --git a/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_msvc.rs b/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_msvc.rs index 3be6abd3d9757..003bcf1e0ddff 100644 --- 
a/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_uwp_windows_msvc.rs @@ -3,7 +3,7 @@ use crate::spec::{Arch, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_uwp_msvc::opts(); base.cpu = "x86-64".into(); - base.features = "+cx16,+sse3,+sahf".into(); + base.features = "+cmpxchg16b,+sse3,+lahfsahf".into(); base.plt_by_default = false; base.max_atomic_width = Some(128); diff --git a/compiler/rustc_target/src/spec/targets/x86_64h_apple_darwin.rs b/compiler/rustc_target/src/spec/targets/x86_64h_apple_darwin.rs index adc87378fc7ef..7f1fe485aa150 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64h_apple_darwin.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64h_apple_darwin.rs @@ -17,7 +17,7 @@ pub(crate) fn target() -> Target { // It would be nice if this were not the case, but fixing it seems tricky // (and given that the main use-case for this target is for use in universal // binaries, probably not that important). - opts.features = "-rdrnd,-aes,-pclmul,-rtm,-fsgsbase".into(); + opts.features = "-rdrand,-aes,-pclmulqdq,-rtm,-fsgsbase".into(); // Double-check that the `cpu` is what we expect (if it's not the list above // may need updating). 
assert_eq!( From ae699c8e78469207ccd39f1165ab7f6e1e3f58fe Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 26 Nov 2025 01:02:01 +0900 Subject: [PATCH 10/13] Avoid ICE when handling const auto traits in the next-gen solver --- .../src/solve/effect_goals.rs | 5 +++-- .../ui/traits/const-traits/const-auto-trait.rs | 9 +++++++++ .../traits/const-traits/const-auto-trait.stderr | 17 +++++++++++++++-- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_next_trait_solver/src/solve/effect_goals.rs b/compiler/rustc_next_trait_solver/src/solve/effect_goals.rs index 2cb79a0219f6e..2837b8565f603 100644 --- a/compiler/rustc_next_trait_solver/src/solve/effect_goals.rs +++ b/compiler/rustc_next_trait_solver/src/solve/effect_goals.rs @@ -189,10 +189,11 @@ where } fn consider_auto_trait_candidate( - _ecx: &mut EvalCtxt<'_, D>, + ecx: &mut EvalCtxt<'_, D>, _goal: Goal, ) -> Result, NoSolution> { - unreachable!("auto traits are never const") + ecx.cx().delay_bug("auto traits are never const"); + Err(NoSolution) } fn consider_trait_alias_candidate( diff --git a/tests/ui/traits/const-traits/const-auto-trait.rs b/tests/ui/traits/const-traits/const-auto-trait.rs index 06558df4623f9..d1745a2ec4c72 100644 --- a/tests/ui/traits/const-traits/const-auto-trait.rs +++ b/tests/ui/traits/const-traits/const-auto-trait.rs @@ -1,6 +1,15 @@ +//@ compile-flags: -Znext-solver +// See rust-lang/rust#149285 for this test + #![feature(auto_traits, const_trait_impl)] const auto trait Marker {} //~^ ERROR: auto traits cannot be const +fn scope() { + fn check() {} + check::<()>(); + //~^ ERROR: the trait bound `(): const Marker` is not satisfied +} + fn main() {} diff --git a/tests/ui/traits/const-traits/const-auto-trait.stderr b/tests/ui/traits/const-traits/const-auto-trait.stderr index cb8ff8001ba09..094c334fc2897 100644 --- a/tests/ui/traits/const-traits/const-auto-trait.stderr +++ b/tests/ui/traits/const-traits/const-auto-trait.stderr @@ -1,10 +1,23 @@ error: auto traits cannot be 
const - --> $DIR/const-auto-trait.rs:3:1 + --> $DIR/const-auto-trait.rs:6:1 | LL | const auto trait Marker {} | ^^^^^ | = help: remove the `const` keyword -error: aborting due to 1 previous error +error[E0277]: the trait bound `(): const Marker` is not satisfied + --> $DIR/const-auto-trait.rs:11:13 + | +LL | check::<()>(); + | ^^ + | +note: required by a bound in `check` + --> $DIR/const-auto-trait.rs:10:17 + | +LL | fn check<T: const Marker>() {} + | ^^^^^^^^^^^^ required by this bound in `check` + +error: aborting due to 2 previous errors +For more information about this error, try `rustc --explain E0277`. From 5128ce10a077f96a79e278035522f1f8f9611a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcelo=20Dom=C3=ADnguez?= Date: Sun, 19 Oct 2025 12:36:23 +0200 Subject: [PATCH 11/13] Implement offload intrinsic --- compiler/rustc_codegen_llvm/messages.ftl | 3 + compiler/rustc_codegen_llvm/src/attributes.rs | 12 + compiler/rustc_codegen_llvm/src/back/lto.rs | 10 +- compiler/rustc_codegen_llvm/src/back/write.rs | 11 +- .../src/builder/gpu_offload.rs | 210 +++++++++--------- compiler/rustc_codegen_llvm/src/context.rs | 10 + compiler/rustc_codegen_llvm/src/errors.rs | 8 + compiler/rustc_codegen_llvm/src/intrinsic.rs | 80 ++++++- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 6 +- compiler/rustc_codegen_llvm/src/llvm/mod.rs | 8 + .../rustc_codegen_ssa/src/codegen_attrs.rs | 3 + compiler/rustc_feature/src/builtin_attrs.rs | 5 + .../rustc_hir_analysis/src/check/intrinsic.rs | 2 + .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 33 +++ .../src/middle/codegen_fn_attrs.rs | 2 + compiler/rustc_middle/src/ty/mod.rs | 1 + compiler/rustc_middle/src/ty/offload_meta.rs | 119 ++++++++++ compiler/rustc_span/src/symbol.rs | 2 + library/core/src/intrinsics/mod.rs | 29 +++ tests/codegen-llvm/gpu_offload/gpu_host.rs | 114 +++++----- tests/ui/offload/check_config.fail.stderr | 6 + tests/ui/offload/check_config.rs | 23 ++ 22 files changed, 518 insertions(+), 179 deletions(-) create mode 100644
compiler/rustc_middle/src/ty/offload_meta.rs create mode 100644 tests/ui/offload/check_config.fail.stderr create mode 100644 tests/ui/offload/check_config.rs diff --git a/compiler/rustc_codegen_llvm/messages.ftl b/compiler/rustc_codegen_llvm/messages.ftl index c9d28160d66f7..0e7b00d0bcb70 100644 --- a/compiler/rustc_codegen_llvm/messages.ftl +++ b/compiler/rustc_codegen_llvm/messages.ftl @@ -18,6 +18,9 @@ codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for codegen_llvm_mismatch_data_layout = data-layout for target `{$rustc_target}`, `{$rustc_layout}`, differs from LLVM target's `{$llvm_target}` default layout, `{$llvm_layout}` +codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=Enable +codegen_llvm_offload_without_fat_lto = using the offload feature requires -C lto=fat + codegen_llvm_parse_bitcode = failed to parse bitcode for LTO module codegen_llvm_parse_bitcode_with_llvm_err = failed to parse bitcode for LTO module: {$llvm_err} diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 89878d1e7e20b..a25ce9e5a90ac 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -30,6 +30,14 @@ pub(crate) fn apply_to_callsite(callsite: &Value, idx: AttributePlace, attrs: &[ } } +pub(crate) fn has_string_attr(llfn: &Value, name: &str) -> bool { + llvm::HasStringAttribute(llfn, name) +} + +pub(crate) fn remove_string_attr_from_llfn(llfn: &Value, name: &str) { + llvm::RemoveStringAttrFromFn(llfn, name); +} + /// Get LLVM attribute for the provided inline heuristic. 
pub(crate) fn inline_attr<'ll, 'tcx>( cx: &SimpleCx<'ll>, @@ -408,6 +416,10 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( to_add.push(llvm::CreateAttrString(cx.llcx, "no-builtins")); } + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::OFFLOAD_KERNEL) { + to_add.push(llvm::CreateAttrString(cx.llcx, "offload-kernel")) + } + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) { to_add.push(AttributeKind::Cold.create_attr(cx.llcx)); } diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index b820b992105fd..482e954138553 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -26,7 +26,7 @@ use crate::back::write::{ }; use crate::errors::{LlvmError, LtoBitcodeFromRlib}; use crate::llvm::{self, build_string}; -use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx}; +use crate::{LlvmCodegenBackend, ModuleLlvm}; /// We keep track of the computed LTO cache keys from the previous /// session to determine which CGUs we can reuse. @@ -601,7 +601,6 @@ pub(crate) fn run_pass_manager( // We then run the llvm_optimize function a second time, to optimize the code which we generated // in the enzyme differentiation pass. let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable); - let enable_gpu = config.offload.contains(&config::Offload::Enable); let stage = if thin { write::AutodiffStage::PreAD } else { @@ -616,13 +615,6 @@ pub(crate) fn run_pass_manager( write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage); } - // Here we only handle the GPU host (=cpu) code. 
- if enable_gpu && !thin && !cgcx.target_is_like_gpu { - let cx = - SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); - crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx); - } - if cfg!(feature = "llvm_enzyme") && enable_ad && !thin { let opt_stage = llvm::OptStage::FatLTO; let stage = write::AutodiffStage::PostAD; diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index fde7dd6ef7a85..4db4283adb404 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -43,7 +43,7 @@ use crate::errors::{ use crate::llvm::diagnostic::OptimizationDiagnosticKind::*; use crate::llvm::{self, DiagnosticInfo}; use crate::type_::llvm_type_ptr; -use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, base, common, llvm_util}; +use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes, base, common, llvm_util}; pub(crate) fn llvm_err<'a>(dcx: DiagCtxtHandle<'_>, err: LlvmError<'a>) -> ! { match llvm::last_error() { @@ -706,11 +706,12 @@ pub(crate) unsafe fn llvm_optimize( SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size); // For now we only support up to 10 kernels named kernel_0 ... kernel_9, a follow-up PR is // introducing a proper offload intrinsic to solve this limitation. 
- for num in 0..9 { - let name = format!("kernel_{num}"); - if let Some(kernel) = cx.get_function(&name) { - handle_offload(&cx, kernel); + for func in cx.get_functions() { + let offload_kernel = "offload-kernel"; + if attributes::has_string_attr(func, offload_kernel) { + handle_offload(&cx, func); } + attributes::remove_string_attr_from_llfn(func, offload_kernel); } } diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs index 5c2f8f700627e..5d1ddd057d88a 100644 --- a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -2,37 +2,13 @@ use std::ffi::CString; use llvm::Linkage::*; use rustc_abi::Align; -use rustc_codegen_ssa::back::write::CodegenContext; use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; +use rustc_middle::ty::offload_meta::OffloadMetadata; use crate::builder::SBuilder; -use crate::common::AsCCharPtr; use crate::llvm::AttributePlace::Function; -use crate::llvm::{self, Linkage, Type, Value}; -use crate::{LlvmCodegenBackend, SimpleCx, attributes}; - -pub(crate) fn handle_gpu_code<'ll>( - _cgcx: &CodegenContext, - cx: &'ll SimpleCx<'_>, -) { - // The offload memory transfer type for each kernel - let mut memtransfer_types = vec![]; - let mut region_ids = vec![]; - let offload_entry_ty = TgtOffloadEntry::new_decl(&cx); - // This is a temporary hack, we only search for kernel_0 to kernel_9 functions. - // There is a draft PR in progress which will introduce a proper offload intrinsic to remove - // this limitation. 
- for num in 0..9 { - let kernel = cx.get_function(&format!("kernel_{num}")); - if let Some(kernel) = kernel { - let (o, k) = gen_define_handling(&cx, kernel, offload_entry_ty, num); - memtransfer_types.push(o); - region_ids.push(k); - } - } - - gen_call_handling(&cx, &memtransfer_types, &region_ids); -} +use crate::llvm::{self, BasicBlock, Linkage, Type, Value}; +use crate::{SimpleCx, attributes}; // ; Function Attrs: nounwind // declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) #2 @@ -79,7 +55,7 @@ fn generate_at_one<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value { at_one } -struct TgtOffloadEntry { +pub(crate) struct TgtOffloadEntry { // uint64_t Reserved; // uint16_t Version; // uint16_t Kind; @@ -167,7 +143,7 @@ impl KernelArgsTy { fn new<'ll>( cx: &'ll SimpleCx<'_>, num_args: u64, - memtransfer_types: &[&'ll Value], + memtransfer_types: &'ll Value, geps: [&'ll Value; 3], ) -> [(Align, &'ll Value); 13] { let four = Align::from_bytes(4).expect("4 Byte alignment should work"); @@ -181,7 +157,7 @@ impl KernelArgsTy { (eight, geps[0]), (eight, geps[1]), (eight, geps[2]), - (eight, memtransfer_types[0]), + (eight, memtransfer_types), // The next two are debug infos. FIXME(offload): set them (eight, cx.const_null(cx.type_ptr())), // dbg (eight, cx.const_null(cx.type_ptr())), // dbg @@ -194,6 +170,14 @@ impl KernelArgsTy { } } +// Contains LLVM values needed to manage offloading for a single kernel. +pub(crate) struct OffloadKernelData<'ll> { + pub offload_sizes: &'ll llvm::Value, + pub memtransfer_types: &'ll llvm::Value, + pub region_id: &'ll llvm::Value, + pub offload_entry: &'ll llvm::Value, +} + fn gen_tgt_data_mappers<'ll>( cx: &'ll SimpleCx<'_>, ) -> (&'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Type) { @@ -256,68 +240,68 @@ pub(crate) fn add_global<'ll>( // This function returns a memtransfer value which encodes how arguments to this kernel shall be // mapped to/from the gpu.
It also returns a region_id with the name of this kernel, to be // concatenated into the list of region_ids. -fn gen_define_handling<'ll>( - cx: &'ll SimpleCx<'_>, - kernel: &'ll llvm::Value, +pub(crate) fn gen_define_handling<'ll>( + cx: &SimpleCx<'ll>, offload_entry_ty: &'ll llvm::Type, - num: i64, -) -> (&'ll llvm::Value, &'ll llvm::Value) { - let types = cx.func_params_types(cx.get_type_of_global(kernel)); + metadata: &[OffloadMetadata], + types: &[&Type], + symbol: &str, +) -> OffloadKernelData<'ll> { // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or // reference) types. - let num_ptr_types = types - .iter() - .filter(|&x| matches!(cx.type_kind(x), rustc_codegen_ssa::common::TypeKind::Pointer)) - .count(); - - // We do not know their size anymore at this level, so hardcode a placeholder. - // A follow-up pr will track these from the frontend, where we still have Rust types. - // Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes. - // I decided that 1024 bytes is a great placeholder value for now. - add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{num}"), &vec![1024; num_ptr_types]); + let ptr_meta = types.iter().zip(metadata).filter_map(|(&x, meta)| match cx.type_kind(x) { + rustc_codegen_ssa::common::TypeKind::Pointer => Some(meta), + _ => None, + }); + + // FIXME(Sa4dUs): add `OMP_MAP_TARGET_PARAM = 0x20` only if necessary + let (ptr_sizes, ptr_transfer): (Vec<_>, Vec<_>) = + ptr_meta.map(|m| (m.payload_size, m.mode.bits() | 0x20)).unzip(); + + let offload_sizes = add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &ptr_sizes); // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2), // or both to and from the gpu (=3). Other values shouldn't affect us for now. // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten // will be 2. 
For now, everything is 3, until we have our frontend set up. // 1+2+32: 1 (MapTo), 2 (MapFrom), 32 (Add one extra input ptr per function, to be used later). - let memtransfer_types = add_priv_unnamed_arr( - &cx, - &format!(".offload_maptypes.{num}"), - &vec![1 + 2 + 32; num_ptr_types], - ); + let memtransfer_types = + add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{symbol}"), &ptr_transfer); + // Next: For each function, generate these three entries. A weak constant, // the llvm.rodata entry name, and the llvm_offload_entries value - let name = format!(".kernel_{num}.region_id"); + let name = format!(".{symbol}.region_id"); let initializer = cx.get_const_i8(0); let region_id = add_unnamed_global(&cx, &name, initializer, WeakAnyLinkage); - let c_entry_name = CString::new(format!("kernel_{num}")).unwrap(); + let c_entry_name = CString::new(symbol).unwrap(); let c_val = c_entry_name.as_bytes_with_nul(); - let offload_entry_name = format!(".offloading.entry_name.{num}"); + let offload_entry_name = format!(".offloading.entry_name.{symbol}"); let initializer = crate::common::bytes_in_context(cx.llcx, c_val); let llglobal = add_unnamed_global(&cx, &offload_entry_name, initializer, InternalLinkage); llvm::set_alignment(llglobal, Align::ONE); llvm::set_section(llglobal, c".llvm.rodata.offloading"); - let name = format!(".offloading.entry.kernel_{num}"); + + let name = format!(".offloading.entry.{symbol}"); // See the __tgt_offload_entry documentation above. 
let elems = TgtOffloadEntry::new(&cx, region_id, llglobal); let initializer = crate::common::named_struct(offload_entry_ty, &elems); let c_name = CString::new(name).unwrap(); - let llglobal = llvm::add_global(cx.llmod, offload_entry_ty, &c_name); - llvm::set_global_constant(llglobal, true); - llvm::set_linkage(llglobal, WeakAnyLinkage); - llvm::set_initializer(llglobal, initializer); - llvm::set_alignment(llglobal, Align::EIGHT); + let offload_entry = llvm::add_global(cx.llmod, offload_entry_ty, &c_name); + llvm::set_global_constant(offload_entry, true); + llvm::set_linkage(offload_entry, WeakAnyLinkage); + llvm::set_initializer(offload_entry, initializer); + llvm::set_alignment(offload_entry, Align::EIGHT); let c_section_name = CString::new("llvm_offload_entries").unwrap(); - llvm::set_section(llglobal, &c_section_name); - (memtransfer_types, region_id) + llvm::set_section(offload_entry, &c_section_name); + + OffloadKernelData { offload_sizes, memtransfer_types, region_id, offload_entry } } -pub(crate) fn declare_offload_fn<'ll>( +fn declare_offload_fn<'ll>( cx: &'ll SimpleCx<'_>, name: &str, ty: &'ll llvm::Type, @@ -333,8 +317,7 @@ pub(crate) fn declare_offload_fn<'ll>( } // For each kernel *call*, we now use some of our previous declared globals to move data to and from -// the gpu. We don't have a proper frontend yet, so we assume that every call to a kernel function -// from main is intended to run on the GPU. For now, we only handle the data transfer part of it. +// the gpu. For now, we only handle the data transfer part of it. // If two consecutive kernels use the same memory, we still move it to the host and back to the gpu. // Since in our frontend users (by default) don't have to specify data transfer, this is something // we should optimize in the future! We also assume that everything should be copied back and forth, @@ -352,11 +335,16 @@ pub(crate) fn declare_offload_fn<'ll>( // 4. set insert point after kernel call. // 5. 
generate all the GEPS and stores, to be used in 6) // 6. generate __tgt_target_data_end calls to move data from the GPU -fn gen_call_handling<'ll>( - cx: &'ll SimpleCx<'_>, - memtransfer_types: &[&'ll llvm::Value], - region_ids: &[&'ll llvm::Value], +pub(crate) fn gen_call_handling<'ll>( + cx: &SimpleCx<'ll>, + bb: &BasicBlock, + offload_data: &OffloadKernelData<'ll>, + args: &[&'ll Value], + types: &[&Type], + metadata: &[OffloadMetadata], ) { + let OffloadKernelData { offload_sizes, offload_entry, memtransfer_types, region_id } = + offload_data; let (tgt_decl, tgt_target_kernel_ty) = generate_launcher(&cx); // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } let tptr = cx.type_ptr(); @@ -368,27 +356,32 @@ fn gen_call_handling<'ll>( let tgt_kernel_decl = KernelArgsTy::new_decl(&cx); let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx); - let main_fn = cx.get_function("main"); - let Some(main_fn) = main_fn else { return }; - let kernel_name = "kernel_1"; - let call = unsafe { - llvm::LLVMRustGetFunctionCall(main_fn, kernel_name.as_c_char_ptr(), kernel_name.len()) - }; - let Some(kernel_call) = call else { - return; - }; - let kernel_call_bb = unsafe { llvm::LLVMGetInstructionParent(kernel_call) }; - let called = unsafe { llvm::LLVMGetCalledValue(kernel_call).unwrap() }; - let mut builder = SBuilder::build(cx, kernel_call_bb); - - let types = cx.func_params_types(cx.get_type_of_global(called)); + let mut builder = SBuilder::build(cx, bb); + let num_args = types.len() as u64; + let ip = unsafe { llvm::LLVMRustGetInsertPoint(&builder.llbuilder) }; + + // FIXME(Sa4dUs): dummy loads are a temp workaround, we should find a proper way to prevent these + // variables from being optimized away + for val in [offload_sizes, offload_entry] { + unsafe { + let dummy = llvm::LLVMBuildLoad2( + &builder.llbuilder, + llvm::LLVMTypeOf(val), + val, + b"dummy\0".as_ptr() as *const _, + ); + llvm::LLVMSetVolatile(dummy, llvm::TRUE); + } + } // Step 0) // 
%struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } // %6 = alloca %struct.__tgt_bin_desc, align 8 - unsafe { llvm::LLVMRustPositionBuilderPastAllocas(builder.llbuilder, main_fn) }; - + let llfn = unsafe { llvm::LLVMGetBasicBlockParent(bb) }; + unsafe { + llvm::LLVMRustPositionBuilderPastAllocas(&builder.llbuilder, llfn); + } let tgt_bin_desc_alloca = builder.direct_alloca(tgt_bin_desc, Align::EIGHT, "EmptyDesc"); let ty = cx.type_array(cx.type_ptr(), num_args); @@ -404,15 +397,16 @@ fn gen_call_handling<'ll>( let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args"); // Step 1) - unsafe { llvm::LLVMRustPositionBefore(builder.llbuilder, kernel_call) }; + unsafe { + llvm::LLVMRustRestoreInsertPoint(&builder.llbuilder, ip); + } builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT); // Now we allocate once per function param, a copy to be passed to one of our maps. let mut vals = vec![]; let mut geps = vec![]; let i32_0 = cx.get_const_i32(0); - for index in 0..types.len() { - let v = unsafe { llvm::LLVMGetOperand(kernel_call, index as u32).unwrap() }; + for &v in args { let gep = builder.inbounds_gep(cx.type_f32(), v, &[i32_0]); vals.push(v); geps.push(gep); @@ -437,10 +431,8 @@ fn gen_call_handling<'ll>( let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, idx]); builder.store(geps[i as usize], gep2, Align::EIGHT); let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]); - // As mentioned above, we don't use Rust type information yet. So for now we will just - // assume that we have 1024 bytes, 256 f32 values. // FIXME(offload): write an offload frontend and handle arbitrary types. 
- builder.store(cx.get_const_i64(1024), gep3, Align::EIGHT); + builder.store(cx.get_const_i64(metadata[i as usize].payload_size), gep3, Align::EIGHT); } // For now we have a very simplistic indexing scheme into our @@ -482,9 +474,17 @@ fn gen_call_handling<'ll>( // Step 2) let s_ident_t = generate_at_one(&cx); - let o = memtransfer_types[0]; let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); - generate_mapper_call(&mut builder, &cx, geps, o, begin_mapper_decl, fn_ty, num_args, s_ident_t); + generate_mapper_call( + &mut builder, + &cx, + geps, + memtransfer_types, + begin_mapper_decl, + fn_ty, + num_args, + s_ident_t, + ); let values = KernelArgsTy::new(&cx, num_args, memtransfer_types, geps); // Step 3) @@ -501,26 +501,26 @@ fn gen_call_handling<'ll>( // FIXME(offload): Don't hardcode the numbers of threads in the future. cx.get_const_i32(2097152), cx.get_const_i32(256), - region_ids[0], + region_id, a5, ]; - let offload_success = builder.call(tgt_target_kernel_ty, tgt_decl, &args, None); + builder.call(tgt_target_kernel_ty, tgt_decl, &args, None); // %41 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args) - unsafe { - let next = llvm::LLVMGetNextInstruction(offload_success).unwrap(); - llvm::LLVMRustPositionAfter(builder.llbuilder, next); - llvm::LLVMInstructionEraseFromParent(next); - } // Step 4) let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); - generate_mapper_call(&mut builder, &cx, geps, o, end_mapper_decl, fn_ty, num_args, s_ident_t); + generate_mapper_call( + &mut builder, + &cx, + geps, + memtransfer_types, + end_mapper_decl, + fn_ty, + num_args, + s_ident_t, + ); builder.call(mapper_fn_ty, unregister_lib_decl, &[tgt_bin_desc_alloca], None); drop(builder); - // FIXME(offload) The issue is that we right now add a call to the gpu version of the function, - // and then delete the call to the CPU version. 
In the future, we should use an intrinsic which - // directly resolves to a call to the GPU version. - unsafe { llvm::LLVMDeleteFunction(called) }; } diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index b60c8a7d37193..6caf60e3cc41e 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -791,6 +791,16 @@ impl<'ll, CX: Borrow>> GenericCx<'ll, CX> { llvm::LLVMMDStringInContext2(self.llcx(), name.as_ptr() as *const c_char, name.len()) } } + + pub(crate) fn get_functions(&self) -> Vec<&'ll Value> { + let mut functions = vec![]; + let mut func = unsafe { llvm::LLVMGetFirstFunction(self.llmod()) }; + while let Some(f) = func { + functions.push(f); + func = unsafe { llvm::LLVMGetNextFunction(f) } + } + functions + } } impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs index 629afee8a6677..dd9fde0b08c6f 100644 --- a/compiler/rustc_codegen_llvm/src/errors.rs +++ b/compiler/rustc_codegen_llvm/src/errors.rs @@ -40,6 +40,14 @@ pub(crate) struct AutoDiffWithoutLto; #[diag(codegen_llvm_autodiff_without_enable)] pub(crate) struct AutoDiffWithoutEnable; +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_without_enable)] +pub(crate) struct OffloadWithoutEnable; + +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_without_fat_lto)] +pub(crate) struct OffloadWithoutFatLTO; + #[derive(Diagnostic)] #[diag(codegen_llvm_lto_bitcode_from_rlib)] pub(crate) struct LtoBitcodeFromRlib { diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 029c43e0ba82e..33541f7b695f8 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -13,6 +13,7 @@ use rustc_hir::def_id::LOCAL_CRATE; use rustc_hir::{self as hir}; use rustc_middle::mir::BinOp; use 
rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf}; +use rustc_middle::ty::offload_meta::OffloadMetadata; use rustc_middle::ty::{self, GenericArgsRef, Instance, SimdAlign, Ty, TyCtxt, TypingEnv}; use rustc_middle::{bug, span_bug}; use rustc_session::config::CrateType; @@ -25,8 +26,11 @@ use tracing::debug; use crate::abi::FnAbiLlvmExt; use crate::builder::Builder; use crate::builder::autodiff::{adjust_activity_to_abi, generate_enzyme_call}; +use crate::builder::gpu_offload::TgtOffloadEntry; use crate::context::CodegenCx; -use crate::errors::{AutoDiffWithoutEnable, AutoDiffWithoutLto}; +use crate::errors::{ + AutoDiffWithoutEnable, AutoDiffWithoutLto, OffloadWithoutEnable, OffloadWithoutFatLTO, +}; use crate::llvm::{self, Metadata, Type, Value}; use crate::type_of::LayoutLlvmExt; use crate::va_arg::emit_va_arg; @@ -197,6 +201,24 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { codegen_autodiff(self, tcx, instance, args, result); return Ok(()); } + sym::offload => { + if !tcx + .sess + .opts + .unstable_opts + .offload + .contains(&rustc_session::config::Offload::Enable) + { + let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutEnable); + } + + if tcx.sess.lto() != rustc_session::config::Lto::Fat { + let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutFatLTO); + } + + codegen_offload(self, tcx, instance, args); + return Ok(()); + } sym::is_val_statically_known => { if let OperandValue::Immediate(imm) = args[0].val { self.call_intrinsic( @@ -1231,6 +1253,62 @@ fn codegen_autodiff<'ll, 'tcx>( ); } +// Generates the LLVM code to offload a Rust function to a target device (e.g., GPU). +// For each kernel call, it generates the necessary globals (including metadata such as +// size and pass mode), manages memory mapping to and from the device, handles all +// data transfers, and launches the kernel on the target device. 
+fn codegen_offload<'ll, 'tcx>( + bx: &mut Builder<'_, 'll, 'tcx>, + tcx: TyCtxt<'tcx>, + instance: ty::Instance<'tcx>, + args: &[OperandRef<'tcx, &'ll Value>], +) { + let cx = bx.cx; + let fn_args = instance.args; + + let (target_id, target_args) = match fn_args.into_type_list(tcx)[0].kind() { + ty::FnDef(def_id, params) => (def_id, params), + _ => bug!("invalid offload intrinsic arg"), + }; + + let fn_target = match Instance::try_resolve(tcx, cx.typing_env(), *target_id, target_args) { + Ok(Some(instance)) => instance, + Ok(None) => bug!( + "could not resolve ({:?}, {:?}) to a specific offload instance", + target_id, + target_args + ), + Err(_) => { + // An error has already been emitted + return; + } + }; + + let args = get_args_from_tuple(bx, args[1], fn_target); + let target_symbol = symbol_name_for_instance_in_crate(tcx, fn_target, LOCAL_CRATE); + + let offload_entry_ty = TgtOffloadEntry::new_decl(&cx); + + let sig = tcx.fn_sig(fn_target.def_id()).skip_binder().skip_binder(); + let inputs = sig.inputs(); + + let metadata = inputs.iter().map(|ty| OffloadMetadata::from_ty(tcx, *ty)).collect::<Vec<_>>(); + + let types = inputs.iter().map(|ty| cx.layout_of(*ty).llvm_type(cx)).collect::<Vec<_>>(); + + let offload_data = crate::builder::gpu_offload::gen_define_handling( + cx, + offload_entry_ty, + &metadata, + &types, + &target_symbol, + ); + + // FIXME(Sa4dUs): pass the original builder once we separate kernel launch logic from globals + let bb = unsafe { llvm::LLVMGetInsertBlock(bx.llbuilder) }; + crate::builder::gpu_offload::gen_call_handling(cx, bb, &offload_data, &args, &types, &metadata); +} + fn get_args_from_tuple<'ll, 'tcx>( bx: &mut Builder<'_, 'll, 'tcx>, tuple_op: OperandRef<'tcx, &'ll Value>, diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index ca64d96c2a33c..be99c79a9517a 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1160,13 +1160,9 @@ unsafe
extern "C" { ) -> &'a BasicBlock; // Operations on instructions - pub(crate) fn LLVMGetInstructionParent(Inst: &Value) -> &BasicBlock; - pub(crate) fn LLVMGetCalledValue(CallInst: &Value) -> Option<&Value>; pub(crate) fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>; pub(crate) fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock; pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>; - pub(crate) fn LLVMGetNextInstruction(Val: &Value) -> Option<&Value>; - pub(crate) fn LLVMInstructionEraseFromParent(Val: &Value); // Operations on call sites pub(crate) fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint); @@ -2454,6 +2450,8 @@ unsafe extern "C" { pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value); pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock); + pub(crate) fn LLVMRustGetInsertPoint<'a>(B: &Builder<'a>) -> &'a Value; + pub(crate) fn LLVMRustRestoreInsertPoint<'a>(B: &Builder<'a>, IP: &'a Value); pub(crate) fn LLVMRustSetModulePICLevel(M: &Module); pub(crate) fn LLVMRustSetModulePIELevel(M: &Module); diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs index 4c58a92106d5c..55a4b415a4e27 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs @@ -43,6 +43,14 @@ pub(crate) fn AddFunctionAttributes<'ll>( } } +pub(crate) fn HasStringAttribute<'ll>(llfn: &'ll Value, name: &str) -> bool { + unsafe { LLVMRustHasFnAttribute(llfn, name.as_c_char_ptr(), name.len()) } +} + +pub(crate) fn RemoveStringAttrFromFn<'ll>(llfn: &'ll Value, name: &str) { + unsafe { LLVMRustRemoveFnAttribute(llfn, name.as_c_char_ptr(), name.len()) } +} + pub(crate) fn AddCallSiteAttributes<'ll>( callsite: &'ll Value, idx: AttributePlace, diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs index fd3d7d2a3ded0..0ab0cb0ef88a5 100644 --- 
a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs +++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs @@ -334,6 +334,9 @@ fn process_builtin_attrs( codegen_fn_attrs.patchable_function_entry = parse_patchable_function_entry(tcx, attr); } + sym::rustc_offload_kernel => { + codegen_fn_attrs.flags |= CodegenFnAttrFlags::OFFLOAD_KERNEL + } _ => {} } } diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs index ce7dd16cd19cb..8e10f6041d1cd 100644 --- a/compiler/rustc_feature/src/builtin_attrs.rs +++ b/compiler/rustc_feature/src/builtin_attrs.rs @@ -1117,6 +1117,11 @@ pub static BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ rustc_autodiff, Normal, template!(Word, List: &[r#""...""#]), DuplicatesOk, EncodeCrossCrate::Yes, + ), + rustc_attr!( + rustc_offload_kernel, Normal, + template!(Word), DuplicatesOk, + EncodeCrossCrate::Yes, ), // Traces that are left when `cfg` and `cfg_attr` attributes are expanded. // The attributes are not gated, to avoid stability errors, but they cannot be used in stable diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index 8a505668d0dad..676c9a980afff 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -163,6 +163,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi | sym::minnumf128 | sym::mul_with_overflow | sym::needs_drop + | sym::offload | sym::offset_of | sym::overflow_checks | sym::powf16 @@ -313,6 +314,7 @@ pub(crate) fn check_intrinsic_type( let type_id = tcx.type_of(tcx.lang_items().type_id().unwrap()).instantiate_identity(); (0, 0, vec![type_id, type_id], tcx.types.bool) } + sym::offload => (3, 0, vec![param(0), param(1)], param(2)), sym::offset => (2, 0, vec![param(0), param(1)], param(0)), sym::arith_offset => ( 1, diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp 
b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index 8823c83922822..7902d167dd5e2 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -1378,6 +1378,39 @@ extern "C" void LLVMRustPositionAfter(LLVMBuilderRef B, LLVMValueRef Instr) { } } +extern "C" LLVMValueRef LLVMRustGetInsertPoint(LLVMBuilderRef B) { + llvm::IRBuilderBase &IRB = *unwrap(B); + + llvm::IRBuilderBase::InsertPoint ip = IRB.saveIP(); + llvm::BasicBlock *BB = ip.getBlock(); + + if (!BB) + return nullptr; + + auto it = ip.getPoint(); + + if (it == BB->end()) + return nullptr; + + llvm::Instruction *I = &*it; + return wrap(I); +} + +extern "C" void LLVMRustRestoreInsertPoint(LLVMBuilderRef B, + LLVMValueRef Instr) { + llvm::IRBuilderBase &IRB = *unwrap(B); + + if (!Instr) { + llvm::BasicBlock *BB = IRB.GetInsertBlock(); + if (BB) + IRB.SetInsertPoint(BB); + return; + } + + llvm::Instruction *I = unwrap(Instr); + IRB.SetInsertPoint(I); +} + extern "C" LLVMValueRef LLVMRustGetFunctionCall(LLVMValueRef Fn, const char *Name, size_t NameLen) { auto targetName = StringRef(Name, NameLen); diff --git a/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs b/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs index 5a28d56d4e549..9630cfc94b433 100644 --- a/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs +++ b/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs @@ -190,6 +190,8 @@ bitflags::bitflags! { const NO_BUILTINS = 1 << 15; /// Marks foreign items, to make `contains_extern_indicator` cheaper. const FOREIGN_ITEM = 1 << 16; + /// `#[rustc_offload_kernel]`: indicates that this is an offload kernel, an extra ptr arg will be added. + const OFFLOAD_KERNEL = 1 << 17; } } rustc_data_structures::external_bitflags_debug! 
{ CodegenFnAttrFlags } diff --git a/compiler/rustc_middle/src/ty/mod.rs b/compiler/rustc_middle/src/ty/mod.rs index d3914d2aee74c..61b3059ab4253 100644 --- a/compiler/rustc_middle/src/ty/mod.rs +++ b/compiler/rustc_middle/src/ty/mod.rs @@ -129,6 +129,7 @@ pub mod fast_reject; pub mod inhabitedness; pub mod layout; pub mod normalize_erasing_regions; +pub mod offload_meta; pub mod pattern; pub mod print; pub mod relate; diff --git a/compiler/rustc_middle/src/ty/offload_meta.rs b/compiler/rustc_middle/src/ty/offload_meta.rs new file mode 100644 index 0000000000000..04a7cd2c75f28 --- /dev/null +++ b/compiler/rustc_middle/src/ty/offload_meta.rs @@ -0,0 +1,119 @@ +use bitflags::bitflags; + +use crate::ty::{self, PseudoCanonicalInput, Ty, TyCtxt, TypingEnv}; + +pub struct OffloadMetadata { + pub payload_size: u64, + pub mode: MappingFlags, +} + +bitflags! { + /// Mirrors `OpenMPOffloadMappingFlags` from Clang/OpenMP. + #[derive(Debug, Copy, Clone)] + #[repr(transparent)] + pub struct MappingFlags: u64 { + /// No flags. + const NONE = 0x0; + /// Allocate memory on the device and move data from host to device. + const TO = 0x01; + /// Allocate memory on the device and move data from device to host. + const FROM = 0x02; + /// Always perform the requested mapping action, even if already mapped. + const ALWAYS = 0x04; + /// Delete the element from the device environment, ignoring ref count. + const DELETE = 0x08; + /// The element being mapped is a pointer-pointee pair. + const PTR_AND_OBJ = 0x10; + /// The base address should be passed to the target kernel as argument. + const TARGET_PARAM = 0x20; + /// The runtime must return the device pointer. + const RETURN_PARAM = 0x40; + /// The reference being passed is a pointer to private data. + const PRIVATE = 0x80; + /// Pass the element by value. + const LITERAL = 0x100; + /// Implicit map (generated by compiler, not explicit in code). + const IMPLICIT = 0x200; + /// Hint to allocate memory close to the target device. 
+ const CLOSE = 0x400; + /// Reserved (0x800 in OpenMP for XLC compatibility). + const RESERVED = 0x800; + /// Require that the data is already allocated on the device. + const PRESENT = 0x1000; + /// Increment/decrement a separate ref counter (OpenACC compatibility). + const OMPX_HOLD = 0x2000; + /// Used for non-contiguous list items in target update. + const NON_CONTIG = 0x100000000000; + /// 16 MSBs indicate membership in a struct. + const MEMBER_OF = 0xffff000000000000; + } +} + +impl OffloadMetadata { + pub fn from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Self { + OffloadMetadata { + payload_size: get_payload_size(tcx, ty), + mode: MappingFlags::from_ty(tcx, ty), + } + } +} + +// FIXME(Sa4dUs): implement a solid logic to determine the payload size +fn get_payload_size<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> u64 { + match ty.kind() { + ty::RawPtr(inner, _) | ty::Ref(_, inner, _) => get_payload_size(tcx, *inner), + _ => tcx + .layout_of(PseudoCanonicalInput { + typing_env: TypingEnv::fully_monomorphized(), + value: ty, + }) + .unwrap() + .size + .bytes(), + } +} + +impl MappingFlags { + fn from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Self { + use rustc_ast::Mutability::*; + + match ty.kind() { + ty::Bool + | ty::Char + | ty::Int(_) + | ty::Uint(_) + | ty::Float(_) + | ty::Adt(_, _) + | ty::Tuple(_) + | ty::Array(_, _) + | ty::Alias(_, _) + | ty::Param(_) => MappingFlags::TO, + + ty::RawPtr(_, Not) | ty::Ref(_, _, Not) => MappingFlags::TO, + + ty::RawPtr(_, Mut) | ty::Ref(_, _, Mut) => MappingFlags::TO | MappingFlags::FROM, + + ty::Slice(_) | ty::Str | ty::Dynamic(_, _) => MappingFlags::TO | MappingFlags::FROM, + + ty::Foreign(_) | ty::Pat(_, _) | ty::UnsafeBinder(_) => { + MappingFlags::TO | MappingFlags::FROM + } + + ty::FnDef(_, _) + | ty::FnPtr(_, _) + | ty::Closure(_, _) + | ty::CoroutineClosure(_, _) + | ty::Coroutine(_, _) + | ty::CoroutineWitness(_, _) + | ty::Never + | ty::Bound(_, _) + | ty::Placeholder(_) + | ty::Infer(_) + | ty::Error(_) 
=> { + tcx.dcx() + .span_err(rustc_span::DUMMY_SP, format!("type `{ty:?}` cannot be offloaded")); + MappingFlags::empty() + } + } + } +} diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index eca4259efa7d1..6576cd3cc3299 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1584,6 +1584,7 @@ symbols! { object_safe_for_dispatch, of, off, + offload, offset, offset_of, offset_of_enum, @@ -1966,6 +1967,7 @@ symbols! { rustc_objc_class, rustc_objc_selector, rustc_object_lifetime_default, + rustc_offload_kernel, rustc_on_unimplemented, rustc_outlives, rustc_paren_sugar, diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs index 564a896076b0d..798df502ce010 100644 --- a/library/core/src/intrinsics/mod.rs +++ b/library/core/src/intrinsics/mod.rs @@ -3324,6 +3324,35 @@ pub const fn copysignf128(x: f128, y: f128) -> f128; #[rustc_intrinsic] pub const fn autodiff(f: F, df: G, args: T) -> R; +/// Generates the LLVM body of a wrapper function to offload a kernel `f`. +/// +/// Type Parameters: +/// - `F`: The kernel to offload. Must be a function item. +/// - `T`: A tuple of arguments passed to `f`. +/// - `R`: The return type of the kernel. +/// +/// Example usage (pseudocode): +/// +/// ```rust,ignore (pseudocode) +/// fn kernel(x: *mut [f64; 128]) { +/// core::intrinsics::offload(kernel_1, (x,)) +/// } +/// +/// #[cfg(target_os = "linux")] +/// extern "C" { +/// pub fn kernel_1(array_b: *mut [f64; 128]); +/// } +/// +/// #[cfg(not(target_os = "linux"))] +/// #[rustc_offload_kernel] +/// extern "gpu-kernel" fn kernel_1(x: *mut [f64; 128]) { +/// unsafe { (*x)[0] = 21.0 }; +/// } +/// ``` +#[rustc_nounwind] +#[rustc_intrinsic] +pub const fn offload(f: F, args: T) -> R; + /// Inform Miri that a given pointer definitely has a certain alignment. 
#[cfg(miri)] #[rustc_allow_const_fn_unstable(const_eval_select)] diff --git a/tests/codegen-llvm/gpu_offload/gpu_host.rs b/tests/codegen-llvm/gpu_offload/gpu_host.rs index fac4054d1b7ff..b0f83c825705f 100644 --- a/tests/codegen-llvm/gpu_offload/gpu_host.rs +++ b/tests/codegen-llvm/gpu_offload/gpu_host.rs @@ -11,6 +11,7 @@ // when inside of a function called main. This, too, is a temporary workaround for not having a // frontend. +#![feature(core_intrinsics)] #![no_main] #[unsafe(no_mangle)] @@ -25,73 +26,70 @@ fn main() { // CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } // CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 } -// CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 1024] -// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 35] -// CHECK: @.kernel_1.region_id = weak unnamed_addr constant i8 0 -// CHECK: @.offloading.entry_name.1 = internal unnamed_addr constant [9 x i8] c"kernel_1\00", section ".llvm.rodata.offloading", align 1 -// CHECK: @.offloading.entry.kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_1.region_id, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8 -// CHECK: @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK: @.offload_sizes._kernel_1 = private unnamed_addr constant [1 x i64] [i64 1024] +// CHECK: @.offload_maptypes._kernel_1 = private unnamed_addr constant [1 x i64] [i64 35] +// CHECK: @._kernel_1.region_id = internal unnamed_addr constant i8 0 +// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1 +// CHECK: @.offloading.entry._kernel_1 = internal constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8 + +// CHECK: @anon.{{.*}}.0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +// CHECK: @anon.{{.*}}.1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @anon.{{.*}}.0 }, align 8 // CHECK: Function Attrs: // CHECK-NEXT: define{{( dso_local)?}} void @main() // CHECK-NEXT: start: // CHECK-NEXT: %0 = alloca [8 x i8], align 8 // CHECK-NEXT: %x = alloca [1024 x i8], align 16 +// CHECK: call void @kernel_1(ptr noalias noundef nonnull align 4 dereferenceable(1024) %x) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %0) +// CHECK-NEXT: store ptr %x, ptr %0, align 8 +// CHECK-NEXT: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #4, !srcloc !4 +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %0) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 1024, ptr nonnull %x) +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +// CHECK: define{{( dso_local)?}} void @kernel_1(ptr noalias noundef align 4 dereferenceable(1024) %x) +// CHECK-NEXT: start: // CHECK-NEXT: %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8 // CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8 // CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8 // CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8 // CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8 -// CHECK: call void @llvm.memset.p0.i64(ptr align 8 %EmptyDesc, i8 0, i64 32, i1 false) -// CHECK-NEXT: %1 = getelementptr inbounds float, ptr %x, i32 0 -// CHECK-NEXT: call void @__tgt_register_lib(ptr %EmptyDesc) +// CHECK-NEXT: %dummy = load volatile ptr, ptr @.offload_sizes._kernel_1, align 8 +// CHECK-NEXT: %dummy1 = load volatile ptr, ptr @.offloading.entry._kernel_1, align 8 +// CHECK-NEXT: call void 
@llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %EmptyDesc, i8 0, i64 32, i1 false) +// CHECK-NEXT: call void @__tgt_register_lib(ptr nonnull %EmptyDesc) // CHECK-NEXT: call void @__tgt_init_all_rtls() -// CHECK-NEXT: %2 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 -// CHECK-NEXT: store ptr %x, ptr %2, align 8 -// CHECK-NEXT: %3 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0 -// CHECK-NEXT: store ptr %1, ptr %3, align 8 -// CHECK-NEXT: %4 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0 -// CHECK-NEXT: store i64 1024, ptr %4, align 8 -// CHECK-NEXT: %5 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 -// CHECK-NEXT: %6 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0 -// CHECK-NEXT: %7 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0 -// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %5, ptr %6, ptr %7, ptr @.offload_maptypes.1, ptr null, ptr null) -// CHECK-NEXT: %8 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0 -// CHECK-NEXT: store i32 3, ptr %8, align 4 -// CHECK-NEXT: %9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr %9, align 4 -// CHECK-NEXT: %10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2 -// CHECK-NEXT: store ptr %5, ptr %10, align 8 -// CHECK-NEXT: %11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3 -// CHECK-NEXT: store ptr %6, ptr %11, align 8 -// CHECK-NEXT: %12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4 -// CHECK-NEXT: store ptr %7, ptr %12, align 8 -// CHECK-NEXT: %13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.1, ptr %13, align 8 -// 
CHECK-NEXT: %14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr %14, align 8 -// CHECK-NEXT: %15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr %15, align 8 -// CHECK-NEXT: %16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr %16, align 8 -// CHECK-NEXT: %17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9 -// CHECK-NEXT: store i64 0, ptr %17, align 8 -// CHECK-NEXT: %18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10 -// CHECK-NEXT: store [3 x i32] [i32 2097152, i32 0, i32 0], ptr %18, align 4 -// CHECK-NEXT: %19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11 -// CHECK-NEXT: store [3 x i32] [i32 256, i32 0, i32 0], ptr %19, align 4 -// CHECK-NEXT: %20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12 -// CHECK-NEXT: store i32 0, ptr %20, align 4 -// CHECK-NEXT: %21 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args) -// CHECK-NEXT: %22 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 -// CHECK-NEXT: %23 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0 -// CHECK-NEXT: %24 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0 -// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %22, ptr %23, ptr %24, ptr @.offload_maptypes.1, ptr null, ptr null) -// CHECK-NEXT: call void @__tgt_unregister_lib(ptr %EmptyDesc) -// CHECK: store ptr %x, ptr %0, align 8 -// CHECK-NEXT: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) -// CHECK: ret void +// CHECK-NEXT: store ptr %x, ptr %.offload_baseptrs, align 8 +// CHECK-NEXT: store ptr %x, ptr 
%.offload_ptrs, align 8 +// CHECK-NEXT: store i64 1024, ptr %.offload_sizes, align 8 +// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1, ptr null, ptr null) +// CHECK-NEXT: store i32 3, ptr %kernel_args, align 8 +// CHECK-NEXT: %0 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 4 +// CHECK-NEXT: store i32 1, ptr %0, align 4 +// CHECK-NEXT: %1 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 8 +// CHECK-NEXT: store ptr %.offload_baseptrs, ptr %1, align 8 +// CHECK-NEXT: %2 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 16 +// CHECK-NEXT: store ptr %.offload_ptrs, ptr %2, align 8 +// CHECK-NEXT: %3 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 24 +// CHECK-NEXT: store ptr %.offload_sizes, ptr %3, align 8 +// CHECK-NEXT: %4 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 32 +// CHECK-NEXT: store ptr @.offload_maptypes._kernel_1, ptr %4, align 8 +// CHECK-NEXT: %5 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 40 +// CHECK-NEXT: %6 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 72 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %5, i8 0, i64 32, i1 false) +// CHECK-NEXT: store <4 x i32> <i32 2097152, i32 0, i32 0, i32 256>, ptr %6, align 8 +// CHECK-NEXT: %.fca.1.gep3 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 88 +// CHECK-NEXT: store i32 0, ptr %.fca.1.gep3, align 8 +// CHECK-NEXT: %.fca.2.gep4 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 92 +// CHECK-NEXT: store i32 0, ptr %.fca.2.gep4, align 4 +// CHECK-NEXT: %7 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 96 +// CHECK-NEXT: store i32 0, ptr %7, align 8 +// CHECK-NEXT: %8 = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2097152, i32 256, ptr nonnull @._kernel_1.region_id, ptr nonnull %kernel_args) +// CHECK-NEXT: call void
@__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1, ptr null, ptr null) +// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc) +// CHECK-NEXT: ret void // CHECK-NEXT: } // CHECK: Function Attrs: nounwind @@ -100,6 +98,12 @@ fn main() { #[unsafe(no_mangle)] #[inline(never)] pub fn kernel_1(x: &mut [f32; 256]) { + core::intrinsics::offload(_kernel_1, (x,)) +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub fn _kernel_1(x: &mut [f32; 256]) { for i in 0..256 { x[i] = 21.0; } diff --git a/tests/ui/offload/check_config.fail.stderr b/tests/ui/offload/check_config.fail.stderr new file mode 100644 index 0000000000000..a9162ed926cb0 --- /dev/null +++ b/tests/ui/offload/check_config.fail.stderr @@ -0,0 +1,6 @@ +error: using the offload feature requires -Z offload=Enable + +error: using the offload feature requires -C lto=fat + +error: aborting due to 2 previous errors + diff --git a/tests/ui/offload/check_config.rs b/tests/ui/offload/check_config.rs new file mode 100644 index 0000000000000..667c6d9788bae --- /dev/null +++ b/tests/ui/offload/check_config.rs @@ -0,0 +1,23 @@ +//@ revisions: pass fail +//@ no-prefer-dynamic +//@ needs-enzyme +//@[pass] build-pass +//@[fail] build-fail +//@[pass] compile-flags: -Zunstable-options -Zoffload=Enable -Clto=fat --emit=metadata +//@[fail] compile-flags: -Clto=thin + +//[fail]~? ERROR: using the offload feature requires -Z offload=Enable +//[fail]~? 
ERROR: using the offload feature requires -C lto=fat + +#![feature(core_intrinsics)] + +fn main() { + let mut x = [3.0; 256]; + kernel_1(&mut x); +} + +fn kernel_1(x: &mut [f32; 256]) { + core::intrinsics::offload(_kernel_1, (x,)) +} + +fn _kernel_1(x: &mut [f32; 256]) {} From f39ec4756fd70ca2509d204bb5f508324c8bd774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcelo=20Dom=C3=ADnguez?= Date: Mon, 17 Nov 2025 18:40:25 +0100 Subject: [PATCH 12/13] Update rustc-dev-guide --- library/core/src/intrinsics/mod.rs | 3 +++ src/doc/rustc-dev-guide/src/offload/usage.md | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs index 798df502ce010..9d99b05b38d69 100644 --- a/library/core/src/intrinsics/mod.rs +++ b/library/core/src/intrinsics/mod.rs @@ -3349,6 +3349,9 @@ pub const fn autodiff(f: F, df: G, args: T) -> /// unsafe { (*x)[0] = 21.0 }; /// } /// ``` +/// +/// For reference, see the Clang documentation on offloading: +/// <https://clang.llvm.org/docs/OffloadingDesign.html>. #[rustc_nounwind] #[rustc_intrinsic] pub const fn offload(f: F, args: T) -> R; diff --git a/src/doc/rustc-dev-guide/src/offload/usage.md b/src/doc/rustc-dev-guide/src/offload/usage.md index 7abf90aa6e0b6..bcb97d134b9ed 100644 --- a/src/doc/rustc-dev-guide/src/offload/usage.md +++ b/src/doc/rustc-dev-guide/src/offload/usage.md @@ -5,6 +5,8 @@ We currently work on launching the following Rust kernel on the GPU.
To follow a ```rust #![feature(abi_gpu_kernel)] +#![feature(rustc_attrs)] +#![feature(core_intrinsics)] #![no_std] #[cfg(target_os = "linux")] @@ -12,6 +14,7 @@ extern crate libc; #[cfg(target_os = "linux")] use libc::c_char; +#[cfg(target_os = "linux")] use core::mem; #[panic_handler] @@ -38,7 +41,7 @@ fn main() { } unsafe { - kernel_1(array_c); + kernel(array_c); } core::hint::black_box(&array_c); unsafe { @@ -52,6 +55,11 @@ fn main() { } } +#[inline(never)] +unsafe fn kernel(x: *mut [f64; 256]) { + core::intrinsics::offload(kernel_1, (x,)) +} + #[cfg(target_os = "linux")] unsafe extern "C" { pub fn kernel_1(array_b: *mut [f64; 256]); @@ -60,6 +68,7 @@ unsafe extern "C" { #[cfg(not(target_os = "linux"))] #[unsafe(no_mangle)] #[inline(never)] +#[rustc_offload_kernel] pub extern "gpu-kernel" fn kernel_1(x: *mut [f64; 256]) { unsafe { (*x)[0] = 21.0 }; } From c87bebd553041679ebac00ee572b9f6cce5b9260 Mon Sep 17 00:00:00 2001 From: Jason Newcomb Date: Tue, 25 Nov 2025 23:07:27 -0500 Subject: [PATCH 13/13] Add `Copy` to some AST enums. --- compiler/rustc_ast/src/ast.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index f2061f3088a25..900af43dc0785 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -857,7 +857,7 @@ impl BindingMode { } } -#[derive(Clone, Encodable, Decodable, Debug, Walkable)] +#[derive(Clone, Copy, Encodable, Decodable, Debug, Walkable)] pub enum RangeEnd { /// `..=` or `...` Included(RangeSyntax), @@ -865,7 +865,7 @@ pub enum RangeEnd { Excluded, } -#[derive(Clone, Encodable, Decodable, Debug, Walkable)] +#[derive(Clone, Copy, Encodable, Decodable, Debug, Walkable)] pub enum RangeSyntax { /// `...` DotDotDot, @@ -1915,7 +1915,7 @@ pub enum ForLoopKind { } /// Used to differentiate between `async {}` blocks and `gen {}` blocks. 
-#[derive(Clone, Encodable, Decodable, Debug, PartialEq, Eq, Walkable)] +#[derive(Clone, Copy, Encodable, Decodable, Debug, PartialEq, Eq, Walkable)] pub enum GenBlockKind { Async, Gen,