Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 28 additions & 5 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Miri implementation of these intrinsics will need similar adjustments -- and likewise for cranelift and gcc, if they support the intrinsic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

afaik cg_gcc doesn't support these at all, I am not familiar with cg_clif at all. I will change the miri/const-eval implementation once #146568 merges, otherwise there will be nasty merge conflicts

Copy link
Member

@RalfJung RalfJung Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will change the miri/const-eval implementation once #146568 merges, otherwise there will be nasty merge conflicts

What we could do is land the part of that PR that moves the intrinsics to rustc_const_eval, without marking them const. Then the constification and associated tests can proceed in parallel with other work such as this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that seems like a nice idea, should I split the PR then?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, feel free to.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @tgross35, in that case I don't think there's a need to review it anymore, @RalfJung had already reviewed the const-eval part

Copy link
Member

@RalfJung RalfJung Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, they'll be assigned reviewer for the 2nd PR that actually marks the intrinsics const and adds all the tests, I presume. (Or maybe we can find another reviewer for that.)

Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rustc_hir::def_id::LOCAL_CRATE;
use rustc_hir::{self as hir};
use rustc_middle::mir::BinOp;
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TyCtxt, TypingEnv};
use rustc_middle::ty::{self, GenericArgsRef, Instance, SimdAlign, Ty, TyCtxt, TypingEnv};
use rustc_middle::{bug, span_bug};
use rustc_span::{Span, Symbol, sym};
use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate};
Expand Down Expand Up @@ -1828,15 +1828,34 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
));
}

/// Builds the constant `i32` alignment operand that LLVM's masked
/// load/store intrinsics take, from the `SimdAlign` const generic of the
/// `simd_masked_load`/`simd_masked_store` intrinsics.
fn llvm_alignment<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    alignment: SimdAlign,
    vector_ty: Ty<'tcx>,
    element_ty: Ty<'tcx>,
) -> &'ll Value {
    // Translate the requested alignment mode into a byte count.
    let bytes = match alignment {
        // No alignment requirement beyond 1 byte.
        SimdAlign::Unaligned => 1,
        // Aligned to the vector's element type.
        SimdAlign::Element => bx.align_of(element_ty).bytes(),
        // Aligned to the whole vector type.
        SimdAlign::Vector => bx.align_of(vector_ty).bytes(),
    };

    // Alignments are small powers of two, so the narrowing cast is lossless.
    bx.const_i32(bytes as i32)
}

if name == sym::simd_masked_load {
// simd_masked_load(mask: <N x i{M}>, pointer: *_ T, values: <N x T>) -> <N x T>
// simd_masked_load<_, _, _, const ALIGN: SimdAlign>(mask: <N x i{M}>, pointer: *_ T, values: <N x T>) -> <N x T>
// * N: number of elements in the input vectors
// * T: type of the element to load
// * M: any integer width is supported, will be truncated to i1
// Loads contiguous elements from memory behind `pointer`, but only for
// those lanes whose `mask` bit is enabled.
// The memory addresses corresponding to the “off” lanes are not accessed.

let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0]
.unwrap_leaf()
.to_simd_alignment();

// The element type of the "mask" argument must be a signed integer type of any width
let mask_ty = in_ty;
let (mask_len, mask_elem) = (in_len, in_elem);
Expand Down Expand Up @@ -1893,7 +1912,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);

// Alignment of T, must be a constant integer value:
let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
let alignment = llvm_alignment(bx, alignment, values_ty, values_elem);

let llvm_pointer = bx.type_ptr();

Expand All @@ -1908,14 +1927,18 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
}

if name == sym::simd_masked_store {
// simd_masked_store(mask: <N x i{M}>, pointer: *mut T, values: <N x T>) -> ()
// simd_masked_store<_, _, _, const ALIGN: SimdAlign>(mask: <N x i{M}>, pointer: *mut T, values: <N x T>) -> ()
// * N: number of elements in the input vectors
// * T: type of the element to store
// * M: any integer width is supported, will be truncated to i1
// Stores contiguous elements to memory behind `pointer`, but only for
// those lanes whose `mask` bit is enabled.
// The memory addresses corresponding to the “off” lanes are not accessed.

let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0]
.unwrap_leaf()
.to_simd_alignment();

// The element type of the "mask" argument must be a signed integer type of any width
let mask_ty = in_ty;
let (mask_len, mask_elem) = (in_len, in_elem);
Expand Down Expand Up @@ -1966,7 +1989,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);

// Alignment of T, must be a constant integer value:
let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
let alignment = llvm_alignment(bx, alignment, values_ty, values_elem);

let llvm_pointer = bx.type_ptr();

Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,8 @@ pub(crate) fn check_intrinsic_type(
(1, 0, vec![param(0), param(0), param(0)], param(0))
}
sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)),
sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)),
sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
sym::simd_masked_load => (3, 1, vec![param(0), param(1), param(2)], param(2)),
sym::simd_masked_store => (3, 1, vec![param(0), param(1), param(2)], tcx.types.unit),
sym::simd_scatter => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
sym::simd_insert | sym::simd_insert_dyn => {
(2, 0, vec![param(0), tcx.types.u32, param(1)], param(0))
Expand Down
24 changes: 24 additions & 0 deletions compiler/rustc_middle/src/ty/consts/int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ pub enum AtomicOrdering {
SeqCst = 4,
}

/// An enum to represent the compiler-side view of `intrinsics::simd::SimdAlign`.
///
/// This is used to decode the `ALIGN` const generic argument of the
/// `simd_masked_load`/`simd_masked_store` intrinsics.
#[derive(Debug, Copy, Clone)]
pub enum SimdAlign {
    // These values must match `intrinsics::simd::SimdAlign`!
    /// No alignment requirement on the pointer.
    Unaligned = 0,
    /// Pointer must be aligned to the vector's element type.
    Element = 1,
    /// Pointer must be aligned to the full vector type.
    Vector = 2,
}

impl std::fmt::Debug for ConstInt {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { int, signed, is_ptr_sized_integral } = *self;
Expand Down Expand Up @@ -350,6 +359,21 @@ impl ScalarInt {
}
}

#[inline]
pub fn to_simd_alignment(self) -> SimdAlign {
use SimdAlign::*;
let val = self.to_u32();
if val == Unaligned as u32 {
Unaligned
} else if val == Element as u32 {
Element
} else if val == Vector as u32 {
Vector
} else {
panic!("not a valid simd alignment")
}
}

/// Converts the `ScalarInt` to `bool`.
/// Panics if the `size` of the `ScalarInt` is not equal to 1 byte.
/// Errors if it is not a valid `bool`.
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_middle/src/ty/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub use self::closure::{
};
pub use self::consts::{
AnonConstKind, AtomicOrdering, Const, ConstInt, ConstKind, ConstToValTreeResult, Expr,
ExprKind, ScalarInt, UnevaluatedConst, ValTree, ValTreeKind, Value,
ExprKind, ScalarInt, SimdAlign, UnevaluatedConst, ValTree, ValTreeKind, Value,
};
pub use self::context::{
CtxtInterners, CurrentGcx, DeducedParamAttrs, Feed, FreeRegionInfo, GlobalCtxt, Lift, TyCtxt,
Expand Down
25 changes: 19 additions & 6 deletions library/core/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! In this module, a "vector" is any `repr(simd)` type.

use crate::marker::ConstParamTy;

/// Inserts an element into a vector, returning the updated vector.
///
/// `T` must be a vector with element type `U`, and `idx` must be `const`.
Expand Down Expand Up @@ -377,6 +379,19 @@ pub unsafe fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
#[rustc_nounwind]
pub unsafe fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);

/// A type for alignment options for SIMD masked load/store intrinsics.
///
/// Passed as the `ALIGN` const generic parameter of [`simd_masked_load`] and
/// [`simd_masked_store`] (hence the `ConstParamTy` derive) to describe the
/// alignment the caller guarantees for the pointer argument.
#[derive(Debug, ConstParamTy, PartialEq, Eq)]
pub enum SimdAlign {
    // These values must match the compiler's `SimdAlign` defined in
    // `rustc_middle/src/ty/consts/int.rs`!
    /// No alignment requirements on the pointer
    Unaligned = 0,
    /// The pointer must be aligned to the element type of the SIMD vector
    Element = 1,
    /// The pointer must be aligned to the SIMD vector type
    Vector = 2,
}

/// Reads a vector of pointers.
///
/// `T` must be a vector.
Expand All @@ -392,13 +407,12 @@ pub unsafe fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
/// `val`.
///
/// # Safety
/// Unmasked values in `T` must be readable as if by `<ptr>::read` (e.g. aligned to the element
/// type).
/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details.
///
/// `mask` must only contain `0` or `!0` values.
#[rustc_intrinsic]
#[rustc_nounwind]
pub unsafe fn simd_masked_load<V, U, T>(mask: V, ptr: U, val: T) -> T;
pub unsafe fn simd_masked_load<V, U, T, const ALIGN: SimdAlign>(mask: V, ptr: U, val: T) -> T;

/// Writes to a vector of pointers.
///
Expand All @@ -414,13 +428,12 @@ pub unsafe fn simd_masked_load<V, U, T>(mask: V, ptr: U, val: T) -> T;
/// Otherwise if the corresponding value in `mask` is `0`, do nothing.
///
/// # Safety
/// Unmasked values in `T` must be writeable as if by `<ptr>::write` (e.g. aligned to the element
/// type).
/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details.
///
/// `mask` must only contain `0` or `!0` values.
#[rustc_intrinsic]
#[rustc_nounwind]
pub unsafe fn simd_masked_store<V, U, T>(mask: V, ptr: U, val: T);
pub unsafe fn simd_masked_store<V, U, T, const ALIGN: SimdAlign>(mask: V, ptr: U, val: T);

/// Adds two simd vectors elementwise, with saturation.
///
Expand Down
18 changes: 16 additions & 2 deletions library/portable-simd/crates/core_simd/src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,14 @@ where
or: Self,
) -> Self {
// SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) }
unsafe {
core::intrinsics::simd::simd_masked_load::<
_,
_,
_,
{ core::intrinsics::simd::SimdAlign::Element },
>(enable.to_int(), ptr, or)
}
}

/// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
Expand Down Expand Up @@ -723,7 +730,14 @@ where
#[inline]
pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
// SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) }
unsafe {
core::intrinsics::simd::simd_masked_store::<
_,
_,
_,
{ core::intrinsics::simd::SimdAlign::Element },
>(enable.to_int(), ptr, self)
}
}

/// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
Expand Down
24 changes: 19 additions & 5 deletions src/tools/miri/tests/pass/intrinsics/portable-simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -680,25 +680,39 @@ fn simd_float_intrinsics() {
}

fn simd_masked_loadstore() {
use intrinsics::*;

// The buffer is deliberately too short, so reading the last element would be UB.
let buf = [3i32; 3];
let default = i32x4::splat(0);
let mask = i32x4::from_array([!0, !0, !0, 0]);
let vals = unsafe { intrinsics::simd_masked_load(mask, buf.as_ptr(), default) };
let vals =
unsafe { simd_masked_load::<_, _, _, { SimdAlign::Element }>(mask, buf.as_ptr(), default) };
assert_eq!(vals, i32x4::from_array([3, 3, 3, 0]));
// Also read in a way that the *first* element is OOB.
let mask2 = i32x4::from_array([0, !0, !0, !0]);
let vals =
unsafe { intrinsics::simd_masked_load(mask2, buf.as_ptr().wrapping_sub(1), default) };
let vals = unsafe {
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask2,
buf.as_ptr().wrapping_sub(1),
default,
)
};
assert_eq!(vals, i32x4::from_array([0, 3, 3, 3]));

// The buffer is deliberately too short, so writing the last element would be UB.
let mut buf = [42i32; 3];
let vals = i32x4::from_array([1, 2, 3, 4]);
unsafe { intrinsics::simd_masked_store(mask, buf.as_mut_ptr(), vals) };
unsafe { simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, buf.as_mut_ptr(), vals) };
assert_eq!(buf, [1, 2, 3]);
// Also write in a way that the *first* element is OOB.
unsafe { intrinsics::simd_masked_store(mask2, buf.as_mut_ptr().wrapping_sub(1), vals) };
unsafe {
simd_masked_store::<_, _, _, { SimdAlign::Element }>(
mask2,
buf.as_mut_ptr().wrapping_sub(1),
vals,
)
};
assert_eq!(buf, [2, 3, 4]);
}

Expand Down
40 changes: 35 additions & 5 deletions tests/assembly-llvm/simd-intrinsic-mask-load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort

#![feature(no_core, lang_items, repr_simd, intrinsics)]
#![feature(no_core, lang_items, repr_simd, intrinsics, adt_const_params)]
#![no_core]
#![allow(non_camel_case_types)]

Expand All @@ -35,7 +35,7 @@ pub struct f64x4([f64; 4]);
pub struct m64x4([i64; 4]);

#[rustc_intrinsic]
unsafe fn simd_masked_load<M, P, T>(mask: M, pointer: P, values: T) -> T;
unsafe fn simd_masked_load<M, P, T, const ALIGN: SimdAlign>(mask: M, pointer: P, values: T) -> T;

// CHECK-LABEL: load_i8x16
#[no_mangle]
Expand All @@ -56,7 +56,11 @@ pub unsafe extern "C" fn load_i8x16(mask: m8x16, pointer: *const i8) -> i8x16 {
// x86-avx512-NOT: vpsllw
// x86-avx512: vpmovb2m k1, xmm0
// x86-avx512-NEXT: vmovdqu8 xmm0 {k1} {z}, xmmword ptr [rdi]
simd_masked_load(mask, pointer, i8x16([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask,
pointer,
i8x16([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
)
}

// CHECK-LABEL: load_f32x8
Expand All @@ -68,7 +72,29 @@ pub unsafe extern "C" fn load_f32x8(mask: m32x8, pointer: *const f32) -> f32x8 {
// x86-avx512-NOT: vpslld
// x86-avx512: vpmovd2m k1, ymm0
// x86-avx512-NEXT: vmovups ymm0 {k1} {z}, ymmword ptr [rdi]
simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]))
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask,
pointer,
f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]),
)
}

// CHECK-LABEL: load_f32x8_aligned
#[no_mangle]
pub unsafe extern "C" fn load_f32x8_aligned(mask: m32x8, pointer: *const f32) -> f32x8 {
    // With `SimdAlign::Vector` the caller promises the pointer is aligned to
    // the whole vector type, so the backend may emit aligned vector moves.
    //
    // x86-avx2-NOT: vpslld
    // x86-avx2: vmaskmovps ymm0, ymm0, ymmword ptr [rdi]
    //
    // x86-avx512-NOT: vpslld
    // x86-avx512: vpmovd2m k1, ymm0
    // x86-avx512-NEXT: vmovaps ymm0 {k1} {z}, ymmword ptr [rdi]
    //
    // this aligned version should generate `movaps` instead of `movups`
    simd_masked_load::<_, _, _, { SimdAlign::Vector }>(
        mask,
        pointer,
        f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]),
    )
}

// CHECK-LABEL: load_f64x4
Expand All @@ -79,5 +105,9 @@ pub unsafe extern "C" fn load_f64x4(mask: m64x4, pointer: *const f64) -> f64x4 {
//
// x86-avx512-NOT: vpsllq
// x86-avx512: vpmovq2m k1, ymm0
simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64]))
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask,
pointer,
f64x4([0_f64, 0_f64, 0_f64, 0_f64]),
)
}
Loading
Loading