Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 28 additions & 5 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Miri implementation of these intrinsics will need similar adjustments -- and likewise for cranelift and gcc, if they support the intrinsic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

afaik cg_gcc doesn't support these at all, I am not familiar with cg_clif at all. I will change the miri/const-eval implementation once #146568 merges, otherwise there will be nasty merge conflicts

Copy link
Member

@RalfJung RalfJung Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will change the miri/const-eval implementation once #146568 merges, otherwise there will be nasty merge conflicts

What we could do is land the part of that PR that moves the intrinsics to rustc_const_eval, without marking them const. Then the constification and associated tests can proceed in parallel with other work such as this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that seems like a nice idea, should I split the PR then?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, feel free to.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @tgross35, in that case I don't think there's a need to review it anymore, @RalfJung had already reviewed the const-eval part

Copy link
Member

@RalfJung RalfJung Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, they'll be assigned reviewer for the 2nd PR that actually marks the intrinsics const and adds all the tests, I presume. (Or maybe we can find another reviewer for that.)

Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rustc_hir::def_id::LOCAL_CRATE;
use rustc_hir::{self as hir};
use rustc_middle::mir::BinOp;
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TyCtxt, TypingEnv};
use rustc_middle::ty::{self, GenericArgsRef, Instance, SimdAlign, Ty, TyCtxt, TypingEnv};
use rustc_middle::{bug, span_bug};
use rustc_span::{Span, Symbol, sym};
use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate};
Expand Down Expand Up @@ -1828,15 +1828,34 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
));
}

/// Builds the constant `i32` alignment operand that LLVM's masked
/// load/store intrinsics take, from the `SimdAlign` const generic of the
/// `simd_masked_load`/`simd_masked_store` intrinsics.
fn llvm_alignment<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    alignment: SimdAlign,
    vector_ty: Ty<'tcx>,
    element_ty: Ty<'tcx>,
) -> &'ll Value {
    // Translate the requested alignment mode into a byte count.
    let bytes = match alignment {
        // No alignment requirement beyond 1 byte.
        SimdAlign::Unaligned => 1,
        // Aligned to the vector's element type.
        SimdAlign::Element => bx.align_of(element_ty).bytes(),
        // Aligned to the whole vector type.
        SimdAlign::Vector => bx.align_of(vector_ty).bytes(),
    };

    // Alignments are small powers of two, so the narrowing cast is lossless.
    bx.const_i32(bytes as i32)
}

if name == sym::simd_masked_load {
// simd_masked_load(mask: <N x i{M}>, pointer: *_ T, values: <N x T>) -> <N x T>
// simd_masked_load<_, _, _, const ALIGN: SimdAlign>(mask: <N x i{M}>, pointer: *_ T, values: <N x T>) -> <N x T>
// * N: number of elements in the input vectors
// * T: type of the element to load
// * M: any integer width is supported, will be truncated to i1
// Loads contiguous elements from memory behind `pointer`, but only for
// those lanes whose `mask` bit is enabled.
// The memory addresses corresponding to the “off” lanes are not accessed.

let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0]
.unwrap_leaf()
.to_simd_alignment();

// The element type of the "mask" argument must be a signed integer type of any width
let mask_ty = in_ty;
let (mask_len, mask_elem) = (in_len, in_elem);
Expand Down Expand Up @@ -1893,7 +1912,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);

// Alignment of T, must be a constant integer value:
let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
let alignment = llvm_alignment(bx, alignment, values_ty, values_elem);

let llvm_pointer = bx.type_ptr();

Expand All @@ -1908,14 +1927,18 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
}

if name == sym::simd_masked_store {
// simd_masked_store(mask: <N x i{M}>, pointer: *mut T, values: <N x T>) -> ()
// simd_masked_store<_, _, _, const ALIGN: SimdAlign>(mask: <N x i{M}>, pointer: *mut T, values: <N x T>) -> ()
// * N: number of elements in the input vectors
// * T: type of the element to store
// * M: any integer width is supported, will be truncated to i1
// Stores contiguous elements to memory behind `pointer`, but only for
// those lanes whose `mask` bit is enabled.
// The memory addresses corresponding to the “off” lanes are not accessed.

let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0]
.unwrap_leaf()
.to_simd_alignment();

// The element type of the "mask" argument must be a signed integer type of any width
let mask_ty = in_ty;
let (mask_len, mask_elem) = (in_len, in_elem);
Expand Down Expand Up @@ -1966,7 +1989,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);

// Alignment of T, must be a constant integer value:
let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
let alignment = llvm_alignment(bx, alignment, values_ty, values_elem);

let llvm_pointer = bx.type_ptr();

Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,8 @@ pub(crate) fn check_intrinsic_type(
(1, 0, vec![param(0), param(0), param(0)], param(0))
}
sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)),
sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)),
sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
sym::simd_masked_load => (3, 1, vec![param(0), param(1), param(2)], param(2)),
sym::simd_masked_store => (3, 1, vec![param(0), param(1), param(2)], tcx.types.unit),
sym::simd_scatter => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
sym::simd_insert | sym::simd_insert_dyn => {
(2, 0, vec![param(0), tcx.types.u32, param(1)], param(0))
Expand Down
24 changes: 24 additions & 0 deletions compiler/rustc_middle/src/ty/consts/int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ pub enum AtomicOrdering {
SeqCst = 4,
}

/// An enum to represent the compiler-side view of `intrinsics::simd::SimdAlign`.
///
/// This is used to decode the `ALIGN` const generic argument of the
/// `simd_masked_load`/`simd_masked_store` intrinsics.
#[derive(Debug, Copy, Clone)]
pub enum SimdAlign {
    // These values must match `intrinsics::simd::SimdAlign`!
    /// No alignment requirement on the pointer.
    Unaligned = 0,
    /// Pointer must be aligned to the vector's element type.
    Element = 1,
    /// Pointer must be aligned to the full vector type.
    Vector = 2,
}

impl std::fmt::Debug for ConstInt {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { int, signed, is_ptr_sized_integral } = *self;
Expand Down Expand Up @@ -350,6 +359,21 @@ impl ScalarInt {
}
}

#[inline]
pub fn to_simd_alignment(self) -> SimdAlign {
use SimdAlign::*;
let val = self.to_u32();
if val == Unaligned as u32 {
Unaligned
} else if val == Element as u32 {
Element
} else if val == Vector as u32 {
Vector
} else {
panic!("not a valid simd alignment")
}
}

/// Converts the `ScalarInt` to `bool`.
/// Panics if the `size` of the `ScalarInt` is not equal to 1 byte.
/// Errors if it is not a valid `bool`.
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_middle/src/ty/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub use self::closure::{
};
pub use self::consts::{
AnonConstKind, AtomicOrdering, Const, ConstInt, ConstKind, ConstToValTreeResult, Expr,
ExprKind, ScalarInt, UnevaluatedConst, ValTree, ValTreeKind, Value,
ExprKind, ScalarInt, SimdAlign, UnevaluatedConst, ValTree, ValTreeKind, Value,
};
pub use self::context::{
CtxtInterners, CurrentGcx, DeducedParamAttrs, Feed, FreeRegionInfo, GlobalCtxt, Lift, TyCtxt,
Expand Down
25 changes: 19 additions & 6 deletions library/core/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! In this module, a "vector" is any `repr(simd)` type.

use crate::marker::ConstParamTy;

/// Inserts an element into a vector, returning the updated vector.
///
/// `T` must be a vector with element type `U`, and `idx` must be `const`.
Expand Down Expand Up @@ -377,6 +379,19 @@ pub unsafe fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
#[rustc_nounwind]
pub unsafe fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);

/// A type for alignment options for SIMD masked load/store intrinsics.
///
/// Passed as the `ALIGN` const generic parameter of [`simd_masked_load`] and
/// [`simd_masked_store`] (hence the `ConstParamTy` derive) to describe the
/// alignment the caller guarantees for the pointer argument.
#[derive(Debug, ConstParamTy, PartialEq, Eq)]
pub enum SimdAlign {
    // These values must match the compiler's `SimdAlign` defined in
    // `rustc_middle/src/ty/consts/int.rs`!
    /// No alignment requirements on the pointer
    Unaligned = 0,
    /// The pointer must be aligned to the element type of the SIMD vector
    Element = 1,
    /// The pointer must be aligned to the SIMD vector type
    Vector = 2,
}

/// Reads a vector of pointers.
///
/// `T` must be a vector.
Expand All @@ -392,13 +407,12 @@ pub unsafe fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
/// `val`.
///
/// # Safety
/// Unmasked values in `T` must be readable as if by `<ptr>::read` (e.g. aligned to the element
/// type).
/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details.
///
/// `mask` must only contain `0` or `!0` values.
#[rustc_intrinsic]
#[rustc_nounwind]
pub unsafe fn simd_masked_load<V, U, T>(mask: V, ptr: U, val: T) -> T;
pub unsafe fn simd_masked_load<V, U, T, const ALIGN: SimdAlign>(mask: V, ptr: U, val: T) -> T;

/// Writes to a vector of pointers.
///
Expand All @@ -414,13 +428,12 @@ pub unsafe fn simd_masked_load<V, U, T>(mask: V, ptr: U, val: T) -> T;
/// Otherwise if the corresponding value in `mask` is `0`, do nothing.
///
/// # Safety
/// Unmasked values in `T` must be writeable as if by `<ptr>::write` (e.g. aligned to the element
/// type).
/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details.
///
/// `mask` must only contain `0` or `!0` values.
#[rustc_intrinsic]
#[rustc_nounwind]
pub unsafe fn simd_masked_store<V, U, T>(mask: V, ptr: U, val: T);
pub unsafe fn simd_masked_store<V, U, T, const ALIGN: SimdAlign>(mask: V, ptr: U, val: T);

/// Adds two simd vectors elementwise, with saturation.
///
Expand Down
18 changes: 16 additions & 2 deletions library/portable-simd/crates/core_simd/src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,14 @@ where
or: Self,
) -> Self {
// SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) }
unsafe {
core::intrinsics::simd::simd_masked_load::<
_,
_,
_,
{ core::intrinsics::simd::SimdAlign::Element },
>(enable.to_int(), ptr, or)
}
}

/// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
Expand Down Expand Up @@ -723,7 +730,14 @@ where
#[inline]
pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
// SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) }
unsafe {
core::intrinsics::simd::simd_masked_store::<
_,
_,
_,
{ core::intrinsics::simd::SimdAlign::Element },
>(enable.to_int(), ptr, self)
}
}

/// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
Expand Down
24 changes: 19 additions & 5 deletions src/tools/miri/tests/pass/intrinsics/portable-simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -680,25 +680,39 @@ fn simd_float_intrinsics() {
}

fn simd_masked_loadstore() {
use intrinsics::*;

// The buffer is deliberately too short, so reading the last element would be UB.
let buf = [3i32; 3];
let default = i32x4::splat(0);
let mask = i32x4::from_array([!0, !0, !0, 0]);
let vals = unsafe { intrinsics::simd_masked_load(mask, buf.as_ptr(), default) };
let vals =
unsafe { simd_masked_load::<_, _, _, { SimdAlign::Element }>(mask, buf.as_ptr(), default) };
assert_eq!(vals, i32x4::from_array([3, 3, 3, 0]));
// Also read in a way that the *first* element is OOB.
let mask2 = i32x4::from_array([0, !0, !0, !0]);
let vals =
unsafe { intrinsics::simd_masked_load(mask2, buf.as_ptr().wrapping_sub(1), default) };
let vals = unsafe {
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask2,
buf.as_ptr().wrapping_sub(1),
default,
)
};
assert_eq!(vals, i32x4::from_array([0, 3, 3, 3]));

// The buffer is deliberately too short, so writing the last element would be UB.
let mut buf = [42i32; 3];
let vals = i32x4::from_array([1, 2, 3, 4]);
unsafe { intrinsics::simd_masked_store(mask, buf.as_mut_ptr(), vals) };
unsafe { simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, buf.as_mut_ptr(), vals) };
assert_eq!(buf, [1, 2, 3]);
// Also write in a way that the *first* element is OOB.
unsafe { intrinsics::simd_masked_store(mask2, buf.as_mut_ptr().wrapping_sub(1), vals) };
unsafe {
simd_masked_store::<_, _, _, { SimdAlign::Element }>(
mask2,
buf.as_mut_ptr().wrapping_sub(1),
vals,
)
};
assert_eq!(buf, [2, 3, 4]);
}

Expand Down
40 changes: 35 additions & 5 deletions tests/assembly-llvm/simd-intrinsic-mask-load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort

#![feature(no_core, lang_items, repr_simd, intrinsics)]
#![feature(no_core, lang_items, repr_simd, intrinsics, adt_const_params)]
#![no_core]
#![allow(non_camel_case_types)]

Expand All @@ -35,7 +35,7 @@ pub struct f64x4([f64; 4]);
pub struct m64x4([i64; 4]);

#[rustc_intrinsic]
unsafe fn simd_masked_load<M, P, T>(mask: M, pointer: P, values: T) -> T;
unsafe fn simd_masked_load<M, P, T, const ALIGN: SimdAlign>(mask: M, pointer: P, values: T) -> T;

// CHECK-LABEL: load_i8x16
#[no_mangle]
Expand All @@ -56,7 +56,11 @@ pub unsafe extern "C" fn load_i8x16(mask: m8x16, pointer: *const i8) -> i8x16 {
// x86-avx512-NOT: vpsllw
// x86-avx512: vpmovb2m k1, xmm0
// x86-avx512-NEXT: vmovdqu8 xmm0 {k1} {z}, xmmword ptr [rdi]
simd_masked_load(mask, pointer, i8x16([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask,
pointer,
i8x16([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
)
}

// CHECK-LABEL: load_f32x8
Expand All @@ -68,7 +72,29 @@ pub unsafe extern "C" fn load_f32x8(mask: m32x8, pointer: *const f32) -> f32x8 {
// x86-avx512-NOT: vpslld
// x86-avx512: vpmovd2m k1, ymm0
// x86-avx512-NEXT: vmovups ymm0 {k1} {z}, ymmword ptr [rdi]
simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]))
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask,
pointer,
f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]),
)
}

// CHECK-LABEL: load_f32x8_aligned
#[no_mangle]
pub unsafe extern "C" fn load_f32x8_aligned(mask: m32x8, pointer: *const f32) -> f32x8 {
    // With `SimdAlign::Vector` the caller promises the pointer is aligned to
    // the whole vector type, so the backend may emit aligned vector moves.
    //
    // x86-avx2-NOT: vpslld
    // x86-avx2: vmaskmovps ymm0, ymm0, ymmword ptr [rdi]
    //
    // x86-avx512-NOT: vpslld
    // x86-avx512: vpmovd2m k1, ymm0
    // x86-avx512-NEXT: vmovaps ymm0 {k1} {z}, ymmword ptr [rdi]
    //
    // this aligned version should generate `movaps` instead of `movups`
    simd_masked_load::<_, _, _, { SimdAlign::Vector }>(
        mask,
        pointer,
        f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]),
    )
}

// CHECK-LABEL: load_f64x4
Expand All @@ -79,5 +105,9 @@ pub unsafe extern "C" fn load_f64x4(mask: m64x4, pointer: *const f64) -> f64x4 {
//
// x86-avx512-NOT: vpsllq
// x86-avx512: vpmovq2m k1, ymm0
simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64]))
simd_masked_load::<_, _, _, { SimdAlign::Element }>(
mask,
pointer,
f64x4([0_f64, 0_f64, 0_f64, 0_f64]),
)
}
Loading
Loading