diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 467655b0bfcd7..f7da3404911cc 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -13,7 +13,7 @@ use rustc_hir::def_id::LOCAL_CRATE; use rustc_hir::{self as hir}; use rustc_middle::mir::BinOp; use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf}; -use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TyCtxt, TypingEnv}; +use rustc_middle::ty::{self, GenericArgsRef, Instance, SimdAlign, Ty, TyCtxt, TypingEnv}; use rustc_middle::{bug, span_bug}; use rustc_span::{Span, Symbol, sym}; use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate}; @@ -1828,8 +1828,23 @@ fn generic_simd_intrinsic<'ll, 'tcx>( )); } + fn llvm_alignment<'ll, 'tcx>( + bx: &mut Builder<'_, 'll, 'tcx>, + alignment: SimdAlign, + vector_ty: Ty<'tcx>, + element_ty: Ty<'tcx>, + ) -> &'ll Value { + let alignment = match alignment { + SimdAlign::Unaligned => 1, + SimdAlign::Element => bx.align_of(element_ty).bytes(), + SimdAlign::Vector => bx.align_of(vector_ty).bytes(), + }; + + bx.const_i32(alignment as i32) + } + if name == sym::simd_masked_load { - // simd_masked_load(mask: , pointer: *_ T, values: ) -> + // simd_masked_load<_, _, _, const ALIGN: SimdAlign>(mask: , pointer: *_ T, values: ) -> // * N: number of elements in the input vectors // * T: type of the element to load // * M: any integer width is supported, will be truncated to i1 @@ -1837,6 +1852,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>( // those lanes whose `mask` bit is enabled. // The memory addresses corresponding to the “off” lanes are not accessed. 
+ let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0] + .unwrap_leaf() + .to_simd_alignment(); + // The element type of the "mask" argument must be a signed integer type of any width let mask_ty = in_ty; let (mask_len, mask_elem) = (in_len, in_elem); @@ -1893,7 +1912,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len); // Alignment of T, must be a constant integer value: - let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32); + let alignment = llvm_alignment(bx, alignment, values_ty, values_elem); let llvm_pointer = bx.type_ptr(); @@ -1908,7 +1927,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( } if name == sym::simd_masked_store { - // simd_masked_store(mask: , pointer: *mut T, values: ) -> () + // simd_masked_store<_, _, _, const ALIGN: SimdAlign>(mask: , pointer: *mut T, values: ) -> () // * N: number of elements in the input vectors // * T: type of the element to load // * M: any integer width is supported, will be truncated to i1 @@ -1916,6 +1935,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>( // those lanes whose `mask` bit is enabled. // The memory addresses corresponding to the “off” lanes are not accessed. 
+ let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0] + .unwrap_leaf() + .to_simd_alignment(); + // The element type of the "mask" argument must be a signed integer type of any width let mask_ty = in_ty; let (mask_len, mask_elem) = (in_len, in_elem); @@ -1966,7 +1989,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len); // Alignment of T, must be a constant integer value: - let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32); + let alignment = llvm_alignment(bx, alignment, values_ty, values_elem); let llvm_pointer = bx.type_ptr(); diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index bc3448be5823e..db29d2942fb8b 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -695,8 +695,8 @@ pub(crate) fn check_intrinsic_type( (1, 0, vec![param(0), param(0), param(0)], param(0)) } sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)), - sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)), - sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit), + sym::simd_masked_load => (3, 1, vec![param(0), param(1), param(2)], param(2)), + sym::simd_masked_store => (3, 1, vec![param(0), param(1), param(2)], tcx.types.unit), sym::simd_scatter => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit), sym::simd_insert | sym::simd_insert_dyn => { (2, 0, vec![param(0), tcx.types.u32, param(1)], param(0)) diff --git a/compiler/rustc_middle/src/ty/consts/int.rs b/compiler/rustc_middle/src/ty/consts/int.rs index 6ee76b9450739..eaf67ae23ad26 100644 --- a/compiler/rustc_middle/src/ty/consts/int.rs +++ b/compiler/rustc_middle/src/ty/consts/int.rs @@ -39,6 +39,15 @@ pub enum AtomicOrdering { SeqCst = 4, } +/// An enum to represent the compiler-side view of 
`intrinsics::simd::SimdAlign`. +#[derive(Debug, Copy, Clone)] +pub enum SimdAlign { + // These values must match `intrinsics::simd::SimdAlign`! + Unaligned = 0, + Element = 1, + Vector = 2, +} + impl std::fmt::Debug for ConstInt { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { int, signed, is_ptr_sized_integral } = *self; @@ -350,6 +359,21 @@ impl ScalarInt { } } + #[inline] + pub fn to_simd_alignment(self) -> SimdAlign { + use SimdAlign::*; + let val = self.to_u32(); + if val == Unaligned as u32 { + Unaligned + } else if val == Element as u32 { + Element + } else if val == Vector as u32 { + Vector + } else { + panic!("not a valid simd alignment") + } + } + /// Converts the `ScalarInt` to `bool`. /// Panics if the `size` of the `ScalarInt` is not equal to 1 byte. /// Errors if it is not a valid `bool`. diff --git a/compiler/rustc_middle/src/ty/mod.rs b/compiler/rustc_middle/src/ty/mod.rs index ce4de6b95e0bb..c284c13f86010 100644 --- a/compiler/rustc_middle/src/ty/mod.rs +++ b/compiler/rustc_middle/src/ty/mod.rs @@ -75,7 +75,7 @@ pub use self::closure::{ }; pub use self::consts::{ AnonConstKind, AtomicOrdering, Const, ConstInt, ConstKind, ConstToValTreeResult, Expr, - ExprKind, ScalarInt, UnevaluatedConst, ValTree, ValTreeKind, Value, + ExprKind, ScalarInt, SimdAlign, UnevaluatedConst, ValTree, ValTreeKind, Value, }; pub use self::context::{ CtxtInterners, CurrentGcx, DeducedParamAttrs, Feed, FreeRegionInfo, GlobalCtxt, Lift, TyCtxt, diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs index 19488082cc33d..c56e04bfc2d90 100644 --- a/library/core/src/intrinsics/simd.rs +++ b/library/core/src/intrinsics/simd.rs @@ -2,6 +2,8 @@ //! //! In this module, a "vector" is any `repr(simd)` type. +use crate::marker::ConstParamTy; + /// Inserts an element into a vector, returning the updated vector. /// /// `T` must be a vector with element type `U`, and `idx` must be `const`. 
@@ -377,6 +379,19 @@ pub unsafe fn simd_gather(val: T, ptr: U, mask: V) -> T; #[rustc_nounwind] pub unsafe fn simd_scatter(val: T, ptr: U, mask: V); +/// A type for alignment options for SIMD masked load/store intrinsics. +#[derive(Debug, ConstParamTy, PartialEq, Eq)] +pub enum SimdAlign { + // These values must match the compiler's `SimdAlign` defined in + // `rustc_middle/src/ty/consts/int.rs`! + /// No alignment requirements on the pointer + Unaligned = 0, + /// The pointer must be aligned to the element type of the SIMD vector + Element = 1, + /// The pointer must be aligned to the SIMD vector type + Vector = 2, +} + /// Reads a vector of pointers. /// /// `T` must be a vector. @@ -392,13 +407,13 @@ pub unsafe fn simd_scatter(val: T, ptr: U, mask: V); /// `val`. /// /// # Safety -/// Unmasked values in `T` must be readable as if by `::read` (e.g. aligned to the element -/// type). +/// Every element whose `mask` value is `!0` must be valid for reads. +/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details. /// /// `mask` must only contain `0` or `!0` values. #[rustc_intrinsic] #[rustc_nounwind] -pub unsafe fn simd_masked_load(mask: V, ptr: U, val: T) -> T; +pub unsafe fn simd_masked_load(mask: V, ptr: U, val: T) -> T; /// Writes to a vector of pointers. /// @@ -414,13 +429,13 @@ pub unsafe fn simd_masked_load(mask: V, ptr: U, val: T) -> T; /// Otherwise if the corresponding value in `mask` is `0`, do nothing. /// /// # Safety -/// Unmasked values in `T` must be writeable as if by `::write` (e.g. aligned to the element -/// type). +/// Every element whose `mask` value is `!0` must be valid for writes. +/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details. /// /// `mask` must only contain `0` or `!0` values. #[rustc_intrinsic] #[rustc_nounwind] -pub unsafe fn simd_masked_store(mask: V, ptr: U, val: T); +pub unsafe fn simd_masked_store(mask: V, ptr: U, val: T); /// Adds two simd vectors elementwise, with saturation. 
/// diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs index d76a6cd52bfc5..f40031f8c4da7 100644 --- a/library/portable-simd/crates/core_simd/src/vector.rs +++ b/library/portable-simd/crates/core_simd/src/vector.rs @@ -474,7 +474,14 @@ where or: Self, ) -> Self { // SAFETY: The safety of reading elements through `ptr` is ensured by the caller. - unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) } + unsafe { + core::intrinsics::simd::simd_masked_load::< + _, + _, + _, + { core::intrinsics::simd::SimdAlign::Element }, + >(enable.to_int(), ptr, or) + } } /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. @@ -723,7 +730,14 @@ where #[inline] pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<::Mask, N>) { // SAFETY: The safety of writing elements through `ptr` is ensured by the caller. - unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) } + unsafe { + core::intrinsics::simd::simd_masked_store::< + _, + _, + _, + { core::intrinsics::simd::SimdAlign::Element }, + >(enable.to_int(), ptr, self) + } } /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. diff --git a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs index e2cd08733af1c..0fca2d877b1a4 100644 --- a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs +++ b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs @@ -680,25 +680,39 @@ fn simd_float_intrinsics() { } fn simd_masked_loadstore() { + use intrinsics::*; + // The buffer is deliberarely too short, so reading the last element would be UB. 
let buf = [3i32; 3]; let default = i32x4::splat(0); let mask = i32x4::from_array([!0, !0, !0, 0]); - let vals = unsafe { intrinsics::simd_masked_load(mask, buf.as_ptr(), default) }; + let vals = + unsafe { simd_masked_load::<_, _, _, { SimdAlign::Element }>(mask, buf.as_ptr(), default) }; assert_eq!(vals, i32x4::from_array([3, 3, 3, 0])); // Also read in a way that the *first* element is OOB. let mask2 = i32x4::from_array([0, !0, !0, !0]); - let vals = - unsafe { intrinsics::simd_masked_load(mask2, buf.as_ptr().wrapping_sub(1), default) }; + let vals = unsafe { + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + mask2, + buf.as_ptr().wrapping_sub(1), + default, + ) + }; assert_eq!(vals, i32x4::from_array([0, 3, 3, 3])); // The buffer is deliberarely too short, so writing the last element would be UB. let mut buf = [42i32; 3]; let vals = i32x4::from_array([1, 2, 3, 4]); - unsafe { intrinsics::simd_masked_store(mask, buf.as_mut_ptr(), vals) }; + unsafe { simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, buf.as_mut_ptr(), vals) }; assert_eq!(buf, [1, 2, 3]); // Also write in a way that the *first* element is OOB. 
- unsafe { intrinsics::simd_masked_store(mask2, buf.as_mut_ptr().wrapping_sub(1), vals) }; + unsafe { + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + mask2, + buf.as_mut_ptr().wrapping_sub(1), + vals, + ) + }; assert_eq!(buf, [2, 3, 4]); } diff --git a/tests/assembly-llvm/simd-intrinsic-mask-load.rs b/tests/assembly-llvm/simd-intrinsic-mask-load.rs index d3f3453a780a4..bc9fa640b6fa2 100644 --- a/tests/assembly-llvm/simd-intrinsic-mask-load.rs +++ b/tests/assembly-llvm/simd-intrinsic-mask-load.rs @@ -9,7 +9,7 @@ //@ assembly-output: emit-asm //@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort -#![feature(no_core, lang_items, repr_simd, intrinsics)] +#![feature(no_core, lang_items, repr_simd, intrinsics, adt_const_params)] #![no_core] #![allow(non_camel_case_types)] @@ -35,7 +35,7 @@ pub struct f64x4([f64; 4]); pub struct m64x4([i64; 4]); #[rustc_intrinsic] -unsafe fn simd_masked_load(mask: M, pointer: P, values: T) -> T; +unsafe fn simd_masked_load(mask: M, pointer: P, values: T) -> T; // CHECK-LABEL: load_i8x16 #[no_mangle] @@ -56,7 +56,11 @@ pub unsafe extern "C" fn load_i8x16(mask: m8x16, pointer: *const i8) -> i8x16 { // x86-avx512-NOT: vpsllw // x86-avx512: vpmovb2m k1, xmm0 // x86-avx512-NEXT: vmovdqu8 xmm0 {k1} {z}, xmmword ptr [rdi] - simd_masked_load(mask, pointer, i8x16([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])) + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + mask, + pointer, + i8x16([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), + ) } // CHECK-LABEL: load_f32x8 @@ -68,7 +72,29 @@ pub unsafe extern "C" fn load_f32x8(mask: m32x8, pointer: *const f32) -> f32x8 { // x86-avx512-NOT: vpslld // x86-avx512: vpmovd2m k1, ymm0 // x86-avx512-NEXT: vmovups ymm0 {k1} {z}, ymmword ptr [rdi] - simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32])) + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + mask, + pointer, + f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]), 
+ ) +} + +// CHECK-LABEL: load_f32x8_aligned +#[no_mangle] +pub unsafe extern "C" fn load_f32x8_aligned(mask: m32x8, pointer: *const f32) -> f32x8 { + // x86-avx2-NOT: vpslld + // x86-avx2: vmaskmovps ymm0, ymm0, ymmword ptr [rdi] + // + // x86-avx512-NOT: vpslld + // x86-avx512: vpmovd2m k1, ymm0 + // x86-avx512-NEXT: vmovaps ymm0 {k1} {z}, ymmword ptr [rdi] + // + // this aligned version should generate `movaps` instead of `movups` + simd_masked_load::<_, _, _, { SimdAlign::Vector }>( + mask, + pointer, + f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]), + ) } // CHECK-LABEL: load_f64x4 @@ -79,5 +105,9 @@ pub unsafe extern "C" fn load_f64x4(mask: m64x4, pointer: *const f64) -> f64x4 { // // x86-avx512-NOT: vpsllq // x86-avx512: vpmovq2m k1, ymm0 - simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64])) + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + mask, + pointer, + f64x4([0_f64, 0_f64, 0_f64, 0_f64]), + ) } diff --git a/tests/assembly-llvm/simd-intrinsic-mask-store.rs b/tests/assembly-llvm/simd-intrinsic-mask-store.rs index 001762e5060db..0591ca5679456 100644 --- a/tests/assembly-llvm/simd-intrinsic-mask-store.rs +++ b/tests/assembly-llvm/simd-intrinsic-mask-store.rs @@ -9,7 +9,7 @@ //@ assembly-output: emit-asm //@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort -#![feature(no_core, lang_items, repr_simd, intrinsics)] +#![feature(no_core, lang_items, repr_simd, intrinsics, adt_const_params)] #![no_core] #![allow(non_camel_case_types)] @@ -35,7 +35,7 @@ pub struct f64x4([f64; 4]); pub struct m64x4([i64; 4]); #[rustc_intrinsic] -unsafe fn simd_masked_store(mask: M, pointer: P, values: T); +unsafe fn simd_masked_store(mask: M, pointer: P, values: T); // CHECK-LABEL: store_i8x16 #[no_mangle] @@ -54,7 +54,7 @@ pub unsafe extern "C" fn store_i8x16(mask: m8x16, pointer: *mut i8, value: i8x16 // x86-avx512-NOT: vpsllw // x86-avx512: vpmovb2m k1, xmm0 // x86-avx512-NEXT: vmovdqu8 xmmword ptr [rdi] {k1}, xmm1 - 
simd_masked_store(mask, pointer, value) + simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, pointer, value) } // CHECK-LABEL: store_f32x8 @@ -66,7 +66,21 @@ pub unsafe extern "C" fn store_f32x8(mask: m32x8, pointer: *mut f32, value: f32x // x86-avx512-NOT: vpslld // x86-avx512: vpmovd2m k1, ymm0 // x86-avx512-NEXT: vmovups ymmword ptr [rdi] {k1}, ymm1 - simd_masked_store(mask, pointer, value) + simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, pointer, value) +} + +// CHECK-LABEL: store_f32x8_aligned +#[no_mangle] +pub unsafe extern "C" fn store_f32x8_aligned(mask: m32x8, pointer: *mut f32, value: f32x8) { + // x86-avx2-NOT: vpslld + // x86-avx2: vmaskmovps ymmword ptr [rdi], ymm0, ymm1 + // + // x86-avx512-NOT: vpslld + // x86-avx512: vpmovd2m k1, ymm0 + // x86-avx512-NEXT: vmovaps ymmword ptr [rdi] {k1}, ymm1 + // + // this aligned version should generate `movaps` instead of `movups` + simd_masked_store::<_, _, _, { SimdAlign::Vector }>(mask, pointer, value) } // CHECK-LABEL: store_f64x4 @@ -78,5 +92,5 @@ pub unsafe extern "C" fn store_f64x4(mask: m64x4, pointer: *mut f64, value: f64x // x86-avx512-NOT: vpsllq // x86-avx512: vpmovq2m k1, ymm0 // x86-avx512-NEXT: vmovupd ymmword ptr [rdi] {k1}, ymm1 - simd_masked_store(mask, pointer, value) + simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, pointer, value) } diff --git a/tests/auxiliary/minicore.rs b/tests/auxiliary/minicore.rs index 4f4c653cb46e7..1ff70bdb9531b 100644 --- a/tests/auxiliary/minicore.rs +++ b/tests/auxiliary/minicore.rs @@ -239,3 +239,17 @@ pub enum c_void { __variant1, __variant2, } + +#[lang = "const_param_ty"] +#[diagnostic::on_unimplemented(message = "`{Self}` can't be used as a const parameter type")] +pub trait ConstParamTy_ {} + +pub enum SimdAlign { + // These values must match the compiler's `SimdAlign` defined in + // `rustc_middle/src/ty/consts/int.rs`! 
+ Unaligned = 0, + Element = 1, + Vector = 2, +} + +impl ConstParamTy_ for SimdAlign {} diff --git a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-load.rs b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-load.rs index fda315dc66ca2..9911bd6863c70 100644 --- a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-load.rs +++ b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-load.rs @@ -8,7 +8,7 @@ mod minisimd; use minisimd::*; -use std::intrinsics::simd::simd_masked_load; +use std::intrinsics::simd::{SimdAlign, simd_masked_load}; pub type Vec2 = Simd; pub type Vec4 = Simd; @@ -19,7 +19,39 @@ pub unsafe fn load_f32x2(mask: Vec2, pointer: *const f32, values: Vec2 // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 4, <2 x i1> [[B]], <2 x float> {{.*}}) - simd_masked_load(mask, pointer, values) + // ^^^^^ + // the align parameter should be equal to the alignment of the element type (assumed to be 4) + simd_masked_load::<_, _, _, { SimdAlign::Element }>(mask, pointer, values) +} + +// CHECK-LABEL: @load_f32x2_aligned +#[no_mangle] +pub unsafe fn load_f32x2_aligned( + mask: Vec2, + pointer: *const f32, + values: Vec2, +) -> Vec2 { + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} + // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> + // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 8, <2 x i1> [[B]], <2 x float> {{.*}}) + // ^^^^^ + // the align parameter should be equal to the alignment of the vector type (assumed to be 8) + simd_masked_load::<_, _, _, { SimdAlign::Vector }>(mask, pointer, values) +} + +// CHECK-LABEL: @load_f32x2_unaligned +#[no_mangle] +pub unsafe fn load_f32x2_unaligned( + mask: Vec2, + pointer: *const f32, + values: Vec2, +) -> Vec2 { + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} + // CHECK: 
[[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> + // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 1, <2 x i1> [[B]], <2 x float> {{.*}}) + // ^^^^^ + // the align parameter should be 1 + simd_masked_load::<_, _, _, { SimdAlign::Unaligned }>(mask, pointer, values) } // CHECK-LABEL: @load_f32x2_unsigned @@ -32,7 +64,7 @@ pub unsafe fn load_f32x2_unsigned( // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 4, <2 x i1> [[B]], <2 x float> {{.*}}) - simd_masked_load(mask, pointer, values) + simd_masked_load::<_, _, _, { SimdAlign::Element }>(mask, pointer, values) } // CHECK-LABEL: @load_pf32x4 @@ -45,5 +77,5 @@ pub unsafe fn load_pf32x4( // CHECK: [[A:%[0-9]+]] = lshr <4 x i32> {{.*}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <4 x i32> [[A]] to <4 x i1> // CHECK: call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr {{.*}}, i32 {{.*}}, <4 x i1> [[B]], <4 x ptr> {{.*}}) - simd_masked_load(mask, pointer, values) + simd_masked_load::<_, _, _, { SimdAlign::Element }>(mask, pointer, values) } diff --git a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-store.rs b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-store.rs index 6ca7388d464b9..1c106e0d01304 100644 --- a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-store.rs +++ b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-masked-store.rs @@ -8,7 +8,7 @@ mod minisimd; use minisimd::*; -use std::intrinsics::simd::simd_masked_store; +use std::intrinsics::simd::{SimdAlign, simd_masked_store}; pub type Vec2 = Simd; pub type Vec4 = Simd; @@ -19,7 +19,31 @@ pub unsafe fn store_f32x2(mask: Vec2, pointer: *mut f32, values: Vec2) // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> // CHECK: call void 
@llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 4, <2 x i1> [[B]]) - simd_masked_store(mask, pointer, values) + // ^^^^^ + // the align parameter should be equal to the alignment of the element type (assumed to be 4) + simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, pointer, values) +} + +// CHECK-LABEL: @store_f32x2_aligned +#[no_mangle] +pub unsafe fn store_f32x2_aligned(mask: Vec2, pointer: *mut f32, values: Vec2) { + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} + // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> + // CHECK: call void @llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 8, <2 x i1> [[B]]) + // ^^^^^ + // the align parameter should be equal to the alignment of the vector type (assumed to be 8) + simd_masked_store::<_, _, _, { SimdAlign::Vector }>(mask, pointer, values) +} + +// CHECK-LABEL: @store_f32x2_unaligned +#[no_mangle] +pub unsafe fn store_f32x2_unaligned(mask: Vec2, pointer: *mut f32, values: Vec2) { + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} + // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> + // CHECK: call void @llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 1, <2 x i1> [[B]]) + // ^^^^^ + // the align parameter should be 1 + simd_masked_store::<_, _, _, { SimdAlign::Unaligned }>(mask, pointer, values) } // CHECK-LABEL: @store_f32x2_unsigned @@ -28,7 +52,7 @@ pub unsafe fn store_f32x2_unsigned(mask: Vec2, pointer: *mut f32, values: V // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> // CHECK: call void @llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 4, <2 x i1> [[B]]) - simd_masked_store(mask, pointer, values) + simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, pointer, values) } // CHECK-LABEL: @store_pf32x4 @@ -37,5 +61,5 @@ pub unsafe fn store_pf32x4(mask: Vec4, pointer: *mut *const f32, values: Ve // CHECK: [[A:%[0-9]+]] = 
lshr <4 x i32> {{.*}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <4 x i32> [[A]] to <4 x i1> // CHECK: call void @llvm.masked.store.v4p0.p0(<4 x ptr> {{.*}}, ptr {{.*}}, i32 {{.*}}, <4 x i1> [[B]]) - simd_masked_store(mask, pointer, values) + simd_masked_store::<_, _, _, { SimdAlign::Element }>(mask, pointer, values) } diff --git a/tests/ui/simd/masked-load-store-build-fail.rs b/tests/ui/simd/masked-load-store-build-fail.rs index c711b6dfd9770..82866af2c2212 100644 --- a/tests/ui/simd/masked-load-store-build-fail.rs +++ b/tests/ui/simd/masked-load-store-build-fail.rs @@ -2,7 +2,7 @@ //@ ignore-backends: gcc #![feature(repr_simd, core_intrinsics)] -use std::intrinsics::simd::{simd_masked_load, simd_masked_store}; +use std::intrinsics::simd::{SimdAlign, simd_masked_load, simd_masked_store}; #[derive(Copy, Clone)] #[repr(simd)] @@ -13,28 +13,60 @@ fn main() { let mut arr = [4u8, 5, 6, 7]; let default = Simd::([9; 4]); - simd_masked_load(Simd::([-1, 0, -1, -1, 0, 0, 0, 0]), arr.as_ptr(), default); - //~^ ERROR expected third argument with length 8 (same as input type `Simd`), found `Simd` with length 4 + //~v ERROR expected third argument with length 8 (same as input type `Simd`), found `Simd` with length 4 + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([-1, 0, -1, -1, 0, 0, 0, 0]), + arr.as_ptr(), + default, + ); - simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr() as *const i8, default); - //~^ ERROR expected element type `u8` of second argument `*const i8` to be a pointer to the element type `u8` of the first argument `Simd`, found `u8` != `*_ u8` + //~v ERROR expected element type `u8` of second argument `*const i8` to be a pointer to the element type `u8` of the first argument `Simd`, found `u8` != `*_ u8` + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([-1, 0, -1, -1]), + arr.as_ptr() as *const i8, + default, + ); - simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr(), Simd::([9; 4])); - //~^ ERROR expected 
element type `u32` of second argument `*const u8` to be a pointer to the element type `u32` of the first argument `Simd`, found `u32` != `*_ u32` + //~v ERROR expected element type `u32` of second argument `*const u8` to be a pointer to the element type `u32` of the first argument `Simd`, found `u32` != `*_ u32` + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([-1, 0, -1, -1]), + arr.as_ptr(), + Simd::([9; 4]), + ); - simd_masked_load(Simd::([1.0, 0.0, 1.0, 1.0]), arr.as_ptr(), default); - //~^ ERROR expected mask element type to be an integer, found `f32` + //~v ERROR expected mask element type to be an integer, found `f32` + simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([1.0, 0.0, 1.0, 1.0]), + arr.as_ptr(), + default, + ); - simd_masked_store(Simd([-1i8; 4]), arr.as_ptr(), Simd([5u32; 4])); - //~^ ERROR expected element type `u32` of second argument `*const u8` to be a pointer to the element type `u32` of the first argument `Simd`, found `u32` != `*mut u32` + //~v ERROR expected element type `u32` of second argument `*const u8` to be a pointer to the element type `u32` of the first argument `Simd`, found `u32` != `*mut u32` + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + Simd([-1i8; 4]), + arr.as_ptr(), + Simd([5u32; 4]), + ); - simd_masked_store(Simd([-1i8; 4]), arr.as_ptr(), Simd([5u8; 4])); - //~^ ERROR expected element type `u8` of second argument `*const u8` to be a pointer to the element type `u8` of the first argument `Simd`, found `u8` != `*mut u8` + //~v ERROR expected element type `u8` of second argument `*const u8` to be a pointer to the element type `u8` of the first argument `Simd`, found `u8` != `*mut u8` + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + Simd([-1i8; 4]), + arr.as_ptr(), + Simd([5u8; 4]), + ); - simd_masked_store(Simd([-1i8; 4]), arr.as_mut_ptr(), Simd([5u8; 2])); - //~^ ERROR expected third argument with length 4 (same as input type `Simd`), found `Simd` with length 2 + //~v ERROR 
expected third argument with length 4 (same as input type `Simd`), found `Simd` with length 2 + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + Simd([-1i8; 4]), + arr.as_mut_ptr(), + Simd([5u8; 2]), + ); - simd_masked_store(Simd([1f32; 4]), arr.as_mut_ptr(), Simd([5u8; 4])); - //~^ ERROR expected mask element type to be an integer, found `f32` + //~v ERROR expected mask element type to be an integer, found `f32` + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + Simd([1f32; 4]), + arr.as_mut_ptr(), + Simd([5u8; 4]), + ); } } diff --git a/tests/ui/simd/masked-load-store-build-fail.stderr b/tests/ui/simd/masked-load-store-build-fail.stderr index b9158f46ea9aa..f2a9ecfb731fb 100644 --- a/tests/ui/simd/masked-load-store-build-fail.stderr +++ b/tests/ui/simd/masked-load-store-build-fail.stderr @@ -1,50 +1,82 @@ error[E0511]: invalid monomorphization of `simd_masked_load` intrinsic: expected third argument with length 8 (same as input type `Simd`), found `Simd` with length 4 - --> $DIR/masked-load-store-build-fail.rs:16:9 + --> $DIR/masked-load-store-build-fail.rs:17:9 | -LL | simd_masked_load(Simd::([-1, 0, -1, -1, 0, 0, 0, 0]), arr.as_ptr(), default); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_load::<_, _, _, { SimdAlign::Element }>( +LL | | Simd::([-1, 0, -1, -1, 0, 0, 0, 0]), +LL | | arr.as_ptr(), +LL | | default, +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of `simd_masked_load` intrinsic: expected element type `u8` of second argument `*const i8` to be a pointer to the element type `u8` of the first argument `Simd`, found `u8` != `*_ u8` - --> $DIR/masked-load-store-build-fail.rs:19:9 + --> $DIR/masked-load-store-build-fail.rs:24:9 | -LL | simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr() as *const i8, default); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_load::<_, _, _, { SimdAlign::Element }>( 
+LL | | Simd::([-1, 0, -1, -1]), +LL | | arr.as_ptr() as *const i8, +LL | | default, +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of `simd_masked_load` intrinsic: expected element type `u32` of second argument `*const u8` to be a pointer to the element type `u32` of the first argument `Simd`, found `u32` != `*_ u32` - --> $DIR/masked-load-store-build-fail.rs:22:9 + --> $DIR/masked-load-store-build-fail.rs:31:9 | -LL | simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr(), Simd::([9; 4])); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_load::<_, _, _, { SimdAlign::Element }>( +LL | | Simd::([-1, 0, -1, -1]), +LL | | arr.as_ptr(), +LL | | Simd::([9; 4]), +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of `simd_masked_load` intrinsic: expected mask element type to be an integer, found `f32` - --> $DIR/masked-load-store-build-fail.rs:25:9 + --> $DIR/masked-load-store-build-fail.rs:38:9 | -LL | simd_masked_load(Simd::([1.0, 0.0, 1.0, 1.0]), arr.as_ptr(), default); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_load::<_, _, _, { SimdAlign::Element }>( +LL | | Simd::([1.0, 0.0, 1.0, 1.0]), +LL | | arr.as_ptr(), +LL | | default, +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of `simd_masked_store` intrinsic: expected element type `u32` of second argument `*const u8` to be a pointer to the element type `u32` of the first argument `Simd`, found `u32` != `*mut u32` - --> $DIR/masked-load-store-build-fail.rs:28:9 + --> $DIR/masked-load-store-build-fail.rs:45:9 | -LL | simd_masked_store(Simd([-1i8; 4]), arr.as_ptr(), Simd([5u32; 4])); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_store::<_, _, _, { SimdAlign::Element }>( +LL | | Simd([-1i8; 4]), +LL | | arr.as_ptr(), +LL | | Simd([5u32; 4]), +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of 
`simd_masked_store` intrinsic: expected element type `u8` of second argument `*const u8` to be a pointer to the element type `u8` of the first argument `Simd`, found `u8` != `*mut u8` - --> $DIR/masked-load-store-build-fail.rs:31:9 + --> $DIR/masked-load-store-build-fail.rs:52:9 | -LL | simd_masked_store(Simd([-1i8; 4]), arr.as_ptr(), Simd([5u8; 4])); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_store::<_, _, _, { SimdAlign::Element }>( +LL | | Simd([-1i8; 4]), +LL | | arr.as_ptr(), +LL | | Simd([5u8; 4]), +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of `simd_masked_store` intrinsic: expected third argument with length 4 (same as input type `Simd`), found `Simd` with length 2 - --> $DIR/masked-load-store-build-fail.rs:34:9 + --> $DIR/masked-load-store-build-fail.rs:59:9 | -LL | simd_masked_store(Simd([-1i8; 4]), arr.as_mut_ptr(), Simd([5u8; 2])); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_store::<_, _, _, { SimdAlign::Element }>( +LL | | Simd([-1i8; 4]), +LL | | arr.as_mut_ptr(), +LL | | Simd([5u8; 2]), +LL | | ); + | |_________^ error[E0511]: invalid monomorphization of `simd_masked_store` intrinsic: expected mask element type to be an integer, found `f32` - --> $DIR/masked-load-store-build-fail.rs:37:9 + --> $DIR/masked-load-store-build-fail.rs:66:9 | -LL | simd_masked_store(Simd([1f32; 4]), arr.as_mut_ptr(), Simd([5u8; 4])); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | / simd_masked_store::<_, _, _, { SimdAlign::Element }>( +LL | | Simd([1f32; 4]), +LL | | arr.as_mut_ptr(), +LL | | Simd([5u8; 4]), +LL | | ); + | |_________^ error: aborting due to 8 previous errors diff --git a/tests/ui/simd/masked-load-store-check-fail.rs b/tests/ui/simd/masked-load-store-check-fail.rs index 3ed47cd9ed40b..bd6448a589a4b 100644 --- a/tests/ui/simd/masked-load-store-check-fail.rs +++ b/tests/ui/simd/masked-load-store-check-fail.rs @@ 
-1,7 +1,7 @@ //@ check-fail #![feature(repr_simd, core_intrinsics)] -use std::intrinsics::simd::{simd_masked_load, simd_masked_store}; +use std::intrinsics::simd::{SimdAlign, simd_masked_load, simd_masked_store}; #[derive(Copy, Clone)] #[repr(simd)] @@ -12,11 +12,18 @@ fn main() { let mut arr = [4u8, 5, 6, 7]; let default = Simd::([9; 4]); - let _x: Simd = - simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr(), Simd::([9; 4])); - //~^ ERROR mismatched types + let _x: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([-1, 0, -1, -1]), + arr.as_ptr(), + Simd::([9; 4]), + ); + //~^^ ERROR mismatched types - let _x: Simd = simd_masked_load(Simd::([1, 0, 1, 1]), arr.as_ptr(), default); - //~^ ERROR mismatched types + let _x: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([1, 0, 1, 1]), + arr.as_ptr(), + default, + ); + //~^^ ERROR mismatched types } } diff --git a/tests/ui/simd/masked-load-store-check-fail.stderr b/tests/ui/simd/masked-load-store-check-fail.stderr index 1c9f9d246df50..4e63d04a3b158 100644 --- a/tests/ui/simd/masked-load-store-check-fail.stderr +++ b/tests/ui/simd/masked-load-store-check-fail.stderr @@ -1,36 +1,50 @@ error[E0308]: mismatched types - --> $DIR/masked-load-store-check-fail.rs:16:76 + --> $DIR/masked-load-store-check-fail.rs:18:13 | -LL | simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr(), Simd::([9; 4])); - | ---------------- arguments to this function are incorrect ^^^^^^^^^^^^^^^^^^^^^ expected `2`, found `4` +LL | let _x: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + | --------------------------------------------------- arguments to this function are incorrect +... 
+LL | Simd::([9; 4]), + | ^^^^^^^^^^^^^^^^^^^^^ expected `2`, found `4` | = note: expected struct `Simd<_, 2>` found struct `Simd<_, 4>` help: the return type of this call is `Simd` due to the type of the argument passed - --> $DIR/masked-load-store-check-fail.rs:16:13 + --> $DIR/masked-load-store-check-fail.rs:15:31 | -LL | simd_masked_load(Simd::([-1, 0, -1, -1]), arr.as_ptr(), Simd::([9; 4])); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------^ - | | - | this argument influences the return type of `simd_masked_load` +LL | let _x: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + | _______________________________^ +LL | | Simd::([-1, 0, -1, -1]), +LL | | arr.as_ptr(), +LL | | Simd::([9; 4]), + | | --------------------- this argument influences the return type of `simd_masked_load` +LL | | ); + | |_________^ note: function defined here --> $SRC_DIR/core/src/intrinsics/simd.rs:LL:COL error[E0308]: mismatched types - --> $DIR/masked-load-store-check-fail.rs:19:92 + --> $DIR/masked-load-store-check-fail.rs:25:13 | -LL | let _x: Simd = simd_masked_load(Simd::([1, 0, 1, 1]), arr.as_ptr(), default); - | ---------------- arguments to this function are incorrect ^^^^^^^ expected `Simd`, found `Simd` +LL | let _x: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + | --------------------------------------------------- arguments to this function are incorrect +... 
+LL | default, + | ^^^^^^^ expected `Simd`, found `Simd` | = note: expected struct `Simd` found struct `Simd` help: the return type of this call is `Simd` due to the type of the argument passed - --> $DIR/masked-load-store-check-fail.rs:19:32 + --> $DIR/masked-load-store-check-fail.rs:22:32 | -LL | let _x: Simd = simd_masked_load(Simd::([1, 0, 1, 1]), arr.as_ptr(), default); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------^ - | | - | this argument influences the return type of `simd_masked_load` +LL | let _x: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + | ________________________________^ +LL | | Simd::([1, 0, 1, 1]), +LL | | arr.as_ptr(), +LL | | default, + | | ------- this argument influences the return type of `simd_masked_load` +LL | | ); + | |_________^ note: function defined here --> $SRC_DIR/core/src/intrinsics/simd.rs:LL:COL diff --git a/tests/ui/simd/masked-load-store.rs b/tests/ui/simd/masked-load-store.rs index bc4307fb26d6b..f6682ad16725e 100644 --- a/tests/ui/simd/masked-load-store.rs +++ b/tests/ui/simd/masked-load-store.rs @@ -6,23 +6,34 @@ mod minisimd; use minisimd::*; -use std::intrinsics::simd::{simd_masked_load, simd_masked_store}; +use std::intrinsics::simd::{SimdAlign, simd_masked_load, simd_masked_store}; fn main() { unsafe { let a = Simd::([0, 1, 2, 3]); let b_src = [4u8, 5, 6, 7]; let b_default = Simd::([9; 4]); - let b: Simd = - simd_masked_load(Simd::([-1, 0, -1, -1]), b_src.as_ptr(), b_default); + let b: Simd = simd_masked_load::<_, _, _, { SimdAlign::Element }>( + Simd::([-1, 0, -1, -1]), + b_src.as_ptr(), + b_default, + ); assert_eq!(b.as_array(), &[4, 9, 6, 7]); let mut output = [u8::MAX; 5]; - simd_masked_store(Simd::([-1, -1, -1, 0]), output.as_mut_ptr(), a); + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + Simd::([-1, -1, -1, 0]), + output.as_mut_ptr(), + a, + ); assert_eq!(&output, &[0, 1, 2, u8::MAX, u8::MAX]); - simd_masked_store(Simd::([0, -1, -1, 0]), output[1..].as_mut_ptr(), 
b); + simd_masked_store::<_, _, _, { SimdAlign::Element }>( + Simd::([0, -1, -1, 0]), + output[1..].as_mut_ptr(), + b, + ); assert_eq!(&output, &[0, 1, 9, 6, u8::MAX]); } }