From eb8250b7e347bcd5bdcada8b0a003cf9d2b75f0a Mon Sep 17 00:00:00 2001
From: sayantn
Date: Tue, 14 Apr 2026 14:38:28 +0530
Subject: [PATCH 1/2] Remove uses of asm

---
 crates/core_arch/src/x86/avx512bf16.rs   |  38 +-----
 crates/core_arch/src/x86/avx512fp16.rs   | 154 +++++------------------
 crates/core_arch/src/x86/avxneconvert.rs |  28 ++---
 3 files changed, 43 insertions(+), 177 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512bf16.rs b/crates/core_arch/src/x86/avx512bf16.rs
index 66eef063ee..8d944f5ba8 100644
--- a/crates/core_arch/src/x86/avx512bf16.rs
+++ b/crates/core_arch/src/x86/avx512bf16.rs
@@ -2,7 +2,6 @@
 //!
 //! [AVX512BF16 intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769&avx512techs=AVX512_BF16
 
-use crate::arch::asm;
 use crate::core_arch::{simd::*, x86::*};
 use crate::intrinsics::simd::*;
 
@@ -17,6 +16,8 @@ unsafe extern "C" {
     fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
     #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
     fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
+    #[link_name = "llvm.x86.avx512bf16.mask.cvtneps2bf16.128"]
+    fn cvtneps2bf16_128(a: f32x4, src: i16x8, k: __mmask8) -> i16x8;
     #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
     fn cvtneps2bf16_256(a: f32x8) -> i16x8;
     #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
@@ -519,16 +520,7 @@ pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
 #[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "vcvtneps2bf16 {dst}, {src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    _mm_mask_cvtneps_pbh(__m128bh::splat(0), !0, a)
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@@ -541,17 +533,7 @@ pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
 #[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst = src;
-        asm!(
-            "vcvtneps2bf16 {dst}{{{k}}},{src}",
-            dst = inlateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            k = in(kreg) k,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    unsafe { cvtneps2bf16_128(a.as_f32x4(), src.as_i16x8(), k).as_m128bh() }
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@@ -564,17 +546,7 @@ pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
 #[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            k = in(kreg) k,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    _mm_mask_cvtneps_pbh(__m128bh::splat(0), k, a)
 }
 
 /// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index 8ddc3d29a3..6523e98d0c 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -695,34 +695,6 @@ pub const fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
     }
 }
 
-macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
cmp_asm { // FIXME: use LLVM intrinsics - ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{ - let dst: $mask_type; - asm!( - "vcmpph {k}, {a}, {b}, {imm8}", - k = lateout(kreg) dst, - a = in($reg) $a, - b = in($reg) $b, - imm8 = const IMM5, - options(pure, nomem, nostack) - ); - dst - }}; - ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{ - let dst: $mask_type; - asm!( - "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}", - k = lateout(kreg) dst, - mask = in(kreg) $mask, - a = in($reg) $a, - b = in($reg) $b, - imm8 = const IMM5, - options(pure, nomem, nostack) - ); - dst - }}; -} - /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison /// operand specified by imm8, and store the results in mask vector k. /// @@ -732,10 +704,7 @@ macro_rules! cmp_asm { // FIXME: use LLVM intrinsics #[rustc_legacy_const_generics(2)] #[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")] pub fn _mm_cmp_ph_mask(a: __m128h, b: __m128h) -> __mmask8 { - unsafe { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask8, xmm_reg, a, b) - } + _mm_mask_cmp_ph_mask::(!0, a, b) } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -750,7 +719,7 @@ pub fn _mm_cmp_ph_mask(a: __m128h, b: __m128h) -> __mmask8 { pub fn _mm_mask_cmp_ph_mask(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { unsafe { static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask8, k1, xmm_reg, a, b) + vcmpph_128(a, b, IMM5, k1) } } @@ -763,10 +732,7 @@ pub fn _mm_mask_cmp_ph_mask(k1: __mmask8, a: __m128h, b: __m128 #[rustc_legacy_const_generics(2)] #[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")] pub fn _mm256_cmp_ph_mask(a: __m256h, b: __m256h) -> __mmask16 { - unsafe { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask16, ymm_reg, a, b) - } + _mm256_mask_cmp_ph_mask::(!0, a, b) } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -785,7 +751,7 @@ pub fn _mm256_mask_cmp_ph_mask( ) -> __mmask16 { unsafe { static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask16, k1, ymm_reg, a, b) + vcmpph_256(a, b, IMM5, k1) } } @@ -798,10 +764,7 @@ pub fn _mm256_mask_cmp_ph_mask( #[rustc_legacy_const_generics(2)] #[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")] pub fn _mm512_cmp_ph_mask(a: __m512h, b: __m512h) -> __mmask32 { - unsafe { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask32, zmm_reg, a, b) - } + _mm512_mask_cmp_ph_mask::(!0, a, b) } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -818,10 +781,7 @@ pub fn _mm512_mask_cmp_ph_mask( a: __m512h, b: __m512h, ) -> __mmask32 { - unsafe { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask32, k1, zmm_reg, a, b) - } + _mm512_mask_cmp_round_ph_mask::(k1, a, b) } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -838,24 +798,7 @@ pub fn _mm512_cmp_round_ph_mask( a: __m512h, b: __m512h, ) -> __mmask32 { - unsafe { - static_assert_uimm_bits!(IMM5, 5); - static_assert_sae!(SAE); - if SAE == _MM_FROUND_NO_EXC { - let dst: __mmask32; - asm!( - "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}", - k = lateout(kreg) dst, - a = in(zmm_reg) a, - b = in(zmm_reg) b, - imm8 = const IMM5, - options(pure, nomem, nostack) - ); - dst - } else { - cmp_asm!(__mmask32, zmm_reg, a, b) - } - } + _mm512_mask_cmp_round_ph_mask::(!0, a, b) } /// Compare packed half-precision (16-bit) floating-point elements in a and 
b based on the comparison @@ -877,21 +820,7 @@ pub fn _mm512_mask_cmp_round_ph_mask( unsafe { static_assert_uimm_bits!(IMM5, 5); static_assert_sae!(SAE); - if SAE == _MM_FROUND_NO_EXC { - let dst: __mmask32; - asm!( - "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}", - k = lateout(kreg) dst, - k1 = in(kreg) k1, - a = in(zmm_reg) a, - b = in(zmm_reg) b, - imm8 = const IMM5, - options(pure, nomem, nostack) - ); - dst - } else { - cmp_asm!(__mmask32, k1, zmm_reg, a, b) - } + vcmpph_512(a, b, IMM5, k1, SAE) } } @@ -11538,32 +11467,6 @@ pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 { } } -macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics - ($mask_type: ty, $reg: ident, $a: expr) => {{ - let dst: $mask_type; - asm!( - "vfpclassph {k}, {src}, {imm8}", - k = lateout(kreg) dst, - src = in($reg) $a, - imm8 = const IMM8, - options(pure, nomem, nostack) - ); - dst - }}; - ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{ - let dst: $mask_type; - asm!( - "vfpclassph {k} {{ {mask} }}, {src}, {imm8}", - k = lateout(kreg) dst, - mask = in(kreg) $mask, - src = in($reg) $a, - imm8 = const IMM8, - options(pure, nomem, nostack) - ); - dst - }}; -} - /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified /// by imm8, and store the results in mask vector k. /// imm can be a combination of: @@ -11586,7 +11489,7 @@ macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics pub fn _mm_fpclass_ph_mask(a: __m128h) -> __mmask8 { unsafe { static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask8, xmm_reg, a) + vfpclassph_128(a, IMM8) } } @@ -11611,10 +11514,7 @@ pub fn _mm_fpclass_ph_mask(a: __m128h) -> __mmask8 { #[rustc_legacy_const_generics(2)] #[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")] pub fn _mm_mask_fpclass_ph_mask(k1: __mmask8, a: __m128h) -> __mmask8 { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask8, k1, xmm_reg, a) - } + _mm_fpclass_ph_mask::(a) & k1 } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11639,7 +11539,7 @@ pub fn _mm_mask_fpclass_ph_mask(k1: __mmask8, a: __m128h) -> __ pub fn _mm256_fpclass_ph_mask(a: __m256h) -> __mmask16 { unsafe { static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask16, ymm_reg, a) + vfpclassph_256(a, IMM8) } } @@ -11664,10 +11564,7 @@ pub fn _mm256_fpclass_ph_mask(a: __m256h) -> __mmask16 { #[rustc_legacy_const_generics(2)] #[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")] pub fn _mm256_mask_fpclass_ph_mask(k1: __mmask16, a: __m256h) -> __mmask16 { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask16, k1, ymm_reg, a) - } + _mm256_fpclass_ph_mask::(a) & k1 } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11692,7 +11589,7 @@ pub fn _mm256_mask_fpclass_ph_mask(k1: __mmask16, a: __m256h) - pub fn _mm512_fpclass_ph_mask(a: __m512h) -> __mmask32 { unsafe { static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask32, zmm_reg, a) + vfpclassph_512(a, IMM8) } } @@ -11717,10 +11614,7 @@ pub fn _mm512_fpclass_ph_mask(a: __m512h) -> __mmask32 { #[rustc_legacy_const_generics(2)] #[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")] pub fn _mm512_mask_fpclass_ph_mask(k1: __mmask32, a: __m512h) -> __mmask32 { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask32, k1, zmm_reg, a) - } + _mm512_fpclass_ph_mask::(a) & k1 } /// Test the lower half-precision (16-bit) floating-point element in a for special 
@@ -16571,11 +16465,18 @@ pub const fn _mm_cvtsi16_si128(a: i16) -> __m128i {
 }
 
 #[allow(improper_ctypes)]
-unsafe extern "C" {
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.x86.avx512fp16.mask.cmp.ph.128"]
+    fn vcmpph_128(a: __m128h, b: __m128h, imm5: i32, mask: __mmask8) -> __mmask8;
+    #[link_name = "llvm.x86.avx512fp16.mask.cmp.ph.256"]
+    fn vcmpph_256(a: __m256h, b: __m256h, imm5: i32, mask: __mmask16) -> __mmask16;
+    #[link_name = "llvm.x86.avx512fp16.mask.cmp.ph.512"]
+    fn vcmpph_512(a: __m512h, b: __m512h, imm5: i32, mask: __mmask32, sae: i32) -> __mmask32;
+
     #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
-    fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
+    fn vcmpsh(a: __m128h, b: __m128h, imm5: i32, mask: __mmask8, sae: i32) -> __mmask8;
     #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
-    fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+    fn vcomish(a: __m128h, b: __m128h, imm5: i32, sae: i32) -> i32;
 
     #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
     fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
@@ -16758,6 +16659,13 @@ unsafe extern "C" {
     fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
     -> __m128h;
 
+    #[link_name = "llvm.x86.avx512fp16.fpclass.ph.128"]
+    fn vfpclassph_128(a: __m128h, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512fp16.fpclass.ph.256"]
+    fn vfpclassph_256(a: __m256h, imm8: i32) -> __mmask16;
+    #[link_name = "llvm.x86.avx512fp16.fpclass.ph.512"]
+    fn vfpclassph_512(a: __m512h, imm8: i32) -> __mmask32;
+
     #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
     fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
diff --git a/crates/core_arch/src/x86/avxneconvert.rs b/crates/core_arch/src/x86/avxneconvert.rs
index b8a3b9473a..861213eb42 100644
--- a/crates/core_arch/src/x86/avxneconvert.rs
+++ b/crates/core_arch/src/x86/avxneconvert.rs
@@ -1,4 +1,3 @@
-use crate::arch::asm;
 use crate::core_arch::x86::*;
 
 #[cfg(test)]
@@ -161,16 +160,7 @@ pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
 #[cfg_attr(test, assert_instr(vcvtneps2bf16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "{{vex}}vcvtneps2bf16 {dst},{src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    unsafe { vcvtneps2bf16_128(a) }
 }
 
 /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
@@ -182,16 +172,7 @@ pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
 #[cfg_attr(test, assert_instr(vcvtneps2bf16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "{{vex}}vcvtneps2bf16 {dst},{src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(ymm_reg) a,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    unsafe { vcvtneps2bf16_256(a) }
 }
 
 #[allow(improper_ctypes)]
@@ -222,6 +203,11 @@ unsafe extern "C" {
     fn cvtneoph2ps_128(a: *const __m128h) -> __m128;
     #[link_name = "llvm.x86.vcvtneoph2ps256"]
     fn cvtneoph2ps_256(a: *const __m256h) -> __m256;
+
+    #[link_name = "llvm.x86.vcvtneps2bf16128"]
+    fn vcvtneps2bf16_128(a: __m128) -> __m128bh;
+    #[link_name = "llvm.x86.vcvtneps2bf16256"]
+    fn vcvtneps2bf16_256(a: __m256) -> __m128bh;
 }
 
 #[cfg(test)]

From 84ed8350fc967d19a7bd1ff28e801c7508984b22 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Thu, 16 Apr 2026 11:23:54 +0530
Subject: [PATCH 2/2] Remove uses of deprecated intrinsics

---
 crates/core_arch/src/aarch64/sve/generated.rs | 32 ++++-----
 crates/core_arch/src/x86/avx512bitalg.rs      | 24 +++----
 crates/core_arch/src/x86/avx512dq.rs          | 68 +++++++------
 crates/core_arch/src/x86/avx512f.rs           | 18 ++---
 .../stdarch-gen-arm/spec/sve/aarch64.spec.yml | 12 ++--
 5 files changed, 65 insertions(+), 89 deletions(-)

diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs
index ed28e98a81..5f26d61e7c 100644
--- a/crates/core_arch/src/aarch64/sve/generated.rs
+++ b/crates/core_arch/src/aarch64/sve/generated.rs
@@ -9799,7 +9799,7 @@ pub fn svdupq_n_f32(x0: f32, x1: f32, x2: f32, x3: f32) -> svfloat32_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             target_arch = "aarch64",
-            link_name = "llvm.experimental.vector.insert.nxv4f32.v4f32"
+            link_name = "llvm.vector.insert.nxv4f32.v4f32"
         )]
         fn _svdupq_n_f32(op0: svfloat32_t, op1: float32x4_t, idx: i64) -> svfloat32_t;
     }
@@ -9817,7 +9817,7 @@ pub fn svdupq_n_s32(x0: i32, x1: i32, x2: i32, x3: i32) -> svint32_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             target_arch = "aarch64",
-            link_name = "llvm.experimental.vector.insert.nxv4i32.v4i32"
+            link_name = "llvm.vector.insert.nxv4i32.v4i32"
         )]
         fn _svdupq_n_s32(op0: svint32_t, op1: int32x4_t, idx: i64) -> svint32_t;
     }
@@ -9851,7 +9851,7 @@ pub fn svdupq_n_f64(x0: f64, x1: f64) -> svfloat64_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             target_arch = "aarch64",
-            link_name = "llvm.experimental.vector.insert.nxv2f64.v2f64"
+            link_name = "llvm.vector.insert.nxv2f64.v2f64"
         )]
         fn _svdupq_n_f64(op0: svfloat64_t, op1: float64x2_t, idx: i64) -> svfloat64_t;
     }
@@ -9869,7 +9869,7 @@ pub fn svdupq_n_s64(x0: i64, x1: i64) -> svint64_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             target_arch = "aarch64",
-            link_name = "llvm.experimental.vector.insert.nxv2i64.v2i64"
+            link_name = "llvm.vector.insert.nxv2i64.v2i64"
        )]
         fn _svdupq_n_s64(op0: svint64_t, op1: int64x2_t, idx: i64) -> svint64_t;
     }
@@ -9904,7 +9904,7 @@ pub fn svdupq_n_s16(
     unsafe extern "unadjusted" {
         #[cfg_attr(
             target_arch = "aarch64",
-            link_name = "llvm.experimental.vector.insert.nxv8i16.v8i16"
+            link_name = "llvm.vector.insert.nxv8i16.v8i16"
         )]
         fn _svdupq_n_s16(op0: svint16_t, op1: int16x8_t, idx: i64) -> svint16_t;
     }
@@ -9972,7 +9972,7 @@ pub fn svdupq_n_s8(
     unsafe extern "unadjusted" {
         #[cfg_attr(
             target_arch = "aarch64",
-            link_name = "llvm.experimental.vector.insert.nxv16i8.v16i8"
+            link_name = "llvm.vector.insert.nxv16i8.v16i8"
         )]
         fn _svdupq_n_s8(op0: svint8_t, op1: int8x16_t, idx: i64) -> svint8_t;
     }
@@ -35208,7 +35208,7 @@ pub fn svreinterpret_u64_u64(op: svuint64_t) -> svuint64_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_b8(op: svbool_t) -> svbool_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv16i1")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")]
         fn _svrev_b8(op: svbool_t) -> svbool_t;
     }
     unsafe { _svrev_b8(op) }
@@ -35221,7 +35221,7 @@ pub fn svrev_b8(op: svbool_t) -> svbool_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_b16(op: svbool_t) -> svbool_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv8i1")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i1")]
         fn _svrev_b16(op: svbool8_t) -> svbool8_t;
     }
     unsafe { _svrev_b16(op.sve_into()).sve_into() }
@@ -35234,7 +35234,7 @@ pub fn svrev_b16(op: svbool_t) -> svbool_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_b32(op: svbool_t) -> svbool_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4i1")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i1")]
         fn _svrev_b32(op: svbool4_t) -> svbool4_t;
     }
     unsafe { _svrev_b32(op.sve_into()).sve_into() }
@@ -35247,7 +35247,7 @@ pub fn svrev_b32(op: svbool_t) -> svbool_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_b64(op: svbool_t) -> svbool_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2i1")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i1")]
         fn _svrev_b64(op: svbool2_t) -> svbool2_t;
     }
     unsafe { _svrev_b64(op.sve_into()).sve_into() }
@@ -35260,7 +35260,7 @@ pub fn svrev_b64(op: svbool_t) -> svbool_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4f32")]
         fn _svrev_f32(op: svfloat32_t) -> svfloat32_t;
     }
     unsafe { _svrev_f32(op) }
@@ -35273,7 +35273,7 @@ pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2f64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2f64")]
         fn _svrev_f64(op: svfloat64_t) -> svfloat64_t;
     }
     unsafe { _svrev_f64(op) }
@@ -35286,7 +35286,7 @@ pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_s8(op: svint8_t) -> svint8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i8")]
         fn _svrev_s8(op: svint8_t) -> svint8_t;
     }
     unsafe { _svrev_s8(op) }
@@ -35299,7 +35299,7 @@ pub fn svrev_s8(op: svint8_t) -> svint8_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_s16(op: svint16_t) -> svint16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i16")]
         fn _svrev_s16(op: svint16_t) -> svint16_t;
     }
     unsafe { _svrev_s16(op) }
@@ -35312,7 +35312,7 @@ pub fn svrev_s16(op: svint16_t) -> svint16_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_s32(op: svint32_t) -> svint32_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i32")]
         fn _svrev_s32(op: svint32_t) -> svint32_t;
     }
     unsafe { _svrev_s32(op) }
@@ -35325,7 +35325,7 @@ pub fn svrev_s32(op: svint32_t) -> svint32_t {
 #[cfg_attr(test, assert_instr(rev))]
 pub fn svrev_s64(op: svint64_t) -> svint64_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i64")]
         fn _svrev_s64(op: svint64_t) -> svint64_t;
     }
     unsafe { _svrev_s64(op) }
diff --git a/crates/core_arch/src/x86/avx512bitalg.rs b/crates/core_arch/src/x86/avx512bitalg.rs
index 6dd4e6b33a..dd211854af 100644
--- a/crates/core_arch/src/x86/avx512bitalg.rs
+++ b/crates/core_arch/src/x86/avx512bitalg.rs
@@ -28,12 +28,12 @@ use stdarch_test::assert_instr;
 
 #[allow(improper_ctypes)]
 unsafe extern "C" {
-    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
-    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
-    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
-    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
-    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
-    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
+    #[link_name = "llvm.x86.avx512.vpshufbitqmb.512"]
+    fn bitshuffle_512(data: i8x64, indices: i8x64) -> __mmask64;
+    #[link_name = "llvm.x86.avx512.vpshufbitqmb.256"]
+    fn bitshuffle_256(data: i8x32, indices: i8x32) -> __mmask32;
+    #[link_name = "llvm.x86.avx512.vpshufbitqmb.128"]
+    fn bitshuffle_128(data: i8x16, indices: i8x16) -> __mmask16;
 }
 
 /// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -370,7 +370,7 @@ pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
-    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
+    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64()) }
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -386,7 +386,7 @@ pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
-    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
+    _mm512_bitshuffle_epi64_mask(b, c) & k
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -399,7 +399,7 @@ pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
-    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
+    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32()) }
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -415,7 +415,7 @@ pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
-    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
+    _mm256_bitshuffle_epi64_mask(b, c) & k
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -428,7 +428,7 @@ pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
-    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
+    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16()) }
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -444,7 +444,7 @@ pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
-    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
+    _mm_bitshuffle_epi64_mask(b, c) & k
 }
 
 #[cfg(test)]
diff --git a/crates/core_arch/src/x86/avx512dq.rs b/crates/core_arch/src/x86/avx512dq.rs
index 9e1a4c0b29..0b322c8b83 100644
--- a/crates/core_arch/src/x86/avx512dq.rs
+++ b/crates/core_arch/src/x86/avx512dq.rs
@@ -6865,7 +6865,7 @@ pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
+    unsafe { vfpclasspd_128(a.as_f64x2(), IMM8) }
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6889,10 +6889,7 @@ pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
-    }
+    _mm_fpclass_pd_mask::<IMM8>(a) & k1
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6916,7 +6913,7 @@ pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
+    unsafe { vfpclasspd_256(a.as_f64x4(), IMM8) }
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6940,10 +6937,7 @@ pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
-    }
+    _mm256_fpclass_pd_mask::<IMM8>(a) & k1
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6967,7 +6961,7 @@ pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
+    unsafe { vfpclasspd_512(a.as_f64x8(), IMM8) }
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6991,10 +6985,7 @@ pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
-    }
+    _mm512_fpclass_pd_mask::<IMM8>(a) & k1
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -7018,7 +7009,7 @@ pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
+    unsafe { vfpclassps_128(a.as_f32x4(), IMM8) }
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -7042,10 +7033,7 @@ pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
-    }
+    _mm_fpclass_ps_mask::<IMM8>(a) & k1
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -7069,7 +7057,7 @@ pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
+    unsafe { vfpclassps_256(a.as_f32x8(), IMM8) }
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -7093,10 +7081,7 @@ pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
-    }
+    _mm256_fpclass_ps_mask::<IMM8>(a) & k1
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -7120,7 +7105,7 @@ pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
+    unsafe { vfpclassps_512(a.as_f32x16(), IMM8) }
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -7144,10 +7129,7 @@ pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
-    }
+    _mm512_fpclass_ps_mask::<IMM8>(a) & k1
 }
 
 /// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
@@ -7377,19 +7359,19 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
     fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
 
-    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
-    fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
-    fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
-    fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;
-
-    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
-    fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
-    fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
-    fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;
+    #[link_name = "llvm.x86.avx512.fpclass.pd.128"]
+    fn vfpclasspd_128(a: f64x2, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.pd.256"]
+    fn vfpclasspd_256(a: f64x4, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.pd.512"]
+    fn vfpclasspd_512(a: f64x8, imm8: i32) -> __mmask8;
+
+    #[link_name = "llvm.x86.avx512.fpclass.ps.128"]
+    fn vfpclassps_128(a: f32x4, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.ps.256"]
+    fn vfpclassps_256(a: f32x8, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.ps.512"]
+    fn vfpclassps_512(a: f32x16, imm8: i32) -> __mmask16;
 
     #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
     fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 0c725402a9..2c5002a2d0 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -16242,7 +16242,7 @@ pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
-    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
+    unsafe { simd_select_bitmask(k, _mm256_cvttps_epi32(a).as_i32x8(), src.as_i32x8()).as_m256i() }
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -16253,7 +16253,7 @@ pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
-    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
+    _mm256_mask_cvttps_epi32(_mm256_setzero_si256(), k, a)
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -16264,7 +16264,7 @@ pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
-    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
+    unsafe { simd_select_bitmask(k, _mm_cvttps_epi32(a).as_i32x4(), src.as_i32x4()).as_m128i() }
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -16275,7 +16275,7 @@ pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
-    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
+    _mm_mask_cvttps_epi32(_mm_setzero_si128(), k, a)
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
@@ -16478,7 +16478,7 @@ pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
 pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
-    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
+    unsafe { simd_select_bitmask(k, _mm256_cvttpd_epi32(a).as_i32x4(), src.as_i32x4()).as_m128i() }
 }
 
 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -16489,7 +16489,7 @@ pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
 pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
-    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
+    _mm256_mask_cvttpd_epi32(_mm_setzero_si128(), k, a)
 }
 
 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -44430,10 +44430,6 @@ unsafe extern "C" {
 
     #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
     fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
-    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
-    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
-    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
 
     #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
     fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
@@ -44444,8 +44440,6 @@ unsafe extern "C" {
 
     #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
     fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
-    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
-    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
 
     #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
     fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml
index 383e50b7cc..29dd3a095d 100644
--- a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml
+++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml
@@ -562,7 +562,7 @@ intrinsics:
     assert_instr: []
     compose:
       - LLVMLink:
-          name: llvm.experimental.vector.insert.{sve_type}.{neon_type}
+          name: llvm.vector.insert.{sve_type}.{neon_type}
           arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
       - Let:
           - op
@@ -657,7 +657,7 @@ intrinsics:
     assert_instr: []
     compose:
       - LLVMLink:
-          name: llvm.experimental.vector.insert.{sve_type}.{neon_type}
+          name: llvm.vector.insert.{sve_type}.{neon_type}
           arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
       - Let:
           - op
@@ -713,7 +713,7 @@ intrinsics:
     assert_instr: []
     compose:
       - LLVMLink:
-          name: llvm.experimental.vector.insert.{sve_type}.{neon_type}
+          name: llvm.vector.insert.{sve_type}.{neon_type}
           arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
       - Let:
           - op
@@ -755,7 +755,7 @@ intrinsics:
     assert_instr: []
    compose:
       - LLVMLink:
-          name: llvm.experimental.vector.insert.{sve_type}.{neon_type}
+          name: llvm.vector.insert.{sve_type}.{neon_type}
           arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
       - Let:
           - op
@@ -1210,7 +1210,7 @@ intrinsics:
     types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
     assert_instr: [rev]
     compose:
-      - LLVMLink: { name: "rev.{sve_type}" }
+      - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" }
 
 - name: svrev_{type}
   attr: [*sve-unstable]
@@ -1220,7 +1220,7 @@ intrinsics:
     types: [b8, b16, b32, b64]
     assert_instr: [rev]
     compose:
-      - LLVMLink: { name: "rev.{sve_type}" }
+      - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" }
 
 - name: svrevb[_{type}]{_mxz}
   attr: [*sve-unstable]