Skip to content

Commit

Permalink
Update f16c intrinsics to use the f16c target feature
Browse files (browse the repository at this point in the history)
  • Loading branch information
gnzlbg committed Apr 24, 2019
1 parent aacbae3 commit fe30e8a
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 11 deletions.
12 changes: 6 additions & 6 deletions crates/core_arch/src/x86/f16c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ extern "unadjusted" {
/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
/// vector.
#[inline]
#[target_feature(enable = "avx512f")]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
transmute(llvm_vcvtph2ps_128(transmute(a)))
Expand All @@ -36,7 +36,7 @@ pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
#[inline]
#[target_feature(enable = "avx512f")]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
transmute(llvm_vcvtph2ps_256(transmute(a)))
Expand Down Expand Up @@ -70,7 +70,7 @@ macro_rules! dispatch_rounding {
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
#[inline]
#[target_feature(enable = "avx512f")]
#[target_feature(enable = "f16c")]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
Expand All @@ -94,7 +94,7 @@ pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
#[inline]
#[target_feature(enable = "avx512f")]
#[target_feature(enable = "f16c")]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
Expand All @@ -112,7 +112,7 @@ mod tests {
use crate::{core_arch::x86::*, mem::transmute};
use stdsimd_test::simd_test;

#[simd_test(enable = "avx512f")]
#[simd_test(enable = "f16c")]
unsafe fn test_mm_cvtph_ps() {
let array = [1_f32, 2_f32, 3_f32, 4_f32];
let float_vec: __m128 = transmute(array);
Expand All @@ -122,7 +122,7 @@ mod tests {
assert_eq!(result, array);
}

#[simd_test(enable = "avx512f")]
#[simd_test(enable = "f16c")]
unsafe fn test_mm256_cvtph_ps() {
let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
let float_vec: __m256 = transmute(array);
Expand Down
2 changes: 1 addition & 1 deletion crates/std_detect/src/detect/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ macro_rules! is_x86_feature_detected {
$crate::detect::Feature::avx512_vpopcntdq)
};
("f16c") => {
cfg!(target_feature = "avx512f") || $crate::detect::check_for(
cfg!(target_feature = "f16c") || $crate::detect::check_for(
$crate::detect::Feature::f16c)
};
("fma") => {
Expand Down
7 changes: 3 additions & 4 deletions crates/stdsimd-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
// it "avx512ifma".
"avx512ifma52" => String::from("avx512ifma"),
// See: https://github.com/rust-lang-nursery/stdsimd/issues/738
// FIXME: we need to fix "fp16c" to "f16c" here. Since
// https://github.com/rust-lang/rust/pull/60191 is not merged,
// we temporarily map it to "avx512f".
"fp16c" => String::from("avx512f"),
// The intrinsics guide refers to the `f16c` feature as `fp16c`, in
// disagreement with Intel's architecture manuals.
"fp16c" => String::from("f16c"),
_ => cpuid,
};
let fixed_cpuid = fixup_cpuid(cpuid);
Expand Down

0 comments on commit fe30e8a

Please sign in to comment.