diff --git a/coresimd/aarch64/crypto.rs b/coresimd/aarch64/crypto.rs new file mode 100644 index 0000000000..28f1c5ded3 --- /dev/null +++ b/coresimd/aarch64/crypto.rs @@ -0,0 +1,428 @@ +use coresimd::arm::uint32x4_t; +use coresimd::arm::uint8x16_t; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.aarch64.crypto.aese"] + fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.crypto.aesd"] + fn vaesdq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.crypto.aesmc"] + fn vaesmcq_u8_(data: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.crypto.aesimc"] + fn vaesimcq_u8_(data: uint8x16_t) -> uint8x16_t; + + #[link_name = "llvm.aarch64.crypto.sha1h"] + fn vsha1h_u32_(hash_e: u32) -> u32; + #[link_name = "llvm.aarch64.crypto.sha1su0"] + fn vsha1su0q_u32_( + w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1su1"] + fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1c"] + fn vsha1cq_u32_( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1p"] + fn vsha1pq_u32_( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1m"] + fn vsha1mq_u32_( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t + ) -> uint32x4_t; + + #[link_name = "llvm.aarch64.crypto.sha256h"] + fn vsha256hq_u32_( + hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha256h2"] + fn vsha256h2q_u32_( + hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha256su0"] + fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha256su1"] + fn vsha256su1q_u32_( + tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t + ) -> uint32x4_t; +} + +#[cfg(test)] +use stdsimd_test::assert_instr; + +/// AES single round encryption. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aese))] +pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + vaeseq_u8_(data, key) +} + +/// AES single round decryption. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aesd))] +pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + vaesdq_u8_(data, key) +} + +/// AES mix columns. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aesmc))] +pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { + vaesmcq_u8_(data) +} + +/// AES inverse mix columns. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aesimc))] +pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { + vaesimcq_u8_(data) +} + +/// SHA1 fixed rotate. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1h))] +pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 { + vsha1h_u32_(hash_e) +} + +/// SHA1 hash update accelerator, choose. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1c))] +pub unsafe fn vsha1cq_u32( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t +) -> uint32x4_t { + vsha1cq_u32_(hash_abcd, hash_e, wk) +} + +/// SHA1 hash update accelerator, majority. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1m))] +pub unsafe fn vsha1mq_u32( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t +) -> uint32x4_t { + vsha1mq_u32_(hash_abcd, hash_e, wk) +} + +/// SHA1 hash update accelerator, parity. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1p))] +pub unsafe fn vsha1pq_u32( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t +) -> uint32x4_t { + vsha1pq_u32_(hash_abcd, hash_e, wk) +} + +/// SHA1 schedule update accelerator, first part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1su0))] +pub unsafe fn vsha1su0q_u32( + w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t +) -> uint32x4_t { + vsha1su0q_u32_(w0_3, w4_7, w8_11) +} + +/// SHA1 schedule update accelerator, second part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1su1))] +pub unsafe fn vsha1su1q_u32( + tw0_3: uint32x4_t, w12_15: uint32x4_t +) -> uint32x4_t { + vsha1su1q_u32_(tw0_3, w12_15) +} + +/// SHA256 hash update accelerator. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256h))] +pub unsafe fn vsha256hq_u32( + hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t +) -> uint32x4_t { + vsha256hq_u32_(hash_abcd, hash_efgh, wk) +} + +/// SHA256 hash update accelerator, upper part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256h2))] +pub unsafe fn vsha256h2q_u32( + hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t +) -> uint32x4_t { + vsha256h2q_u32_(hash_efgh, hash_abcd, wk) +} + +/// SHA256 schedule update accelerator, first part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256su0))] +pub unsafe fn vsha256su0q_u32( + w0_3: uint32x4_t, w4_7: uint32x4_t +) -> uint32x4_t { + vsha256su0q_u32_(w0_3, w4_7) +} + +/// SHA256 schedule update accelerator, second part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256su1))] +pub unsafe fn vsha256su1q_u32( + tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t +) -> uint32x4_t { + vsha256su1q_u32_(tw0_3, w8_11, w12_15) +} + +#[cfg(test)] +mod tests { + use coresimd::aarch64::*; + use simd::*; + use std::mem; + use stdsimd_test::simd_test; + + #[simd_test = "crypto"] + unsafe fn test_vaeseq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7) + .into_bits(); + let r: u8x16 = vaeseq_u8(data, key).into_bits(); + assert_eq!( + r, + u8x16::new( + 124, + 123, + 124, + 118, + 124, + 123, + 124, + 197, + 124, + 123, + 124, + 118, + 124, + 123, + 124, + 197 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vaesdq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7) + .into_bits(); + let r: u8x16 = vaesdq_u8(data, key).into_bits(); + assert_eq!( + r, + u8x16::new( + 9, + 213, + 9, + 251, + 9, + 213, + 9, + 56, + 9, + 213, + 9, + 251, + 9, + 213, + 9, + 56 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vaesmcq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let r: u8x16 = vaesmcq_u8(data).into_bits(); + assert_eq!( + r, + u8x16::new( + 3, + 4, + 9, + 10, + 15, + 8, + 21, + 30, + 3, + 4, + 9, + 10, + 15, + 8, + 21, + 30 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vaesimcq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let r: u8x16 = vaesimcq_u8(data).into_bits(); + assert_eq!( + r, + u8x16::new( + 43, + 60, + 33, + 50, + 103, + 80, + 125, + 70, + 43, + 60, + 33, + 50, + 103, + 80, + 125, + 70 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1h_u32() { + assert_eq!(vsha1h_u32(0x1234), 0x048d); + assert_eq!(vsha1h_u32(0x5678), 0x159e); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1su0q_u32() { + let r: u32x4 = vsha1su0q_u32( + u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) + .into_bits(), + u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) + .into_bits(), + u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) + .into_bits(), + ).into_bits(); + assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678)); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1su1q_u32() { + let r: u32x4 = vsha1su1q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1cq_u32() { + let r: u32x4 = vsha1cq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + 0x1234, + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1pq_u32() { + let r: u32x4 = vsha1pq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + 0x1234, + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1mq_u32() { + let r: u32x4 = vsha1mq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + 0x1234, + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256hq_u32() { + let r: u32x4 = vsha256hq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256h2q_u32() { + let r: u32x4 = vsha256h2q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256su0q_u32() { + let r: u32x4 = vsha256su0q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256su1q_u32() { + let r: u32x4 = vsha256su1q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f) + ); + } +} diff --git a/coresimd/aarch64/mod.rs b/coresimd/aarch64/mod.rs index 5c794e3750..a4419db757 100644 --- a/coresimd/aarch64/mod.rs +++ b/coresimd/aarch64/mod.rs @@ -13,3 +13,6 @@ pub use self::v8::*; mod neon; pub use self::neon::*; + +mod crypto; +pub use self::crypto::*; diff --git a/coresimd/aarch64/neon.rs b/coresimd/aarch64/neon.rs index 2b272c7427..25f03a63d3 100644 --- a/coresimd/aarch64/neon.rs +++ b/coresimd/aarch64/neon.rs @@ -2,11 +2,11 @@ // FIXME: replace neon with asimd +use coresimd::arm::*; +use coresimd::simd::*; +use coresimd::simd_llvm::simd_add; #[cfg(test)] use stdsimd_test::assert_instr; -use coresimd::simd_llvm::simd_add; -use coresimd::simd::*; -use coresimd::arm::*; types! { /// ARM-specific 64-bit wide vector of one packed `f64`. @@ -382,10 +382,10 @@ pub unsafe fn vminvq_f64(a: float64x2_t) -> f64 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; - use simd::*; use coresimd::aarch64::*; + use simd::*; use std::mem; + use stdsimd_test::simd_test; #[simd_test = "neon"] unsafe fn test_vadd_f64() { diff --git a/coresimd/arm/neon.rs b/coresimd/arm/neon.rs index 43ce2bec13..98dac8db68 100644 --- a/coresimd/arm/neon.rs +++ b/coresimd/arm/neon.rs @@ -1,9 +1,9 @@ //! ARMv7 NEON intrinsics +use coresimd::simd::*; +use coresimd::simd_llvm::*; #[cfg(test)] use stdsimd_test::assert_instr; -use coresimd::simd_llvm::*; -use coresimd::simd::*; types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. @@ -682,10 +682,10 @@ pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; - use simd::*; use coresimd::arm::*; + use simd::*; use std::mem; + use stdsimd_test::simd_test; #[simd_test = "neon"] unsafe fn test_vadd_s8() { diff --git a/coresimd/mips/msa.rs b/coresimd/mips/msa.rs index a2e7e40f57..899330026e 100644 --- a/coresimd/mips/msa.rs +++ b/coresimd/mips/msa.rs @@ -5,9 +5,9 @@ //! //! [msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf +use coresimd::simd::*; #[cfg(test)] use stdsimd_test::assert_instr; -use coresimd::simd::*; #[allow(improper_ctypes)] extern "C" { @@ -28,9 +28,9 @@ pub unsafe fn __msa_add_a_b(a: i8x16, b: i8x16) -> i8x16 { #[cfg(test)] mod tests { + use coresimd::mips64::msa; use simd::*; use stdsimd_test::simd_test; - use coresimd::mips64::msa; #[simd_test = "msa"] unsafe fn __msa_add_a_b() { diff --git a/coresimd/mod.rs b/coresimd/mod.rs index 5e4361619e..5768ca9b34 100644 --- a/coresimd/mod.rs +++ b/coresimd/mod.rs @@ -77,8 +77,8 @@ pub mod arch { #[cfg(any(target_arch = "aarch64", dox))] #[doc(cfg(target_arch = "aarch64"))] pub mod aarch64 { - pub use coresimd::arm::*; pub use coresimd::aarch64::*; + pub use coresimd::arm::*; } /// Platform-specific intrinsics for the `wasm32` platform. @@ -115,10 +115,10 @@ mod x86; #[cfg(any(target_arch = "x86_64", dox))] mod x86_64; -#[cfg(any(target_arch = "arm", target_arch = "aarch64", dox))] -mod arm; #[cfg(any(target_arch = "aarch64", dox))] mod aarch64; +#[cfg(any(target_arch = "arm", target_arch = "aarch64", dox))] +mod arm; #[cfg(target_arch = "wasm32")] mod wasm32; diff --git a/coresimd/ppsv/api/cmp.rs b/coresimd/ppsv/api/cmp.rs index 37c5ca4252..ca2a939ab3 100644 --- a/coresimd/ppsv/api/cmp.rs +++ b/coresimd/ppsv/api/cmp.rs @@ -99,11 +99,16 @@ macro_rules! impl_bool_cmp { #[cfg(test)] macro_rules! test_cmp { - ($id:ident, $elem_ty:ident, $bool_ty:ident, - $true:expr, $false:expr) => { + ( + $id: ident, + $elem_ty: ident, + $bool_ty: ident, + $true: expr, + $false: expr + ) => { #[test] fn cmp() { - use ::coresimd::simd::*; + use coresimd::simd::*; let a = $id::splat($false); let b = $id::splat($true); @@ -139,5 +144,5 @@ macro_rules! test_cmp { let r = a.lt(b); assert!(r == e); } - } + }; } diff --git a/coresimd/ppsv/api/load_store.rs b/coresimd/ppsv/api/load_store.rs index 6c1e2348bf..102ffd8851 100644 --- a/coresimd/ppsv/api/load_store.rs +++ b/coresimd/ppsv/api/load_store.rs @@ -130,8 +130,8 @@ macro_rules! impl_load_store { pub unsafe fn load_unaligned_unchecked( slice: &[$elem_ty] ) -> Self { - use slice::SliceExt; use mem::size_of; + use slice::SliceExt; let target_ptr = slice.get_unchecked(0) as *const $elem_ty as *const u8; let mut x = Self::splat(0 as $elem_ty); diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs index 924e67f590..7c24f2c1e1 100644 --- a/coresimd/ppsv/api/minmax_reductions.rs +++ b/coresimd/ppsv/api/minmax_reductions.rs @@ -22,8 +22,8 @@ macro_rules! impl_minmax_reductions { pub fn max(self) -> $elem_ty { // FIXME: broken on AArch64 // https://bugs.llvm.org/show_bug.cgi?id=36796 - use num::Float; use cmp::Ord; + use num::Float; let mut x = self.extract(0); for i in 1..$id::lanes() { x = x.max(self.extract(i)); @@ -49,8 +49,8 @@ macro_rules! impl_minmax_reductions { pub fn min(self) -> $elem_ty { // FIXME: broken on AArch64 // https://bugs.llvm.org/show_bug.cgi?id=36796 - use num::Float; use cmp::Ord; + use num::Float; let mut x = self.extract(0); for i in 1..$id::lanes() { x = x.min(self.extract(i)); diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs index 840a6af92f..6e793fc88e 100644 --- a/coresimd/ppsv/mod.rs +++ b/coresimd/ppsv/mod.rs @@ -33,19 +33,19 @@ #[macro_use] mod api; -mod v16; -mod v32; -mod v64; mod v128; +mod v16; mod v256; +mod v32; mod v512; +mod v64; -pub use self::v16::*; -pub use self::v32::*; -pub use self::v64::*; pub use self::v128::*; +pub use self::v16::*; pub use self::v256::*; +pub use self::v32::*; pub use self::v512::*; +pub use self::v64::*; /// Safe lossless bitwise conversion from `T` to `Self`. pub trait FromBits: ::marker::Sized { diff --git a/coresimd/ppsv/v128.rs b/coresimd/ppsv/v128.rs index c9f7afb981..842fd5116f 100644 --- a/coresimd/ppsv/v128.rs +++ b/coresimd/ppsv/v128.rs @@ -121,13 +121,17 @@ use coresimd::arch::aarch64::{// FIXME: float16x8_t, uint8x16_t}; macro_rules! from_bits_arm { - ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => { - #[cfg(any(all(target_arch = "arm", - target_feature = "neon", - target_feature = "v7"), target_arch = "aarch64"))] + ( + $id: ident, + $elem_ty: ident, + $test_mod_arm: ident, + $test_mod_a64: ident + ) => { + #[cfg(any(all(target_arch = "arm", target_feature = "neon", + target_feature = "v7"), + target_arch = "aarch64"))] impl_from_bits_!( - $id: - int8x16_t, + $id: int8x16_t, uint8x16_t, int16x8_t, uint16x8_t, @@ -141,10 +145,8 @@ macro_rules! from_bits_arm { poly16x8_t ); #[cfg(target_arch = "aarch64")] - impl_from_bits_!( - $id: float64x2_t - ); - } + impl_from_bits_!($id: float64x2_t); + }; } impl_from_bits!( diff --git a/coresimd/ppsv/v64.rs b/coresimd/ppsv/v64.rs index a651a4a028..5376e1cf3b 100644 --- a/coresimd/ppsv/v64.rs +++ b/coresimd/ppsv/v64.rs @@ -101,13 +101,17 @@ use coresimd::arch::aarch64::{// FIXME: float16x4_t, uint8x8_t}; macro_rules! from_bits_arm { - ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => { - #[cfg(any(all(target_arch = "arm", - target_feature = "neon", - target_feature = "v7"), target_arch = "aarch64"))] + ( + $id: ident, + $elem_ty: ident, + $test_mod_arm: ident, + $test_mod_a64: ident + ) => { + #[cfg(any(all(target_arch = "arm", target_feature = "neon", + target_feature = "v7"), + target_arch = "aarch64"))] impl_from_bits_!( - $id: - int64x1_t, + $id: int64x1_t, uint64x1_t, uint32x2_t, int32x2_t, @@ -121,10 +125,8 @@ macro_rules! from_bits_arm { poly8x8_t ); #[cfg(target_arch = "aarch64")] - impl_from_bits_!( - $id: float64x1_t - ); - } + impl_from_bits_!($id: float64x1_t); + }; } impl_from_bits!( diff --git a/coresimd/x86/avx.rs b/coresimd/x86/avx.rs index d2ed05b09f..20c5cb5d33 100644 --- a/coresimd/x86/avx.rs +++ b/coresimd/x86/avx.rs @@ -13,8 +13,8 @@ //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions -use coresimd::simd_llvm::*; use coresimd::simd::*; +use coresimd::simd_llvm::*; use coresimd::x86::*; use intrinsics; use mem; @@ -142,9 +142,18 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g: expr, $h: expr) => { + ( + $a: expr, + $b: expr, + $c: expr, + $d: expr, + $e: expr, + $f: expr, + $g: expr, + $h: expr + ) => { simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); - } + }; } macro_rules! shuffle3 { ($a: expr, $b: expr, $c: expr, $e: expr, $f: expr, $g: expr) => { @@ -471,9 +480,18 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => { + ( + $a: expr, + $b: expr, + $c: expr, + $d: expr, + $e: expr, + $f: expr, + $g: expr, + $h: expr + ) => { simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); - } + }; } macro_rules! blend3 { ($a: expr, $b: expr, $c: expr, $d: expr, $e: expr, $f: expr) => { diff --git a/coresimd/x86/avx2.rs b/coresimd/x86/avx2.rs index 7751d996ce..7a3cbb25e9 100644 --- a/coresimd/x86/avx2.rs +++ b/coresimd/x86/avx2.rs @@ -18,8 +18,8 @@ //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate -use coresimd::simd_llvm::*; use coresimd::simd::*; +use coresimd::simd_llvm::*; use coresimd::x86::*; use mem; @@ -365,9 +365,18 @@ pub unsafe fn _mm256_blend_epi32( let a = a.as_i32x8(); let b = b.as_i32x8(); macro_rules! blend4 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => { + ( + $a: expr, + $b: expr, + $c: expr, + $d: expr, + $e: expr, + $f: expr, + $g: expr, + $h: expr + ) => { simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); - } + }; } macro_rules! blend3 { ($a: expr, $b: expr, $c: expr, $d: expr, $e: expr, $f: expr) => { @@ -420,31 +429,195 @@ pub unsafe fn _mm256_blend_epi16( let a = a.as_i16x16(); let b = b.as_i16x16(); macro_rules! blend4 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, - $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => { - simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p]) - } + ( + $a: expr, + $b: expr, + $c: expr, + $d: expr, + $e: expr, + $f: expr, + $g: expr, + $h: expr, + $i: expr, + $j: expr, + $k: expr, + $l: expr, + $m: expr, + $n: expr, + $o: expr, + $p: expr + ) => { + simd_shuffle16( + a, + b, + [ + $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, + $n, $o, $p, + ], + ) + }; } macro_rules! blend3 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, - $a2:expr, $b2:expr, $c2:expr, $d2:expr, $e2:expr, $f2:expr) => { + ( + $a: expr, + $b: expr, + $c: expr, + $d: expr, + $e: expr, + $f: expr, + $a2: expr, + $b2: expr, + $c2: expr, + $d2: expr, + $e2: expr, + $f2: expr + ) => { match (imm8 >> 6) & 0b11 { - 0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2, $f2, 14, 15), - 0b01 => blend4!($a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2, $e2, $f2, 30, 15), - 0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2, $e2, $f2, 14, 31), - _ => blend4!($a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2, $e2, $f2, 30, 31), + 0b00 => blend4!( + $a, + $b, + $c, + $d, + $e, + $f, + 6, + 7, + $a2, + $b2, + $c2, + $d2, + $e2, + $f2, + 14, + 15 + ), + 0b01 => blend4!( + $a, + $b, + $c, + $d, + $e, + $f, + 22, + 7, + $a2, + $b2, + $c2, + $d2, + $e2, + $f2, + 30, + 15 + ), + 0b10 => blend4!( + $a, + $b, + $c, + $d, + $e, + $f, + 6, + 23, + $a2, + $b2, + $c2, + $d2, + $e2, + $f2, + 14, + 31 + ), + _ => blend4!( + $a, + $b, + $c, + $d, + $e, + $f, + 22, + 23, + $a2, + $b2, + $c2, + $d2, + $e2, + $f2, + 30, + 31 + ), } - } + }; } macro_rules! blend2 { - ($a:expr, $b:expr, $c:expr, $d:expr, $a2:expr, $b2:expr, $c2:expr, $d2:expr) => { + ( + $a: expr, + $b: expr, + $c: expr, + $d: expr, + $a2: expr, + $b2: expr, + $c2: expr, + $d2: expr + ) => { match (imm8 >> 4) & 0b11 { - 0b00 => blend3!($a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13), - 0b01 => blend3!($a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13), - 0b10 => blend3!($a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29), - _ => blend3!($a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29), + 0b00 => blend3!( + $a, + $b, + $c, + $d, + 4, + 5, + $a2, + $b2, + $c2, + $d2, + 12, + 13 + ), + 0b01 => blend3!( + $a, + $b, + $c, + $d, + 20, + 5, + $a2, + $b2, + $c2, + $d2, + 28, + 13 + ), + 0b10 => blend3!( + $a, + $b, + $c, + $d, + 4, + 21, + $a2, + $b2, + $c2, + $d2, + 12, + 29 + ), + _ => blend3!( + $a, + $b, + $c, + $d, + 20, + 21, + $a2, + $b2, + $c2, + $d2, + 28, + 29 + ), } - } + }; } macro_rules! blend1 { ($a1: expr, $b1: expr, $a2: expr, $b2: expr) => { @@ -3520,8 +3693,8 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; use std; + use stdsimd_test::simd_test; use coresimd::x86::*; diff --git a/coresimd/x86/fxsr.rs b/coresimd/x86/fxsr.rs index fe04de4fdc..2fa2685157 100644 --- a/coresimd/x86/fxsr.rs +++ b/coresimd/x86/fxsr.rs @@ -52,8 +52,8 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) { #[cfg(test)] mod tests { use coresimd::x86::i386::fxsr; - use stdsimd_test::simd_test; use std::fmt; + use stdsimd_test::simd_test; #[repr(align(16))] struct FxsaveArea { diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs index 87c5efab3c..16ef3eead5 100644 --- a/coresimd/x86/mod.rs +++ b/coresimd/x86/mod.rs @@ -1,7 +1,7 @@ //! `x86` and `x86_64` intrinsics. -use prelude::v1::*; use mem; +use prelude::v1::*; #[macro_use] mod macros; diff --git a/coresimd/x86/rdtsc.rs b/coresimd/x86/rdtsc.rs index de064e2dfe..468fa09bc8 100644 --- a/coresimd/x86/rdtsc.rs +++ b/coresimd/x86/rdtsc.rs @@ -53,8 +53,8 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; use coresimd::x86::rdtsc; + use stdsimd_test::simd_test; #[simd_test = "sse2"] unsafe fn _rdtsc() { diff --git a/coresimd/x86/sha.rs b/coresimd/x86/sha.rs index 4cfb2610f7..23acfe4edc 100644 --- a/coresimd/x86/sha.rs +++ b/coresimd/x86/sha.rs @@ -116,15 +116,15 @@ pub unsafe fn _mm_sha256rnds2_epu32( #[cfg(test)] mod tests { - use std::mem::{self, transmute}; - use std::f64::{self, NAN}; use std::f32; + use std::f64::{self, NAN}; use std::i32; + use std::mem::{self, transmute}; + use coresimd::simd::*; + use coresimd::x86::*; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. - use coresimd::x86::*; - use coresimd::simd::*; #[simd_test = "sha"] #[allow(overflowing_literals)] diff --git a/coresimd/x86/sse.rs b/coresimd/x86/sse.rs index 6512e153e8..5078c1d0dc 100644 --- a/coresimd/x86/sse.rs +++ b/coresimd/x86/sse.rs @@ -1,7 +1,7 @@ //! Streaming SIMD Extensions (SSE) -use coresimd::simd_llvm::*; use coresimd::simd::*; +use coresimd::simd_llvm::*; use coresimd::x86::*; use intrinsics; use mem; @@ -2176,8 +2176,8 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { #[cfg(test)] mod tests { - use std::mem::transmute; use std::f32::NAN; + use std::mem::transmute; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. diff --git a/coresimd/x86/sse2.rs b/coresimd/x86/sse2.rs index ec97090790..e11071a0eb 100644 --- a/coresimd/x86/sse2.rs +++ b/coresimd/x86/sse2.rs @@ -3,8 +3,8 @@ #[cfg(test)] use stdsimd_test::assert_instr; -use coresimd::simd_llvm::*; use coresimd::simd::*; +use coresimd::simd_llvm::*; use coresimd::x86::*; use intrinsics; use mem; @@ -2559,15 +2559,15 @@ extern "C" { #[cfg(test)] mod tests { - use std::mem::{self, transmute}; - use std::f64::{self, NAN}; use std::f32; + use std::f64::{self, NAN}; use std::i32; + use std::mem::{self, transmute}; + use coresimd::simd::*; + use coresimd::x86::*; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. - use coresimd::x86::*; - use coresimd::simd::*; #[simd_test = "sse2"] unsafe fn test_mm_pause() { diff --git a/coresimd/x86/sse3.rs b/coresimd/x86/sse3.rs index 6272502996..ea41164b24 100644 --- a/coresimd/x86/sse3.rs +++ b/coresimd/x86/sse3.rs @@ -1,7 +1,7 @@ //! Streaming SIMD Extensions 3 (SSE3) -use coresimd::simd_llvm::{simd_shuffle2, simd_shuffle4}; use coresimd::simd::*; +use coresimd::simd_llvm::{simd_shuffle2, simd_shuffle4}; use coresimd::x86::*; #[cfg(test)] diff --git a/coresimd/x86/sse41.rs b/coresimd/x86/sse41.rs index 0a8089957f..30a88c1cab 100644 --- a/coresimd/x86/sse41.rs +++ b/coresimd/x86/sse41.rs @@ -1,7 +1,7 @@ //! Streaming SIMD Extensions 4.1 (SSE4.1) -use coresimd::simd_llvm::*; use coresimd::simd::*; +use coresimd::simd_llvm::*; use coresimd::x86::*; use mem; @@ -1042,9 +1042,9 @@ extern "C" { #[cfg(test)] mod tests { + use coresimd::x86::*; use std::mem; use stdsimd_test::simd_test; - use coresimd::x86::*; #[simd_test = "sse4.1"] unsafe fn test_mm_blendv_epi8() { diff --git a/coresimd/x86/sse42.rs b/coresimd/x86/sse42.rs index b32d7dc2ac..a976d18c90 100644 --- a/coresimd/x86/sse42.rs +++ b/coresimd/x86/sse42.rs @@ -5,8 +5,8 @@ #[cfg(test)] use stdsimd_test::assert_instr; -use coresimd::simd_llvm::*; use coresimd::simd::*; +use coresimd::simd_llvm::*; use coresimd::x86::*; /// String contains unsigned 8-bit characters *(Default)* @@ -704,8 +704,8 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use std::ptr; use coresimd::x86::*; + use std::ptr; // Currently one cannot `load` a &[u8] that is is less than 16 // in length. This makes loading strings less than 16 in length diff --git a/coresimd/x86/sse4a.rs b/coresimd/x86/sse4a.rs index 0aae55502a..370034c38a 100644 --- a/coresimd/x86/sse4a.rs +++ b/coresimd/x86/sse4a.rs @@ -74,8 +74,8 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; use coresimd::x86::*; + use stdsimd_test::simd_test; #[simd_test = "sse4a"] unsafe fn test_mm_extract_si64() { diff --git a/coresimd/x86/ssse3.rs b/coresimd/x86/ssse3.rs index bcda3e8fad..784a394e16 100644 --- a/coresimd/x86/ssse3.rs +++ b/coresimd/x86/ssse3.rs @@ -1,7 +1,7 @@ //! Supplemental Streaming SIMD Extensions 3 (SSSE3) -use coresimd::simd_llvm::simd_shuffle16; use coresimd::simd::*; +use coresimd::simd_llvm::simd_shuffle16; use coresimd::x86::*; use mem; diff --git a/coresimd/x86_64/fxsr.rs b/coresimd/x86_64/fxsr.rs index 2a81f5580d..42280b4b3a 100644 --- a/coresimd/x86_64/fxsr.rs +++ b/coresimd/x86_64/fxsr.rs @@ -52,8 +52,8 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) { #[cfg(test)] mod tests { use coresimd::x86::x86_64::fxsr; - use stdsimd_test::simd_test; use std::fmt; + use stdsimd_test::simd_test; #[repr(align(16))] struct FxsaveArea { diff --git a/coresimd/x86_64/sse2.rs b/coresimd/x86_64/sse2.rs index 1a540e2f77..e48708ee59 100644 --- a/coresimd/x86_64/sse2.rs +++ b/coresimd/x86_64/sse2.rs @@ -1,7 +1,7 @@ //! `x86_64`'s Streaming SIMD Extensions 2 (SSE2) -use coresimd::x86::*; use coresimd::simd_llvm::*; +use coresimd::x86::*; use intrinsics; #[cfg(test)] diff --git a/coresimd/x86_64/sse41.rs b/coresimd/x86_64/sse41.rs index eb83103353..8a0bcdb05f 100644 --- a/coresimd/x86_64/sse41.rs +++ b/coresimd/x86_64/sse41.rs @@ -1,7 +1,7 @@ //! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1) -use coresimd::x86::*; use coresimd::simd_llvm::*; +use coresimd::x86::*; use mem; #[cfg(test)] @@ -30,8 +30,8 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; use coresimd::arch::x86_64::*; + use stdsimd_test::simd_test; #[simd_test = "sse4.1"] unsafe fn test_mm_extract_epi64() { diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs index e7f9ec682b..7791045115 100644 --- a/crates/coresimd/src/lib.rs +++ b/crates/coresimd/src/lib.rs @@ -10,7 +10,7 @@ #![allow(dead_code)] #![allow(unused_features)] #![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, - simd_ffi, target_feature, cfg_target_feature, i128_type, asm, + simd_ffi, target_feature, cfg_target_feature, asm, integer_atomics, stmt_expr_attributes, core_intrinsics, crate_in_paths, no_core, attr_literals, rustc_attrs, stdsimd, staged_api, fn_must_use, core_float, core_slice_ext, align_offset, diff --git a/crates/simd-test-macro/src/lib.rs b/crates/simd-test-macro/src/lib.rs index 63b282f4a7..19d005f6bb 100644 --- a/crates/simd-test-macro/src/lib.rs +++ b/crates/simd-test-macro/src/lib.rs @@ -12,8 +12,8 @@ extern crate quote; use std::env; -use proc_macro2::{Term, TokenNode, TokenStream, TokenTree}; use proc_macro2::Literal; +use proc_macro2::{Term, TokenNode, TokenStream, TokenTree}; fn string(s: &str) -> TokenTree { TokenNode::Literal(Literal::string(s)).into() diff --git a/crates/stdsimd-verify/src/lib.rs b/crates/stdsimd-verify/src/lib.rs index 1fad214406..f35c9cf59e 100644 --- a/crates/stdsimd-verify/src/lib.rs +++ b/crates/stdsimd-verify/src/lib.rs @@ -7,9 +7,9 @@ extern crate quote; #[macro_use] extern crate syn; -use std::path::Path; use std::fs::File; use std::io::Read; +use std::path::Path; use proc_macro::TokenStream; use quote::Tokens; diff --git a/crates/stdsimd/src/lib.rs b/crates/stdsimd/src/lib.rs index bc6a843afc..c77b7afd9f 100644 --- a/crates/stdsimd/src/lib.rs +++ b/crates/stdsimd/src/lib.rs @@ -29,9 +29,9 @@ mod stdsimd; pub use stdsimd::*; -#[allow(unused_imports)] -use _std::prelude; #[allow(unused_imports)] use _std::fs; #[allow(unused_imports)] use _std::io; +#[allow(unused_imports)] +use _std::prelude; diff --git a/examples/hex.rs b/examples/hex.rs index 85b160d661..5aec17d08c 100644 --- a/examples/hex.rs +++ b/examples/hex.rs @@ -30,8 +30,8 @@ extern crate stdsimd; #[macro_use] extern crate quickcheck; -use std::str; use std::io::{self, Read}; +use std::str; #[cfg(target_arch = "x86")] use stdsimd::arch::x86::*; diff --git a/examples/nbody.rs b/examples/nbody.rs index b78ce8be67..86cc8ef38f 100644 --- a/examples/nbody.rs +++ b/examples/nbody.rs @@ -45,10 +45,10 @@ impl Frsqrt for f64x2 { } #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - #[cfg(target_arch = "arm")] - use stdsimd::arch::arm::*; #[cfg(target_arch = "aarch64")] use stdsimd::arch::aarch64::*; + #[cfg(target_arch = "arm")] + use stdsimd::arch::arm::*; let t: f32x2 = (*self).into(); let t: f32x2 = unsafe { vrsqrte_f32(t.into_bits()).into_bits() };