Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VAES intrinsics #942

Merged
merged 4 commits into from
Nov 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
332 changes: 332 additions & 0 deletions crates/core_arch/src/x86/avx512vaes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
//! Vectorized AES Instructions (VAES)
//!
//! The intrinsics here correspond to those in the `immintrin.h` C header.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf

use crate::core_arch::x86::__m256i;
use crate::core_arch::x86::__m512i;

#[cfg(test)]
use stdarch_test::assert_instr;

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.aesni.aesenc.256"]
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesenclast.256"]
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesdec.256"]
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesdeclast.256"]
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesenc.512"]
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesenclast.512"]
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesdec.512"]
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesdeclast.512"]
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
}

/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenc_256(a, round_key)
}

/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenclast_256(a, round_key)
}

/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdec_256(a, round_key)
}

/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdeclast_256(a, round_key)
}

/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenc_512(a, round_key)
}

/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenclast_512(a, round_key)
}

/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdec_512(a, round_key)
}

/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdeclast_512(a, round_key)
}

#[cfg(test)]
mod tests {
// The constants in the tests below are just bit patterns. They should not
// be interpreted as integers; signedness does not make sense for them, but
// __mXXXi happens to be defined in terms of signed integers.
#![allow(overflowing_literals)]

use stdarch_test::simd_test;

use crate::core_arch::x86::*;

// the first parts of these tests are straight ports from the AES-NI tests
// the second parts directly compare the two, for inputs that are different across lanes
// and "more random" than the standard test vectors
// ideally we'd be using quickcheck here instead

#[target_feature(enable = "avx2")]
unsafe fn helper_for_256_avx512vaes(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
) {
let a = _mm256_set_epi64x(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
);
let k = _mm256_set_epi64x(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
);
let mut a_decomp = [_mm_setzero_si128(); 2];
a_decomp[0] = _mm256_extracti128_si256(a, 0);
a_decomp[1] = _mm256_extracti128_si256(a, 1);
let mut k_decomp = [_mm_setzero_si128(); 2];
k_decomp[0] = _mm256_extracti128_si256(k, 0);
k_decomp[1] = _mm256_extracti128_si256(k, 1);
let r = vectorized(a, k);
let mut e_decomp = [_mm_setzero_si128(); 2];
for i in 0..2 {
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
}
assert_eq_m128i(_mm256_extracti128_si256(r, 0), e_decomp[0]);
assert_eq_m128i(_mm256_extracti128_si256(r, 1), e_decomp[1]);
}

#[target_feature(enable = "sse2")]
unsafe fn setup_state_key<T>(broadcast: unsafe fn(__m128i) -> T) -> (T, T) {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
(broadcast(a), broadcast(k))
}

#[target_feature(enable = "avx2")]
unsafe fn setup_state_key_256() -> (__m256i, __m256i) {
setup_state_key(_mm256_broadcastsi128_si256)
}

#[target_feature(enable = "avx512f")]
unsafe fn setup_state_key_512() -> (__m512i, __m512i) {
setup_state_key(_mm512_broadcast_i32x4)
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesdec_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let (a, k) = setup_state_key_256();
let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
let e = _mm256_broadcastsi128_si256(e);
let r = _mm256_aesdec_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesdec_si128, _mm256_aesdec_epi128);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesdeclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
let (a, k) = setup_state_key_256();
let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
let e = _mm256_broadcastsi128_si256(e);
let r = _mm256_aesdeclast_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesenc_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
// they are repeated appropriately
let (a, k) = setup_state_key_256();
let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
let e = _mm256_broadcastsi128_si256(e);
let r = _mm256_aesenc_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesenc_si128, _mm256_aesenc_epi128);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesenclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
let (a, k) = setup_state_key_256();
let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
let e = _mm256_broadcastsi128_si256(e);
let r = _mm256_aesenclast_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
}

#[target_feature(enable = "avx512f")]
unsafe fn helper_for_512_avx512vaes(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
) {
let a = _mm512_set_epi64(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
0xAB05CFFA2621154C,
0x1171B47A186174C9,
0x8C6B6C0E7595CEC9,
0xBE3E7D4934E961BD,
);
let k = _mm512_set_epi64(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
0xB1927AB22F31D0EC,
0xA9A5DA619BE4D7AF,
0xCA2590F56884FDC6,
0x19BE9F660038BDB5,
);
let mut a_decomp = [_mm_setzero_si128(); 4];
a_decomp[0] = _mm512_extracti32x4_epi32(a, 0);
a_decomp[1] = _mm512_extracti32x4_epi32(a, 1);
a_decomp[2] = _mm512_extracti32x4_epi32(a, 2);
a_decomp[3] = _mm512_extracti32x4_epi32(a, 3);
let mut k_decomp = [_mm_setzero_si128(); 4];
k_decomp[0] = _mm512_extracti32x4_epi32(k, 0);
k_decomp[1] = _mm512_extracti32x4_epi32(k, 1);
k_decomp[2] = _mm512_extracti32x4_epi32(k, 2);
k_decomp[3] = _mm512_extracti32x4_epi32(k, 3);
let r = vectorized(a, k);
let mut e_decomp = [_mm_setzero_si128(); 4];
for i in 0..4 {
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
}
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 0), e_decomp[0]);
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 1), e_decomp[1]);
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 2), e_decomp[2]);
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 3), e_decomp[3]);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesdec_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let (a, k) = setup_state_key_512();
let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
let e = _mm512_broadcast_i32x4(e);
let r = _mm512_aesdec_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesdec_si128, _mm512_aesdec_epi128);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesdeclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
let (a, k) = setup_state_key_512();
let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
let e = _mm512_broadcast_i32x4(e);
let r = _mm512_aesdeclast_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesenc_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
let (a, k) = setup_state_key_512();
let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
let e = _mm512_broadcast_i32x4(e);
let r = _mm512_aesenc_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesenc_si128, _mm512_aesenc_epi128);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesenclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
let (a, k) = setup_state_key_512();
let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
let e = _mm512_broadcast_i32x4(e);
let r = _mm512_aesenclast_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
}
}
3 changes: 3 additions & 0 deletions crates/core_arch/src/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,9 @@ pub use self::avx512f::*;
mod avx512ifma;
pub use self::avx512ifma::*;

mod avx512vaes;
pub use self::avx512vaes::*;

mod bt;
pub use self::bt::*;

Expand Down