diff --git a/.travis.yml b/.travis.yml
index 2a22bd66b..7ae054104 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -45,6 +45,10 @@ env:
BUILDNAME="STOCK-MPI"
BUILDOPTIONS="-ULTM_DESC -UTFM_DESC -UUSE_LTM -UUSE_TFM"
BUILDSCRIPT=".ci/run.sh"
+ - |
+ BUILDNAME="STOCK+AESNI"
+ BUILDOPTIONS="-msse4.1 -maes"
+ BUILDSCRIPT=".ci/run.sh"
- |
BUILDNAME="EASY"
BUILDOPTIONS="-DLTC_EASY"
diff --git a/doc/crypt.tex b/doc/crypt.tex
index a6541dc87..7965faa0c 100644
--- a/doc/crypt.tex
+++ b/doc/crypt.tex
@@ -604,30 +604,31 @@ \subsection{Simple Encryption Demonstration}
\begin{figure}[hpbt]
\begin{small}
\begin{center}
-\begin{tabular}{|c|c|c|c|c|c|}
- \hline \textbf{Name} & \textbf{Descriptor Name} & \textbf{Block Size} & \textbf{Key Range} & \textbf{Rounds} \\
- \hline Blowfish & blowfish\_desc & 8 & 8 $\ldots$ 56 & 16 \\
- \hline X-Tea & xtea\_desc & 8 & 16 & 32 \\
- \hline RC2 & rc2\_desc & 8 & 5 $\ldots$ 128 & 16 \\
- \hline RC5-32/12/b & rc5\_desc & 8 & 8 $\ldots$ 128 & 12 $\ldots$ 24 \\
- \hline RC6-32/20/b & rc6\_desc & 16 & 8 $\ldots$ 128 & 20 \\
- \hline SAFER+ & saferp\_desc &16 & 16, 24, 32 & 8, 12, 16 \\
- \hline AES & aes\_desc & 16 & 16, 24, 32 & 10, 12, 14 \\
- & aes\_enc\_desc & 16 & 16, 24, 32 & 10, 12, 14 \\
- \hline Twofish & twofish\_desc & 16 & 16, 24, 32 & 16 \\
- \hline DES & des\_desc & 8 & 8 & 16 \\
- \hline 3DES (EDE mode) & des3\_desc & 8 & 16, 24 & 16 \\
- \hline CAST5 (CAST-128) & cast5\_desc & 8 & 5 $\ldots$ 16 & 12, 16 \\
- \hline Noekeon & noekeon\_desc & 16 & 16 & 16 \\
- \hline Skipjack & skipjack\_desc & 8 & 10 & 32 \\
- \hline Anubis & anubis\_desc & 16 & 16 $\ldots$ 40 & 12 $\ldots$ 18 \\
- \hline Khazad & khazad\_desc & 8 & 16 & 8 \\
- \hline SEED & kseed\_desc & 16 & 16 & 16 \\
- \hline KASUMI & kasumi\_desc & 8 & 16 & 8 \\
- \hline Camellia & camellia\_desc & 16 & 16, 24, 32 & 18, 24 \\
- \hline IDEA & idea\_desc & 8 & 16 & 8 \\
- \hline Serpent & serpent\_desc & 16 & 16, 24, 32 & 32 \\
- \hline TEA & tea\_desc & 8 & 16 & 32 \\
+\begin{tabular}{|c|c|c|c|c|c|}
+ \hline \textbf{Name} & \textbf{Descriptor Name} & \textbf{Block Size} & \textbf{Key Range} & \textbf{Rounds} & \textbf{Id} \\
+ \hline Blowfish & blowfish\_desc & 8 & 8 $\ldots$ 56 & 16 & 0 \\
+ \hline X-Tea & xtea\_desc & 8 & 16 & 32 & 1 \\
+ \hline RC2 & rc2\_desc & 8 & 5 $\ldots$ 128 & 16 & 12 \\
+ \hline RC5-32/12/b & rc5\_desc & 8 & 8 $\ldots$ 128 & 12 $\ldots$ 24 & 2 \\
+ \hline RC6-32/20/b & rc6\_desc & 16 & 8 $\ldots$ 128 & 20 & 3 \\
+ \hline SAFER+ & saferp\_desc &16 & 16, 24, 32 & 8, 12, 16 & 4 \\
+ \hline AES & aes\_desc & 16 & 16, 24, 32 & 10, 12, 14 & 6 \\
+ & aes\_enc\_desc & 16 & 16, 24, 32 & 10, 12, 14 & 6 \\
+ & aesni\_desc & 16 & 16, 24, 32 & 10, 12, 14 & 6 \\
+ \hline Twofish & twofish\_desc & 16 & 16, 24, 32 & 16 & 7 \\
+ \hline DES & des\_desc & 8 & 8 & 16 & 13 \\
+ \hline 3DES (EDE mode) & des3\_desc & 8 & 16, 24 & 16 & 14 \\
+ \hline CAST5 (CAST-128) & cast5\_desc & 8 & 5 $\ldots$ 16 & 12, 16 & 15 \\
+ \hline Noekeon & noekeon\_desc & 16 & 16 & 16 & 16 \\
+ \hline Skipjack & skipjack\_desc & 8 & 10 & 32 & 17 \\
+ \hline Anubis & anubis\_desc & 16 & 16 $\ldots$ 40 & 12 $\ldots$ 18 & 19 \\
+ \hline Khazad & khazad\_desc & 8 & 16 & 8 & 18 \\
+ \hline SEED & kseed\_desc & 16 & 16 & 16 & 20 \\
+ \hline KASUMI & kasumi\_desc & 8 & 16 & 8 & 21 \\
+ \hline Camellia & camellia\_desc & 16 & 16, 24, 32 & 18, 24 & 23 \\
+ \hline IDEA & idea\_desc & 8 & 16 & 8 & 24 \\
+ \hline Serpent & serpent\_desc & 16 & 16, 24, 32 & 32 & 25 \\
+ \hline TEA & tea\_desc & 8 & 16 & 32 & 26 \\
\hline
\end{tabular}
\end{center}
diff --git a/libtomcrypt_VS2008.vcproj b/libtomcrypt_VS2008.vcproj
index a3588d976..b5846b8be 100644
--- a/libtomcrypt_VS2008.vcproj
+++ b/libtomcrypt_VS2008.vcproj
@@ -454,6 +454,10 @@
/>
+
+
+#include
+#include
+
+/* Key-schedule core: AESKEYGENASSIST of t with round constant 0, then pick 32-bit lane c.
+ * Per the instruction definition lane 3 is RotWord(SubWord(t[3])) and lane 2 is SubWord(t[3]). */
+#define setup_mix(t, c)   _mm_extract_epi32(_mm_aeskeygenassist_si128((t), 0), (c))
+/* Unaligned 128-bit load of the 16 bytes starting at k (k may point to const data). */
+#define temp_load(k)      _mm_loadu_si128((const __m128i*)(k))
+/* Returns t with its highest 32-bit lane (index 3) replaced by k. */
+#define temp_update(t, k) _mm_insert_epi32((t), (k), 3)
+/* AESIMC (InvMixColumns) of the round key stored at k; the load makes no alignment assumption. */
+#define temp_invert(k)    _mm_aesimc_si128(_mm_loadu_si128((const __m128i*)(k)))
+
+
+static const ulong32 rcon[] = { /* round constants: aesni_setup XORs rcon[i] into the first word produced by key-expansion step i */
+ 0x01UL, 0x02UL, 0x04UL, 0x08UL, 0x10UL, 0x20UL, 0x40UL, 0x80UL, 0x1BUL, 0x36UL
+};
+
+/*
+ * Runtime check for the CPU features this file relies on.
+ * Code partially borrowed from https://software.intel.com/content/www/us/en/develop/articles/intel-sha-extensions.html
+ *
+ * Executes CPUID leaf 1 once and tests two ECX feature bits:
+ *   bit 19 - SSE4.1 (needed by _mm_extract_epi32 / _mm_insert_epi32 in the key schedule)
+ *   bit 25 - AES-NI
+ * The result is cached in function-local statics, so later calls only read the
+ * cached value. The cache is not synchronized between threads.
+ *
+ * @return 1 if both features are reported, 0 otherwise
+ */
+static int s_aesni_is_supported(void)
+{
+   static int initialized = 0, is_supported = 0;
+
+   if (initialized == 0) {
+      int a, b, c, d;
+
+      /* Look for CPUID.1.0.ECX[19] and CPUID.1.0.ECX[25]
+       * EAX = 1, ECX = 0
+       */
+      a = 1;
+      c = 0;
+
+      /* the __asm__ spelling is also accepted in strict ISO C modes, where plain `asm` is not a keyword */
+      __asm__ __volatile__ ("cpuid"
+           :"=a"(a), "=b"(b), "=c"(c), "=d"(d)
+           :"a"(a), "c"(c)
+          );
+
+      is_supported = (((c >> 19) & 1) != 0) && (((c >> 25) & 1) != 0);
+      initialized = 1;
+   }
+
+   return is_supported;
+}
+
+/**
+   Initialize the AES (Rijndael) block cipher, building the key schedule with AES-NI
+
+   If s_aesni_is_supported() reports 0 the call is forwarded to rijndael_setup()
+   when LTC_RIJNDAEL is enabled, and fails with CRYPT_INVALID_CIPHER otherwise.
+
+   @param key        The symmetric key you wish to pass
+   @param keylen     The key length in bytes (16, 24 or 32)
+   @param num_rounds The number of rounds desired (0 for default, otherwise it must equal keylen/4 + 6)
+   @param skey       The key in as scheduled by this function.
+   @return CRYPT_OK if successful, CRYPT_INVALID_KEYSIZE or CRYPT_INVALID_ROUNDS on bad parameters
+*/
+int aesni_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
+{
+   int i;
+   __m128i temp;
+   ulong32 *rk;
+   ulong32 *rrk;
+   LTC_ARGCHK(key != NULL);
+   LTC_ARGCHK(skey != NULL);
+
+   if (s_aesni_is_supported() == 0) {
+#ifdef LTC_RIJNDAEL
+      return rijndael_setup(key, keylen, num_rounds, skey);
+#else
+      return CRYPT_INVALID_CIPHER;
+#endif
+   }
+
+   if (keylen != 16 && keylen != 24 && keylen != 32) {
+      return CRYPT_INVALID_KEYSIZE;
+   }
+
+   if (num_rounds != 0 && num_rounds != (keylen / 4 + 6)) {
+      return CRYPT_INVALID_ROUNDS;
+   }
+
+   /* 10, 12 or 14 rounds for 16, 24 or 32 byte keys */
+   skey->rijndael.Nr = keylen / 4 + 6;
+
+   /* setup the forward key */
+   i = 0;
+   rk = skey->rijndael.eK;
+   /* the first words of the schedule are the key itself, loaded little-endian */
+   LOAD32L(rk[0], key);
+   LOAD32L(rk[1], key + 4);
+   LOAD32L(rk[2], key + 8);
+   LOAD32L(rk[3], key + 12);
+   /* In every branch `temp` is kept so that its lane 3 holds the most recent
+    * schedule word, which is the lane setup_mix() transforms. */
+   if (keylen == 16) {
+      temp = temp_load(key);
+      /* 10 steps, 4 new words each */
+      for (;;) {
+         rk[4] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
+         rk[5] = rk[1] ^ rk[4];
+         rk[6] = rk[2] ^ rk[5];
+         rk[7] = rk[3] ^ rk[6];
+         if (++i == 10) {
+            break;
+         }
+         temp = temp_update(temp, rk[7]);
+         rk += 4;
+      }
+   } else if (keylen == 24) {
+      LOAD32L(rk[4], key + 16);
+      LOAD32L(rk[5], key + 20);
+      /* lane 3 of this load is key bytes 20..23, i.e. rk[5] */
+      temp = temp_load(key + 8);
+      /* 8 steps, 6 new words each; the last step stops after 4 words */
+      for (;;) {
+         rk[6] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
+         rk[7] = rk[1] ^ rk[6];
+         rk[8] = rk[2] ^ rk[7];
+         rk[9] = rk[3] ^ rk[8];
+         if (++i == 8) {
+            break;
+         }
+         rk[10] = rk[4] ^ rk[9];
+         rk[11] = rk[5] ^ rk[10];
+         temp = temp_update(temp, rk[11]);
+         rk += 6;
+      }
+   } else if (keylen == 32) {
+      LOAD32L(rk[4], key + 16);
+      LOAD32L(rk[5], key + 20);
+      LOAD32L(rk[6], key + 24);
+      LOAD32L(rk[7], key + 28);
+      /* lane 3 of this load is key bytes 28..31, i.e. rk[7] */
+      temp = temp_load(key + 16);
+      /* 7 steps, 8 new words each; the last step stops after 4 words */
+      for (;;) {
+         rk[8] = rk[0] ^ setup_mix(temp, 3) ^ rcon[i];
+         rk[9] = rk[1] ^ rk[8];
+         rk[10] = rk[2] ^ rk[9];
+         rk[11] = rk[3] ^ rk[10];
+         if (++i == 7) {
+            break;
+         }
+         temp = temp_update(temp, rk[11]);
+         /* second half of the step: lane 2 is used and no round constant is mixed in */
+         rk[12] = rk[4] ^ setup_mix(temp, 2);
+         rk[13] = rk[5] ^ rk[12];
+         rk[14] = rk[6] ^ rk[13];
+         rk[15] = rk[7] ^ rk[14];
+         temp = temp_update(temp, rk[15]);
+         rk += 8;
+      }
+   } else {
+      /* this can't happen */
+      /* coverity[dead_error_line] */
+      return CRYPT_ERROR;
+   }
+
+   /* setup the inverse key now: dK is eK in reverse round order */
+   rk = skey->rijndael.dK;
+   rrk = skey->rijndael.eK + skey->rijndael.Nr * 4;
+
+   /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+   /* copy first */
+   *rk++ = *rrk++;
+   *rk++ = *rrk++;
+   *rk++ = *rrk++;
+   *rk = *rrk;
+   rk -= 3;
+   rrk -= 3;
+
+   for (i = 1; i < skey->rijndael.Nr; i++) {
+      rrk -= 4;
+      rk += 4;
+      /* only the forward key is read here; dK[rk] has not been written yet */
+      *((__m128i*) rk) = temp_invert(rrk);
+   }
+
+   /* copy last */
+   rrk -= 4;
+   rk += 4;
+   *rk++ = *rrk++;
+   *rk++ = *rrk++;
+   *rk++ = *rrk++;
+   *rk = *rrk;
+
+   return CRYPT_OK;
+}
+
+/**
+  Encrypts a block of text with AES
+
+  If s_aesni_is_supported() reports 0 the call is forwarded to
+  rijndael_ecb_encrypt() when LTC_RIJNDAEL is enabled, and fails with
+  CRYPT_INVALID_CIPHER otherwise.
+
+  @param pt The input plaintext (16 bytes)
+  @param ct The output ciphertext (16 bytes)
+  @param skey The key as scheduled
+  @return CRYPT_OK if successful, CRYPT_INVALID_ROUNDS if the stored round count is out of range
+*/
+#ifdef LTC_CLEAN_STACK
+static int s_aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
+#else
+int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
+#endif
+{
+   int Nr, r;
+   const __m128i *skeys;
+   __m128i block;
+
+   LTC_ARGCHK(pt != NULL);
+   LTC_ARGCHK(ct != NULL);
+   LTC_ARGCHK(skey != NULL);
+
+   if (s_aesni_is_supported() == 0) {
+#ifdef LTC_RIJNDAEL
+      return rijndael_ecb_encrypt(pt, ct, skey);
+#else
+      return CRYPT_INVALID_CIPHER;
+#endif
+   }
+
+   Nr = skey->rijndael.Nr;
+
+   /* eK has 60 words, i.e. round keys 0..14, and skeys[Nr] is read below,
+    * so anything above 14 would index past the schedule */
+   if (Nr < 2 || Nr > 14) {
+      return CRYPT_INVALID_ROUNDS;
+   }
+
+   skeys = (const __m128i*) skey->rijndael.eK;
+   block = _mm_loadu_si128((const __m128i*) (pt));
+
+   /* initial AddRoundKey */
+   block = _mm_xor_si128(block, skeys[0]);
+   /* rounds are issued in pairs; for the even round counts produced by
+    * aesni_setup this covers rounds 1 .. Nr-2 */
+   for (r = 1; r < Nr - 1; r += 2) {
+      block = _mm_aesenc_si128(block, skeys[r]);
+      block = _mm_aesenc_si128(block, skeys[r + 1]);
+   }
+   block = _mm_aesenc_si128(block, skeys[Nr - 1]);
+   block = _mm_aesenclast_si128(block, skeys[Nr]);
+
+   _mm_storeu_si128((__m128i*) ct, block);
+
+   return CRYPT_OK;
+}
+
+#ifdef LTC_CLEAN_STACK
+/**
+  Public encrypt entry point for LTC_CLEAN_STACK builds: runs the static
+  worker, then calls burn_stack() before handing back the worker's result.
+  @param pt The input plaintext (16 bytes)
+  @param ct The output ciphertext (16 bytes)
+  @param skey The key as scheduled
+  @return The value returned by s_aesni_ecb_encrypt()
+*/
+int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey)
+{
+   int status;
+
+   status = s_aesni_ecb_encrypt(pt, ct, skey);
+   burn_stack(sizeof(int)*2 + sizeof(unsigned long*) + sizeof(unsigned long)*8);
+   return status;
+}
+#endif
+
+
+/**
+  Decrypts a block of text with AES
+
+  If s_aesni_is_supported() reports 0 the call is forwarded to
+  rijndael_ecb_decrypt() when LTC_RIJNDAEL is enabled, and fails with
+  CRYPT_INVALID_CIPHER otherwise.
+
+  @param ct The input ciphertext (16 bytes)
+  @param pt The output plaintext (16 bytes)
+  @param skey The key as scheduled
+  @return CRYPT_OK if successful, CRYPT_INVALID_ROUNDS if the stored round count is out of range
+*/
+#ifdef LTC_CLEAN_STACK
+static int s_aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
+#else
+int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
+#endif
+{
+   int Nr, r;
+   const __m128i *skeys;
+   __m128i block;
+
+   LTC_ARGCHK(pt != NULL);
+   LTC_ARGCHK(ct != NULL);
+   LTC_ARGCHK(skey != NULL);
+
+   if (s_aesni_is_supported() == 0) {
+#ifdef LTC_RIJNDAEL
+      return rijndael_ecb_decrypt(ct, pt, skey);
+#else
+      return CRYPT_INVALID_CIPHER;
+#endif
+   }
+
+   Nr = skey->rijndael.Nr;
+
+   /* dK has 60 words, i.e. round keys 0..14, and skeys[Nr] is read below,
+    * so anything above 14 would index past the schedule */
+   if (Nr < 2 || Nr > 14) {
+      return CRYPT_INVALID_ROUNDS;
+   }
+
+   skeys = (const __m128i*) skey->rijndael.dK;
+   block = _mm_loadu_si128((const __m128i*) (ct));
+
+   /* initial AddRoundKey with the first inverse round key */
+   block = _mm_xor_si128(block, skeys[0]);
+   /* rounds are issued in pairs; for the even round counts produced by
+    * aesni_setup this covers rounds 1 .. Nr-2 */
+   for (r = 1; r < Nr - 1; r += 2) {
+      block = _mm_aesdec_si128(block, skeys[r]);
+      block = _mm_aesdec_si128(block, skeys[r + 1]);
+   }
+   block = _mm_aesdec_si128(block, skeys[Nr - 1]);
+   block = _mm_aesdeclast_si128(block, skeys[Nr]);
+
+   _mm_storeu_si128((__m128i*) pt, block);
+
+   return CRYPT_OK;
+}
+
+
+#ifdef LTC_CLEAN_STACK
+/**
+  Public decrypt entry point for LTC_CLEAN_STACK builds: runs the static
+  worker, then calls burn_stack() before handing back the worker's result.
+  @param ct The input ciphertext (16 bytes)
+  @param pt The output plaintext (16 bytes)
+  @param skey The key as scheduled
+  @return The value returned by s_aesni_ecb_decrypt()
+*/
+int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey)
+{
+   int status;
+
+   status = s_aesni_ecb_decrypt(ct, pt, skey);
+   burn_stack(sizeof(int)*2 + sizeof(unsigned long*) + sizeof(unsigned long)*8);
+   return status;
+}
+#endif
+
+/**
+  Performs a self-test of the AES block cipher
+
+  For each of the three built-in vectors (16, 24 and 32 byte keys) the key is
+  scheduled, one block is encrypted and decrypted and compared against the
+  expected values, and then an all-zero block is encrypted 1000 times and
+  decrypted 1000 times to check that it returns to zero. Any error reported by
+  setup, encrypt or decrypt is returned to the caller as-is.
+
+  @return CRYPT_OK if functional, CRYPT_NOP if self-test has been disabled
+*/
+int aesni_test(void)
+{
+ #ifndef LTC_TEST
+    return CRYPT_NOP;
+ #else
+ int err;
+ static const struct {
+     int keylen;
+     unsigned char key[32], pt[16], ct[16];
+ } tests[] = {
+    { 16,
+      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+      { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+      { 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
+        0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a }
+    }, {
+      24,
+      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
+      { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+      { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0,
+        0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91 }
+    }, {
+      32,
+      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+      { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+      { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf,
+        0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89 }
+    }
+ };
+
+  symmetric_key key;
+  unsigned char tmp[2][16];
+  int i, y;
+
+  for (i = 0; i < (int)(sizeof(tests)/sizeof(tests[0])); i++) {
+    zeromem(&key, sizeof(key));
+    if ((err = aesni_setup(tests[i].key, tests[i].keylen, 0, &key)) != CRYPT_OK) {
+       return err;
+    }
+
+    /* single-block known-answer check; cipher errors are reported instead of being ignored */
+    if ((err = aesni_ecb_encrypt(tests[i].pt, tmp[0], &key)) != CRYPT_OK) {
+       return err;
+    }
+    if ((err = aesni_ecb_decrypt(tmp[0], tmp[1], &key)) != CRYPT_OK) {
+       return err;
+    }
+    if (compare_testvector(tmp[0], 16, tests[i].ct, 16, "AES-NI Encrypt", i) ||
+          compare_testvector(tmp[1], 16, tests[i].pt, 16, "AES-NI Decrypt", i)) {
+        return CRYPT_FAIL_TESTVECTOR;
+    }
+
+    /* now see if we can encrypt all zero bytes 1000 times, decrypt and come back where we started */
+    for (y = 0; y < 16; y++) tmp[0][y] = 0;
+    for (y = 0; y < 1000; y++) {
+       if ((err = aesni_ecb_encrypt(tmp[0], tmp[0], &key)) != CRYPT_OK) {
+          return err;
+       }
+    }
+    for (y = 0; y < 1000; y++) {
+       if ((err = aesni_ecb_decrypt(tmp[0], tmp[0], &key)) != CRYPT_OK) {
+          return err;
+       }
+    }
+    for (y = 0; y < 16; y++) if (tmp[0][y] != 0) return CRYPT_FAIL_TESTVECTOR;
+  }
+  return CRYPT_OK;
+ #endif
+}
+
+
+/** Terminate the context
+
+ This implementation performs no cleanup: the body only marks skey as unused.
+
+ @param skey The scheduled key
+*/
+void aesni_done(symmetric_key *skey)
+{
+ LTC_UNUSED_PARAM(skey); /* nothing is released or modified here */
+}
+
+
+/**
+  Rounds a requested key size down to the nearest size AES accepts
+  @param keysize [in/out] On entry the desired key length in bytes; on success it is
+                 overwritten with 16, 24 or 32, whichever is the largest value not above the request.
+  @return CRYPT_OK if the input key size is acceptable, CRYPT_INVALID_KEYSIZE if it is below 16.
+*/
+int aesni_keysize(int *keysize)
+{
+   int requested;
+
+   LTC_ARGCHK(keysize != NULL);
+
+   requested = *keysize;
+
+   if (requested >= 32) {
+      *keysize = 32;
+   } else if (requested >= 24) {
+      *keysize = 24;
+   } else if (requested >= 16) {
+      *keysize = 16;
+   } else {
+      /* too short for any AES key; *keysize is left untouched */
+      return CRYPT_INVALID_KEYSIZE;
+   }
+
+   return CRYPT_OK;
+}
+
+#endif
+
diff --git a/src/headers/tomcrypt_cfg.h b/src/headers/tomcrypt_cfg.h
index 6392c5499..f086bb9fc 100644
--- a/src/headers/tomcrypt_cfg.h
+++ b/src/headers/tomcrypt_cfg.h
@@ -91,6 +91,11 @@ LTC_EXPORT int LTC_CALL XSTRCMP(const char *s1, const char *s2);
#define ENDIAN_LITTLE
#define ENDIAN_64BITWORD
#define LTC_FAST
+ #if defined(__SSE4_1__) /* NOTE(review): presumably predefined by the compiler when SSE4.1 code generation is on (e.g. -msse4.1) - confirm */
+ #if __SSE4_1__ == 1
+ #define LTC_AMD64_SSE4_1 /* together with LTC_AES_NI this enables the aesni_* declarations and registration */
+ #endif
+ #endif
#endif
/* detect PPC32 */
@@ -288,6 +293,12 @@ typedef unsigned long ltc_mp_digit;
#define LTC_HAVE_ROTATE_BUILTIN
#endif
+#if defined(__GNUC__)
+ #define LTC_ALIGN(n) __attribute__((aligned(n))) /* placed after a declarator to request n-byte alignment */
+#else
+ #define LTC_ALIGN(n) /* expands to nothing: no alignment is requested on other compilers */
+#endif
+
#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 405)
# define LTC_DEPRECATED(s) __attribute__((deprecated("replaced by " #s)))
# define PRIVATE_LTC_DEPRECATED_PRAGMA(s) _Pragma(#s)
diff --git a/src/headers/tomcrypt_cipher.h b/src/headers/tomcrypt_cipher.h
index 7d8d5ad61..fb9139a41 100644
--- a/src/headers/tomcrypt_cipher.h
+++ b/src/headers/tomcrypt_cipher.h
@@ -35,7 +35,8 @@ struct saferp_key {
#ifdef LTC_RIJNDAEL
struct rijndael_key {
- ulong32 eK[60], dK[60];
+ ulong32 eK[60] LTC_ALIGN(16); /* encryption schedule; the AES-NI code reads it through a __m128i pointer */
+ ulong32 dK[60] LTC_ALIGN(16); /* decryption schedule; the AES-NI code reads and writes it through a __m128i pointer */
int Nr;
};
#endif
@@ -715,6 +716,16 @@ extern const struct ltc_cipher_descriptor rijndael_desc, aes_desc;
extern const struct ltc_cipher_descriptor rijndael_enc_desc, aes_enc_desc;
#endif
+#if defined(LTC_AES_NI) && defined(LTC_AMD64_SSE4_1) /* AES implemented with AES-NI intrinsics */
+int aesni_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey);
+int aesni_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey);
+int aesni_ecb_decrypt(const unsigned char *ct, unsigned char *pt, const symmetric_key *skey);
+int aesni_test(void);
+void aesni_done(symmetric_key *skey);
+int aesni_keysize(int *keysize);
+extern const struct ltc_cipher_descriptor aesni_desc;
+#endif /* LTC_AES_NI && LTC_AMD64_SSE4_1 */
+
#ifdef LTC_XTEA
int xtea_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey);
int xtea_ecb_encrypt(const unsigned char *pt, unsigned char *ct, const symmetric_key *skey);
diff --git a/src/headers/tomcrypt_custom.h b/src/headers/tomcrypt_custom.h
index b13c2e08f..c321e57cf 100644
--- a/src/headers/tomcrypt_custom.h
+++ b/src/headers/tomcrypt_custom.h
@@ -179,6 +179,7 @@
#define LTC_RC6
#define LTC_SAFERP
#define LTC_RIJNDAEL
+#define LTC_AES_NI
#define LTC_XTEA
/* _TABLES tells it to use tables during setup, _SMALL means to use the smaller scheduled key format
* (saves 4KB of ram), _ALL_TABLES enables all tables during setup */
diff --git a/src/headers/tomcrypt_mac.h b/src/headers/tomcrypt_mac.h
index 60ab1acb0..b2c789c36 100644
--- a/src/headers/tomcrypt_mac.h
+++ b/src/headers/tomcrypt_mac.h
@@ -482,7 +482,7 @@ typedef struct {
#ifdef LTC_GCM_TABLES
unsigned char PC[16][256][16] /* 16 tables of 8x128 */
#ifdef LTC_GCM_TABLES_SSE2
-__attribute__ ((aligned (16)))
+LTC_ALIGN(16)
#endif
;
#endif
diff --git a/src/misc/crypt/crypt.c b/src/misc/crypt/crypt.c
index 14576fa7d..460cd59d8 100644
--- a/src/misc/crypt/crypt.c
+++ b/src/misc/crypt/crypt.c
@@ -416,6 +416,9 @@ const char *crypt_build_settings =
#if defined(LTC_ADLER32)
" ADLER32 "
#endif
+#if defined(LTC_AES_NI) && defined(LTC_AMD64_SSE4_1)
+ " AES-NI "
+#endif
#if defined(LTC_BASE64)
" BASE64 "
#endif
diff --git a/src/misc/crypt/crypt_register_all_ciphers.c b/src/misc/crypt/crypt_register_all_ciphers.c
index bb2b873e2..b8181640b 100644
--- a/src/misc/crypt/crypt_register_all_ciphers.c
+++ b/src/misc/crypt/crypt_register_all_ciphers.c
@@ -21,11 +21,15 @@ int register_all_ciphers(void)
* register_cipher(&rijndael_enc_desc);
*/
REGISTER_CIPHER(&aes_enc_desc);
+#else
+#if defined(LTC_AES_NI) && defined(LTC_AMD64_SSE4_1)
+ REGISTER_CIPHER(&aesni_desc);
#else
/* alternative would be
* register_cipher(&rijndael_desc);
*/
REGISTER_CIPHER(&aes_desc);
+#endif /* AES-NI */
#endif
#endif
#ifdef LTC_BLOWFISH
diff --git a/tests/test.c b/tests/test.c
index 375beb267..e6d008ec7 100644
--- a/tests/test.c
+++ b/tests/test.c
@@ -111,11 +111,15 @@ static void s_unregister_all(void)
* unregister_cipher(&rijndael_enc_desc);
*/
unregister_cipher(&aes_enc_desc);
+#else
+#if defined(LTC_AES_NI) && defined(LTC_AMD64_SSE4_1)
+ unregister_cipher(&aesni_desc);
#else
/* alternative would be
* unregister_cipher(&rijndael_desc);
*/
unregister_cipher(&aes_desc);
+#endif /* AES-NI */
#endif
#endif
#ifdef LTC_BLOWFISH