Skip to content

Commit

Permalink
Optimize AES-GCM implementation on aarch64
Browse files Browse the repository at this point in the history
Compared to the current implementation, this change improves
performance by tuning the loop-unrolling factor in the
interleaved implementation and by enabling a higher level of parallelism.

Performance(A72)

new
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes   16384 bytes
aes-128-gcm     113065.51k   375743.00k   848359.51k  1517865.98k  1964040.19k  1986663.77k
aes-192-gcm     110679.32k   364470.63k   799322.88k  1428084.05k  1826917.03k  1848967.17k
aes-256-gcm     104919.86k   352939.29k   759477.76k  1330683.56k  1663175.34k  1670430.72k

old
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes   16384 bytes
aes-128-gcm     115595.32k   382348.65k   855891.29k  1236452.35k  1425670.14k  1429793.45k
aes-192-gcm     112227.02k   369543.47k   810046.55k  1147948.37k  1286288.73k  1296941.06k
aes-256-gcm     111543.90k   361902.36k   769543.59k  1070693.03k  1208576.68k  1207511.72k

Change-Id: I28a2dca85c001a63a2a942e80c7c64f7a4fdfcf7

Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
Reviewed-by: Paul Dale <paul.dale@oracle.com>
(Merged from #9818)
  • Loading branch information
zorrorffm authored and paulidale committed Dec 19, 2019
1 parent 51a7c4b commit 31b5907
Show file tree
Hide file tree
Showing 8 changed files with 5,854 additions and 12 deletions.
1 change: 1 addition & 0 deletions crypto/build.info
Expand Up @@ -85,6 +85,7 @@ DEFINE[../libcrypto]=$UTIL_DEFINE $UPLINKDEF
DEFINE[../providers/libfips.a]=$UTIL_DEFINE
DEFINE[../providers/fips]=$UTIL_DEFINE
DEFINE[../providers/libimplementations.a]=$UTIL_DEFINE
DEFINE[../providers/libcommon.a]=$UTIL_DEFINE

DEPEND[info.o]=buildinf.h
DEPEND[cversion.o]=buildinf.h
Expand Down
2 changes: 0 additions & 2 deletions crypto/evp/e_aes.c
Expand Up @@ -130,8 +130,6 @@ static void ctr64_inc(unsigned char *counter)

#if defined(AESNI_CAPABLE)
# if defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
# define AES_gcm_encrypt aesni_gcm_encrypt
# define AES_gcm_decrypt aesni_gcm_decrypt
# define AES_GCM_ASM2(gctx) (gctx->gcm.block==(block128_f)aesni_encrypt && \
gctx->gcm.ghash==gcm_ghash_avx)
# undef AES_GCM_ASM2 /* minor size optimization */
Expand Down
5,722 changes: 5,722 additions & 0 deletions crypto/modes/asm/aes-gcm-armv8_64.pl

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion crypto/modes/build.info
Expand Up @@ -24,7 +24,7 @@ IF[{- !$disabled{asm} -}]

$MODESASM_armv4=ghash-armv4.S ghashv8-armx.S
$MODESDEF_armv4=GHASH_ASM
$MODESASM_aarch64=ghashv8-armx.S
$MODESASM_aarch64=ghashv8-armx.S aes-gcm-armv8_64.S
$MODESDEF_aarch64=

$MODESASM_parisc11=ghash-parisc.s
Expand Down Expand Up @@ -76,6 +76,8 @@ GENERATE[ghash-armv4.S]=asm/ghash-armv4.pl
INCLUDE[ghash-armv4.o]=..
GENERATE[ghashv8-armx.S]=asm/ghashv8-armx.pl
INCLUDE[ghashv8-armx.o]=..
GENERATE[aes-gcm-armv8_64.S]=asm/aes-gcm-armv8_64.pl
INCLUDE[aes-gcm-armv8_64.o]=..
GENERATE[ghash-s390x.S]=asm/ghash-s390x.pl
INCLUDE[ghash-s390x.o]=..
GENERATE[ghash-c64xplus.S]=asm/ghash-c64xplus.pl
33 changes: 32 additions & 1 deletion include/crypto/ciphermode_platform.h
Expand Up @@ -91,6 +91,32 @@ void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len,
# define HWAES_cbc_encrypt aes_v8_cbc_encrypt
# define HWAES_ecb_encrypt aes_v8_ecb_encrypt
# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
/*
 * Non-zero when OPENSSL_armcap_P has both the ARMV8_PMULL and the ARMV8_AES
 * capability bits set.
 */
# define AES_PMULL_CAPABLE ((OPENSSL_armcap_P & ARMV8_PMULL) && (OPENSSL_armcap_P & ARMV8_AES))
/*
 * Byte-count constants for the assembly GCM path on this platform.
 * NOTE(review): presumably the minimum input length at which callers switch
 * to AES_gcm_encrypt/AES_gcm_decrypt - confirm against the users of these
 * macros.
 */
# define AES_GCM_ENC_BYTES 512
# define AES_GCM_DEC_BYTES 512
# if __ARM_MAX_ARCH__>=8
/* Map the platform-neutral bulk GCM entry points onto the ARMv8 wrappers. */
# define AES_gcm_encrypt armv8_aes_gcm_encrypt
# define AES_gcm_decrypt armv8_aes_gcm_decrypt
/*
 * True when the context was set up with the ARMv8 CTR routine and the ARMv8
 * GHASH routine.
 */
# define AES_GCM_ASM(gctx) ((gctx)->ctr==aes_v8_ctr32_encrypt_blocks && \
(gctx)->gcm.ghash==gcm_ghash_v8)
/*
 * Per-key-size kernels (128/192/256, encrypt and decrypt); their definitions
 * are not part of this header.
 * NOTE(review): the length parameter is named plaintext_length even for the
 * decrypt kernels, and its unit is not stated here - confirm bits vs. bytes
 * against the assembly source.
 */
size_t aes_gcm_enc_128_kernel(const uint8_t * plaintext, uint64_t plaintext_length, uint8_t * ciphertext,
uint64_t *Xi, unsigned char ivec[16], const void *key);
size_t aes_gcm_enc_192_kernel(const uint8_t * plaintext, uint64_t plaintext_length, uint8_t * ciphertext,
uint64_t *Xi, unsigned char ivec[16], const void *key);
size_t aes_gcm_enc_256_kernel(const uint8_t * plaintext, uint64_t plaintext_length, uint8_t * ciphertext,
uint64_t *Xi, unsigned char ivec[16], const void *key);
size_t aes_gcm_dec_128_kernel(const uint8_t * ciphertext, uint64_t plaintext_length, uint8_t * plaintext,
uint64_t *Xi, unsigned char ivec[16], const void *key);
size_t aes_gcm_dec_192_kernel(const uint8_t * ciphertext, uint64_t plaintext_length, uint8_t * plaintext,
uint64_t *Xi, unsigned char ivec[16], const void *key);
size_t aes_gcm_dec_256_kernel(const uint8_t * ciphertext, uint64_t plaintext_length, uint8_t * plaintext,
uint64_t *Xi, unsigned char ivec[16], const void *key);
/* Wrappers behind AES_gcm_encrypt / AES_gcm_decrypt (see the macros above). */
size_t armv8_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key,
unsigned char ivec[16], u64 *Xi);
size_t armv8_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key,
unsigned char ivec[16], u64 *Xi);
/* GHASH routine that AES_GCM_ASM() compares the context's ghash pointer to. */
void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp, size_t len);
# endif
# endif
# endif
# endif /* OPENSSL_CPUID_OBJ */
Expand All @@ -111,6 +137,9 @@ void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len,
# define BSAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
# endif

# define AES_GCM_ENC_BYTES 32
# define AES_GCM_DEC_BYTES 16

int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
AES_KEY *key);
int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
Expand Down Expand Up @@ -181,6 +210,8 @@ size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len
const void *key, unsigned char ivec[16], u64 *Xi);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *in, size_t len);

# define AES_gcm_encrypt aesni_gcm_encrypt
# define AES_gcm_decrypt aesni_gcm_decrypt
# define AES_GCM_ASM(ctx) (ctx->ctr == aesni_ctr32_encrypt_blocks && \
ctx->gcm.ghash == gcm_ghash_avx)
# endif
Expand Down Expand Up @@ -416,7 +447,7 @@ void HWAES_ecb_encrypt(const unsigned char *in, unsigned char *out,
size_t length, const AES_KEY *key,
const int enc);
void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
size_t len, const AES_KEY *key,
size_t len, const void *key,
const unsigned char ivec[16]);
void HWAES_xts_encrypt(const unsigned char *inp, unsigned char *out,
size_t len, const AES_KEY *key1,
Expand Down
2 changes: 2 additions & 0 deletions providers/implementations/ciphers/cipher_aes_gcm_hw.c
Expand Up @@ -68,6 +68,8 @@ static const PROV_GCM_HW aes_gcm = {
# include "cipher_aes_gcm_hw_aesni.inc"
#elif defined(SPARC_AES_CAPABLE)
# include "cipher_aes_gcm_hw_t4.inc"
#elif defined(AES_PMULL_CAPABLE) && defined(AES_GCM_ASM)
# include "cipher_aes_gcm_hw_armv8.inc"
#else
const PROV_GCM_HW *PROV_AES_HW_gcm(size_t keybits)
{
Expand Down
83 changes: 83 additions & 0 deletions providers/implementations/ciphers/cipher_aes_gcm_hw_armv8.inc
@@ -0,0 +1,83 @@
/*
* Copyright 2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/

/*
* Crypto extension support for AES GCM.
* This file is included by cipher_aes_gcm_hw.c
*/

/*
 * Bulk AES-GCM encryption using the ARMv8 assembly kernels.
 *
 * Only the whole 16-byte blocks of |in| are handed to the kernel; any
 * trailing partial block is left for the caller to process.
 *
 * in, out  source and destination buffers
 * len      number of input bytes available
 * key      expanded AES key; read as an AES_KEY to obtain its round count
 * ivec     counter block, passed through to the kernel
 * Xi       GHASH accumulator, passed through to the kernel
 *
 * Returns the number of bytes handed to the kernel (|len| rounded down to a
 * multiple of 16), or 0 if the key's round count is not 10, 12 or 14, in
 * which case no kernel ran and nothing was written.
 */
size_t armv8_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
                             const void *key, unsigned char ivec[16], u64 *Xi)
{
    const AES_KEY *aes_key = (const AES_KEY *)key;
    size_t align_bytes = len - len % 16;

    /* The kernels are given the length multiplied by 8, i.e. in bits. */
    switch (aes_key->rounds) {
    case 10:
        aes_gcm_enc_128_kernel(in, align_bytes * 8, out, (uint64_t *)Xi, ivec, key);
        break;
    case 12:
        aes_gcm_enc_192_kernel(in, align_bytes * 8, out, (uint64_t *)Xi, ivec, key);
        break;
    case 14:
        aes_gcm_enc_256_kernel(in, align_bytes * 8, out, (uint64_t *)Xi, ivec, key);
        break;
    default:
        /*
         * No kernel matches this key schedule.  Report that nothing was
         * consumed rather than claiming the data was encrypted.
         */
        return 0;
    }
    return align_bytes;
}

/*
 * Bulk AES-GCM decryption using the ARMv8 assembly kernels.
 *
 * Only the whole 16-byte blocks of |in| are handed to the kernel; any
 * trailing partial block is left for the caller to process.
 *
 * in, out  source and destination buffers
 * len      number of input bytes available
 * key      expanded AES key; read as an AES_KEY to obtain its round count
 * ivec     counter block, passed through to the kernel
 * Xi       GHASH accumulator, passed through to the kernel
 *
 * Returns the number of bytes handed to the kernel (|len| rounded down to a
 * multiple of 16), or 0 if the key's round count is not 10, 12 or 14, in
 * which case no kernel ran and nothing was written.
 */
size_t armv8_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
                             const void *key, unsigned char ivec[16], u64 *Xi)
{
    const AES_KEY *aes_key = (const AES_KEY *)key;
    size_t align_bytes = len - len % 16;

    /* The kernels are given the length multiplied by 8, i.e. in bits. */
    switch (aes_key->rounds) {
    case 10:
        aes_gcm_dec_128_kernel(in, align_bytes * 8, out, (uint64_t *)Xi, ivec, key);
        break;
    case 12:
        aes_gcm_dec_192_kernel(in, align_bytes * 8, out, (uint64_t *)Xi, ivec, key);
        break;
    case 14:
        aes_gcm_dec_256_kernel(in, align_bytes * 8, out, (uint64_t *)Xi, ivec, key);
        break;
    default:
        /*
         * No kernel matches this key schedule.  Report that nothing was
         * consumed rather than claiming the data was decrypted.
         */
        return 0;
    }
    return align_bytes;
}

/*
 * Key-setup callback for the ARMv8 AES-GCM implementation.
 *
 * Views |ctx| as a PROV_AES_GCM_CTX to reach its AES key schedule, then
 * passes that schedule together with the aes_v8_* set-key, block and CTR
 * routines to GCM_HW_SET_KEY_CTR_FN.
 *
 * NOTE(review): the macro is defined outside this file.  |key| and |keylen|
 * are not referenced anywhere else in this function, so the macro presumably
 * picks them (and |ctx|) up by name - confirm before renaming any of them.
 *
 * Always returns 1.
 */
static int armv8_aes_gcm_initkey(PROV_GCM_CTX *ctx, const unsigned char *key,
size_t keylen)
{
/* Reinterpret the generic context as the AES-specific one. */
PROV_AES_GCM_CTX *actx = (PROV_AES_GCM_CTX *)ctx;
AES_KEY *ks = &actx->ks.ks;

GCM_HW_SET_KEY_CTR_FN(ks, aes_v8_set_encrypt_key, aes_v8_encrypt,
aes_v8_ctr32_encrypt_blocks);
return 1;
}


/*
 * Method table for the ARMv8 AES-GCM implementation.
 *
 * The initializers are positional, so their order has to match the member
 * order of PROV_GCM_HW (declared elsewhere).  Only the first entry,
 * armv8_aes_gcm_initkey, is defined in this file; the remaining gcm_*
 * entries come from outside it.
 */
static const PROV_GCM_HW armv8_aes_gcm = {
armv8_aes_gcm_initkey,
gcm_setiv,
gcm_aad_update,
gcm_cipher_update,
gcm_cipher_final,
gcm_one_shot
};

/*
 * Select the AES-GCM method table to use.
 *
 * Returns the ARMv8 table when AES_PMULL_CAPABLE evaluates to non-zero and
 * the aes_gcm table otherwise.  |keybits| is not consulted.
 */
const PROV_GCM_HW *PROV_AES_HW_gcm(size_t keybits)
{
    if (AES_PMULL_CAPABLE)
        return &armv8_aes_gcm;

    return &aes_gcm;
}
19 changes: 11 additions & 8 deletions providers/implementations/ciphers/ciphercommon_gcm_hw.c
Expand Up @@ -30,14 +30,16 @@ int gcm_cipher_update(PROV_GCM_CTX *ctx, const unsigned char *in,
#if defined(AES_GCM_ASM)
size_t bulk = 0;

if (len >= 32 && AES_GCM_ASM(ctx)) {
if (len >= AES_GCM_ENC_BYTES && AES_GCM_ASM(ctx)) {
size_t res = (16 - ctx->gcm.mres) % 16;

if (CRYPTO_gcm128_encrypt(&ctx->gcm, in, out, res))
return 0;
bulk = aesni_gcm_encrypt(in + res, out + res, len - res,
ctx->gcm.key,
ctx->gcm.Yi.c, ctx->gcm.Xi.u);

bulk = AES_gcm_encrypt(in + res, out + res, len - res,
ctx->gcm.key,
ctx->gcm.Yi.c, ctx->gcm.Xi.u);

ctx->gcm.len.u[1] += bulk;
bulk += res;
}
Expand All @@ -57,15 +59,16 @@ int gcm_cipher_update(PROV_GCM_CTX *ctx, const unsigned char *in,
#if defined(AES_GCM_ASM)
size_t bulk = 0;

if (len >= 16 && AES_GCM_ASM(ctx)) {
if (len >= AES_GCM_DEC_BYTES && AES_GCM_ASM(ctx)) {
size_t res = (16 - ctx->gcm.mres) % 16;

if (CRYPTO_gcm128_decrypt(&ctx->gcm, in, out, res))
return -1;

bulk = aesni_gcm_decrypt(in + res, out + res, len - res,
ctx->gcm.key,
ctx->gcm.Yi.c, ctx->gcm.Xi.u);
bulk = AES_gcm_decrypt(in + res, out + res, len - res,
ctx->gcm.key,
ctx->gcm.Yi.c, ctx->gcm.Xi.u);

ctx->gcm.len.u[1] += bulk;
bulk += res;
}
Expand Down

0 comments on commit 31b5907

Please sign in to comment.