Skip to content
Permalink
Browse files
8267125: AES Galois CounterMode (GCM) interleaved implementation using AVX512 + VAES instructions

Co-authored-by: Smita Kamath <svkamath@openjdk.org>
Co-authored-by: Tomasz Kantecki <tomasz.kantecki@intel.com>
Co-authored-by: Anthony Scarpino <ascarpino@openjdk.org>
Reviewed-by: kvn, valeriep
  • Loading branch information
3 people committed Aug 24, 2021
1 parent 6ace805 commit 0e7288ffbf635b9fdb17e8017e9a4f673ca0501d
Showing 21 changed files with 1,318 additions and 246 deletions.
@@ -945,12 +945,19 @@ class MacroAssembler: public Assembler {
void roundDec(XMMRegister key, int rnum);
void lastroundDec(XMMRegister key, int rnum);
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);

void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
void generateHtbl_48_block_zmm(Register htbl);
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
bool final_reduction, int index, XMMRegister counter_inc_mask);
public:
void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
Register state, Register subkeyHtbl, Register counter);

#endif

Large diffs are not rendered by default.

@@ -4368,6 +4368,95 @@ class StubGenerator: public StubCodeGenerator {
return start;
}

// Emits a read-only table of twelve 64-bit words (six 128-bit constants) into
// the code buffer and returns the address of the first word.
// Layout, in emission order:
//   4 x { 0x00000001C2000000, 0xC200000000000000 }  - POLY for reduction
//   1 x { 0x0000000000000001, 0xC200000000000000 }  - POLY
//   1 x { 0x0000000000000001, 0x0000000100000000 }  - TWOONE
address ghash_polynomial512_addr() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
  address table_base = __ pc();

  // POLY for reduction: the same 128-bit pair is emitted four times in a row
  // (64 bytes; presumably one copy per 128-bit lane of a 512-bit register).
  for (int copy = 0; copy < 4; copy++) {
    __ emit_data64(0x00000001C2000000, relocInfo::none);
    __ emit_data64(0xC200000000000000, relocInfo::none);
  }

  // POLY
  __ emit_data64(0x0000000000000001, relocInfo::none);
  __ emit_data64(0xC200000000000000, relocInfo::none);

  // TWOONE
  __ emit_data64(0x0000000000000001, relocInfo::none);
  __ emit_data64(0x0000000100000000, relocInfo::none);

  return table_base;
}

// Vector AES Galois Counter Mode implementation.
// Generates the "galoisCounterMode_AESCrypt" stub: it sets up a frame, saves
// registers, moves the stack-passed arguments into registers, emits the body
// via MacroAssembler::aesgcm_encrypt, then restores registers and returns.
// Parameters:
//              Windows regs  | Linux regs
// in         = c_rarg0 (rcx) | c_rarg0 (rdi)
// len        = c_rarg1 (rdx) | c_rarg1 (rsi)
// ct         = c_rarg2 (r8)  | c_rarg2 (rdx)
// out        = c_rarg3 (r9)  | c_rarg3 (rcx)
// key        = r10           | c_rarg4 (r8)
// state      = r13           | c_rarg5 (r9)
// subkeyHtbl = r14           | r11
// counter    = rsi           | r12
// Registers shown without a c_rargN name are not argument registers on that
// platform; the value is loaded into them from the caller's stack (see below).
// return - number of processed bytes
address generate_galoisCounterMode_AESCrypt() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt");
address start = __ pc();
// The first four arguments arrive in argument registers on both platforms.
const Register in = c_rarg0;
const Register len = c_rarg1;
const Register ct = c_rarg2;
const Register out = c_rarg3;
// NOTE(review): the original comment here was a truncated fragment saying the
// counter is "updated with the incremented counter in the end" - presumably
// done inside aesgcm_encrypt; confirm against that implementation.
#ifndef _WIN64
// Non-Windows: key and state are register arguments; subkeyHtbl and counter
// are read from the stack at rbp + 2 and rbp + 3 words.
const Register key = c_rarg4;
const Register state = c_rarg5;
const Address subkeyH_mem(rbp, 2 * wordSize);
const Register subkeyHtbl = r11;
const Address counter_mem(rbp, 3 * wordSize);
const Register counter = r12;
#else
// Win64: key, state, subkeyHtbl and counter are all read from the stack, at
// rbp + 6 .. rbp + 9 words, into the registers chosen below.
const Address key_mem(rbp, 6 * wordSize);
const Register key = r10;
const Address state_mem(rbp, 7 * wordSize);
const Register state = r13;
const Address subkeyH_mem(rbp, 8 * wordSize);
const Register subkeyHtbl = r14;
const Address counter_mem(rbp, 9 * wordSize);
const Register counter = rsi;
#endif
// Set up the frame first so the rbp-relative argument addresses above are valid.
__ enter();
// Save registers before entering the routine; they are popped in the exact
// reverse order after the aesgcm_encrypt body.
__ push(r12);
__ push(r13);
__ push(r14);
__ push(r15);
__ push(rbx);
#ifdef _WIN64
// on win64, rsi is saved as well (it is used for counter), and key and state
// are filled from their stack positions
__ push(rsi);
__ movptr(key, key_mem);
__ movptr(state, state_mem);
#endif
// On both platforms subkeyHtbl and counter come from the stack.
__ movptr(subkeyHtbl, subkeyH_mem);
__ movptr(counter, counter_mem);

// Emit the AES-GCM body operating on the registers prepared above.
__ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, counter);

// Restore state before leaving routine (mirror image of the pushes above)
#ifdef _WIN64
__ pop(rsi);
#endif
__ pop(rbx);
__ pop(r15);
__ pop(r14);
__ pop(r13);
__ pop(r12);

__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}

// This mask is used for incrementing counter value(linc0, linc4, etc.)
address counter_mask_addr() {
__ align(64);
@@ -7618,13 +7707,20 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
} else {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
}

if (UseAESCTRIntrinsics) {
if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
if (StubRoutines::x86::_counter_mask_addr == NULL) {
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
}
StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
} else {
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
@@ -7664,7 +7760,9 @@ address generate_avx_ghash_processBlocks() {

// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
}
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
if (VM_Version::supports_avx()) {
StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();
@@ -80,6 +80,7 @@ address StubRoutines::x86::_join_0_1_base64 = NULL;
address StubRoutines::x86::_join_1_2_base64 = NULL;
address StubRoutines::x86::_join_2_3_base64 = NULL;
address StubRoutines::x86::_decoding_table_base64 = NULL;
address StubRoutines::x86::_ghash_poly512_addr = NULL;
#endif
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;

@@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_

enum platform_dependent_constants {
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
code_size2 = 35300 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small)
code_size2 = 35300 LP64_ONLY(+32000) // simply increase if too small (assembler will crash if too small)
};

class x86 {
@@ -198,6 +198,7 @@ class x86 {
static address _join_1_2_base64;
static address _join_2_3_base64;
static address _decoding_table_base64;
static address _ghash_poly512_addr;
#endif
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
@@ -254,6 +255,7 @@ class x86 {
static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; }
static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; }
static address crc_table_avx512_addr() { return (address)_crc_table_avx512; }
static address ghash_polynomial512_addr() { return _ghash_poly512_addr; }
#endif // _LP64
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
@@ -182,6 +182,7 @@ int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) {
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
case vmIntrinsics::_counterMode_AESCrypt:
case vmIntrinsics::_galoisCounterMode_AESCrypt:
return 1;
case vmIntrinsics::_digestBase_implCompressMB:
return 5;
@@ -429,6 +430,9 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_counterMode_AESCrypt:
if (!UseAESCTRIntrinsics) return true;
break;
case vmIntrinsics::_galoisCounterMode_AESCrypt:
if (!UseAESIntrinsics) return true;
break;
case vmIntrinsics::_md5_implCompress:
if (!UseMD5Intrinsics) return true;
break;
@@ -415,6 +415,11 @@ class methodHandle;
do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
do_name( crypt_name, "implCrypt") \
\
do_class(com_sun_crypto_provider_galoisCounterMode, "com/sun/crypto/provider/GaloisCounterMode") \
do_intrinsic(_galoisCounterMode_AESCrypt, com_sun_crypto_provider_galoisCounterMode, gcm_crypt_name, aes_gcm_signature, F_S) \
do_name(gcm_crypt_name, "implGCMCrypt") \
do_signature(aes_gcm_signature, "([BII[BI[BILcom/sun/crypto/provider/GCTR;Lcom/sun/crypto/provider/GHASH;)I") \
\
/* support for sun.security.provider.MD5 */ \
do_class(sun_security_provider_md5, "sun/security/provider/MD5") \
do_intrinsic(_md5_implCompress, sun_security_provider_md5, implCompress_name, implCompress_signature, F_R) \
@@ -308,6 +308,7 @@
static_field(StubRoutines, _electronicCodeBook_encryptAESCrypt, address) \
static_field(StubRoutines, _electronicCodeBook_decryptAESCrypt, address) \
static_field(StubRoutines, _counterMode_AESCrypt, address) \
static_field(StubRoutines, _galoisCounterMode_AESCrypt, address) \
static_field(StubRoutines, _base64_encodeBlock, address) \
static_field(StubRoutines, _base64_decodeBlock, address) \
static_field(StubRoutines, _ghash_processBlocks, address) \
@@ -640,6 +640,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
case vmIntrinsics::_counterMode_AESCrypt:
case vmIntrinsics::_galoisCounterMode_AESCrypt:
case vmIntrinsics::_md5_implCompress:
case vmIntrinsics::_sha_implCompress:
case vmIntrinsics::_sha2_implCompress:
@@ -1087,6 +1087,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_decryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "decodeBlock") == 0 ||
@@ -2535,7 +2535,7 @@ Node* GraphKit::make_runtime_call(int flags,
if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
/* close each nested if ===> */ } } } } } } } }
/* close each nested if ===> */ } } } } } } } }
assert(call->in(call->req()-1) != NULL, "must initialize all parms");

if (!is_leaf) {

1 comment on commit 0e7288f

@openjdk-notifier
Copy link

@openjdk-notifier openjdk-notifier bot commented on 0e7288f Aug 24, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.