Skip to content

Commit e5bbad0

Browse files
8334999: RISC-V: implement AES single block encryption/decryption intrinsics
Reviewed-by: fyang, rehn, yzhu
1 parent c40bb76 commit e5bbad0

File tree

4 files changed

+199
-13
lines changed

4 files changed

+199
-13
lines changed

src/hotspot/cpu/riscv/assembler_riscv.hpp

+7
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,13 @@ enum Nf {
19621962
INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element
19631963
INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every element
19641964

1965+
// Vector AES instructions (Zvkned extension)
1966+
INSN(vaesem_vv, 0b1110111, 0b010, 0b00010, 0b101000); // AES encryption, middle round
1967+
INSN(vaesef_vv, 0b1110111, 0b010, 0b00011, 0b101000); // AES encryption, final round
1968+
1969+
INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000); // AES decryption, middle round
1970+
INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000); // AES decryption, final round
1971+
19651972
INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
19661973
INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros
19671974

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

+173
Original file line numberDiff line numberDiff line change
@@ -2276,6 +2276,174 @@ class StubGenerator: public StubCodeGenerator {
22762276
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
22772277
}
22782278

2279+
void generate_aes_loadkeys(const Register &key, VectorRegister *working_vregs, int rounds) {
2280+
const int step = 16;
2281+
for (int i = 0; i < rounds; i++) {
2282+
__ vle32_v(working_vregs[i], key);
2283+
// The keys are stored in little-endian array, while we need
2284+
// to operate in big-endian.
2285+
// So performing an endian-swap here with vrev8.v instruction
2286+
__ vrev8_v(working_vregs[i], working_vregs[i]);
2287+
__ addi(key, key, step);
2288+
}
2289+
}
2290+
2291+
void generate_aes_encrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
2292+
assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");
2293+
2294+
__ vxor_vv(res, res, working_vregs[0]);
2295+
for (int i = 1; i < rounds - 1; i++) {
2296+
__ vaesem_vv(res, working_vregs[i]);
2297+
}
2298+
__ vaesef_vv(res, working_vregs[rounds - 1]);
2299+
}
2300+
2301+
// Arguments:
2302+
//
2303+
// Inputs:
2304+
// c_rarg0 - source byte array address
2305+
// c_rarg1 - destination byte array address
2306+
// c_rarg2 - K (key) in little endian int array
2307+
//
2308+
address generate_aescrypt_encryptBlock() {
2309+
assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");
2310+
2311+
__ align(CodeEntryAlignment);
2312+
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2313+
2314+
Label L_aes128, L_aes192;
2315+
2316+
const Register from = c_rarg0; // source array address
2317+
const Register to = c_rarg1; // destination array address
2318+
const Register key = c_rarg2; // key array address
2319+
const Register keylen = c_rarg3;
2320+
2321+
VectorRegister working_vregs[] = {
2322+
v4, v5, v6, v7, v8, v9, v10, v11,
2323+
v12, v13, v14, v15, v16, v17, v18
2324+
};
2325+
const VectorRegister res = v19;
2326+
2327+
address start = __ pc();
2328+
__ enter();
2329+
2330+
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2331+
2332+
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
2333+
__ vle32_v(res, from);
2334+
2335+
__ mv(t2, 52);
2336+
__ blt(keylen, t2, L_aes128);
2337+
__ beq(keylen, t2, L_aes192);
2338+
// Else we fallthrough to the biggest case (256-bit key size)
2339+
2340+
// Note: the following function performs key += 15*16
2341+
generate_aes_loadkeys(key, working_vregs, 15);
2342+
generate_aes_encrypt(res, working_vregs, 15);
2343+
__ vse32_v(res, to);
2344+
__ mv(c_rarg0, 0);
2345+
__ leave();
2346+
__ ret();
2347+
2348+
__ bind(L_aes192);
2349+
// Note: the following function performs key += 13*16
2350+
generate_aes_loadkeys(key, working_vregs, 13);
2351+
generate_aes_encrypt(res, working_vregs, 13);
2352+
__ vse32_v(res, to);
2353+
__ mv(c_rarg0, 0);
2354+
__ leave();
2355+
__ ret();
2356+
2357+
__ bind(L_aes128);
2358+
// Note: the following function performs key += 11*16
2359+
generate_aes_loadkeys(key, working_vregs, 11);
2360+
generate_aes_encrypt(res, working_vregs, 11);
2361+
__ vse32_v(res, to);
2362+
__ mv(c_rarg0, 0);
2363+
__ leave();
2364+
__ ret();
2365+
2366+
return start;
2367+
}
2368+
2369+
void generate_aes_decrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
2370+
assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");
2371+
2372+
__ vxor_vv(res, res, working_vregs[rounds - 1]);
2373+
for (int i = rounds - 2; i > 0; i--) {
2374+
__ vaesdm_vv(res, working_vregs[i]);
2375+
}
2376+
__ vaesdf_vv(res, working_vregs[0]);
2377+
}
2378+
2379+
// Arguments:
2380+
//
2381+
// Inputs:
2382+
// c_rarg0 - source byte array address
2383+
// c_rarg1 - destination byte array address
2384+
// c_rarg2 - K (key) in little endian int array
2385+
//
2386+
address generate_aescrypt_decryptBlock() {
2387+
assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");
2388+
2389+
__ align(CodeEntryAlignment);
2390+
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2391+
2392+
Label L_aes128, L_aes192;
2393+
2394+
const Register from = c_rarg0; // source array address
2395+
const Register to = c_rarg1; // destination array address
2396+
const Register key = c_rarg2; // key array address
2397+
const Register keylen = c_rarg3;
2398+
2399+
VectorRegister working_vregs[] = {
2400+
v4, v5, v6, v7, v8, v9, v10, v11,
2401+
v12, v13, v14, v15, v16, v17, v18
2402+
};
2403+
const VectorRegister res = v19;
2404+
2405+
address start = __ pc();
2406+
__ enter(); // required for proper stackwalking of RuntimeStub frame
2407+
2408+
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2409+
2410+
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
2411+
__ vle32_v(res, from);
2412+
2413+
__ mv(t2, 52);
2414+
__ blt(keylen, t2, L_aes128);
2415+
__ beq(keylen, t2, L_aes192);
2416+
// Else we fallthrough to the biggest case (256-bit key size)
2417+
2418+
// Note: the following function performs key += 15*16
2419+
generate_aes_loadkeys(key, working_vregs, 15);
2420+
generate_aes_decrypt(res, working_vregs, 15);
2421+
__ vse32_v(res, to);
2422+
__ mv(c_rarg0, 0);
2423+
__ leave();
2424+
__ ret();
2425+
2426+
__ bind(L_aes192);
2427+
// Note: the following function performs key += 13*16
2428+
generate_aes_loadkeys(key, working_vregs, 13);
2429+
generate_aes_decrypt(res, working_vregs, 13);
2430+
__ vse32_v(res, to);
2431+
__ mv(c_rarg0, 0);
2432+
__ leave();
2433+
__ ret();
2434+
2435+
__ bind(L_aes128);
2436+
// Note: the following function performs key += 11*16
2437+
generate_aes_loadkeys(key, working_vregs, 11);
2438+
generate_aes_decrypt(res, working_vregs, 11);
2439+
__ vse32_v(res, to);
2440+
__ mv(c_rarg0, 0);
2441+
__ leave();
2442+
__ ret();
2443+
2444+
return start;
2445+
}
2446+
22792447
// code for comparing 16 bytes of strings with same encoding
22802448
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
22812449
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
@@ -6294,6 +6462,11 @@ static const int64_t right_3_bits = right_n_bits(3);
62946462
StubRoutines::_montgomerySquare = g.generate_square();
62956463
}
62966464

6465+
// Generate and publish the single-block AES encrypt/decrypt stubs only when
// AES intrinsics are enabled; both generators assert UseAESIntrinsics.
if (UseAESIntrinsics) {
6466+
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
6467+
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
6468+
}
6469+
62976470
if (UsePoly1305Intrinsics) {
62986471
StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
62996472
}

src/hotspot/cpu/riscv/vm_version_riscv.cpp

+17-11
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,6 @@ void VM_Version::common_initialize() {
122122
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
123123
}
124124

125-
if (UseAES || UseAESIntrinsics) {
126-
if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
127-
warning("AES instructions are not available on this CPU");
128-
FLAG_SET_DEFAULT(UseAES, false);
129-
}
130-
if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
131-
warning("AES intrinsics are not available on this CPU");
132-
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
133-
}
134-
}
135-
136125
if (UseAESCTRIntrinsics) {
137126
warning("AES/CTR intrinsics are not available on this CPU");
138127
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@@ -429,6 +418,23 @@ void VM_Version::c2_initialize() {
429418
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
430419
FLAG_SET_DEFAULT(UseSHA, false);
431420
}
421+
422+
// AES
423+
if (UseZvkn) {
424+
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
425+
UseAESIntrinsics =
426+
UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
427+
if (UseAESIntrinsics && !UseAES) {
428+
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
429+
UseAES = true;
430+
}
431+
} else if (UseAESIntrinsics || UseAES) {
432+
if (!FLAG_IS_DEFAULT(UseAESIntrinsics) || !FLAG_IS_DEFAULT(UseAES)) {
433+
warning("AES intrinsics require Zvkn extension (not available on this CPU).");
434+
}
435+
FLAG_SET_DEFAULT(UseAES, false);
436+
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
437+
}
432438
}
433439
#endif // COMPILER2
434440

src/hotspot/share/opto/library_call.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -7374,11 +7374,11 @@ bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
73747374

73757375
//------------------------------get_key_start_from_aescrypt_object-----------------------
73767376
Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
7377-
#if defined(PPC64) || defined(S390)
7377+
#if defined(PPC64) || defined(S390) || defined(RISCV64)
73787378
// MixColumns for decryption can be reduced by preprocessing MixColumns with round keys.
73797379
// Intel's extension is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns.
73807380
// However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption.
7381-
// The ppc64 stubs of encryption and decryption use the same round keys (sessionK[0]).
7381+
// The ppc64 and riscv64 stubs of encryption and decryption use the same round keys (sessionK[0]).
73827382
Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I");
73837383
assert (objSessionK != nullptr, "wrong version of com.sun.crypto.provider.AESCrypt");
73847384
if (objSessionK == nullptr) {

0 commit comments

Comments
 (0)