Skip to content

Commit 07050b0

Browse files
asgibbons, Derek White, Sandhya Viswanathan
committed
8268276: Base64 Decoding optimization for x86 using AVX-512
Co-authored-by: Derek White <drwhite@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Backport-of: c37988d0793b24d98d285530dfda69999a227937
1 parent 3df72a3 commit 07050b0

File tree

12 files changed

+749
-18
lines changed

12 files changed

+749
-18
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ NashornProfile.txt
1616
**/JTreport/**
1717
**/JTwork/**
1818
/src/utils/LogCompilation/target/
19+
/.project/
20+
/.settings/

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6125,6 +6125,7 @@ class StubGenerator: public StubCodeGenerator {
61256125
* c_rarg3 - dest_start
61266126
* c_rarg4 - dest_offset
61276127
* c_rarg5 - isURL
6128+
* c_rarg6 - isMIME
61286129
*
61296130
*/
61306131
address generate_base64_decodeBlock() {
@@ -6207,12 +6208,13 @@ class StubGenerator: public StubCodeGenerator {
62076208
StubCodeMark mark(this, "StubRoutines", "decodeBlock");
62086209
address start = __ pc();
62096210

6210-
Register src = c_rarg0; // source array
6211-
Register soff = c_rarg1; // source start offset
6212-
Register send = c_rarg2; // source end offset
6213-
Register dst = c_rarg3; // dest array
6214-
Register doff = c_rarg4; // position for writing to dest array
6215-
Register isURL = c_rarg5; // Base64 or URL character set
6211+
Register src = c_rarg0; // source array
6212+
Register soff = c_rarg1; // source start offset
6213+
Register send = c_rarg2; // source end offset
6214+
Register dst = c_rarg3; // dest array
6215+
Register doff = c_rarg4; // position for writing to dest array
6216+
Register isURL = c_rarg5; // Base64 or URL character set
6217+
Register isMIME = c_rarg6; // Decoding MIME block - unused in this implementation
62166218

62176219
Register length = send; // reuse send as length of source data to process
62186220

src/hotspot/cpu/ppc/stubGenerator_ppc.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3757,6 +3757,7 @@ class StubGenerator: public StubCodeGenerator {
37573757
Register d = R6_ARG4; // destination address
37583758
Register dp = R7_ARG5; // destination offset
37593759
Register isURL = R8_ARG6; // boolean, if non-zero indicates use of RFC 4648 base64url encoding
3760+
Register isMIME = R9_ARG7; // boolean, if non-zero indicates use of RFC 2045 MIME encoding - not used
37603761

37613762
// Local variables
37623763
Register const_ptr = R9; // used for loading constants

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3830,6 +3830,14 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int
38303830
emit_int16(0x76, (0xC0 | encode));
38313831
}
38323832

3833+
void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // Emit VPERMT2B, register-register form.
3834+
assert(VM_Version::supports_avx512_vbmi(), ""); // caller must have checked for AVX512_VBMI
3835+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3836+
attributes.set_is_evex_instruction(); // mark the attributes as an EVEX instruction
3837+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // 66 prefix, 0F38 opcode map
3838+
emit_int16(0x7D, (0xC0 | encode)); // opcode 0x7D, then ModRM with mod=11 (register-direct)
3839+
}
3840+
38333841
void Assembler::pause() { // Emit the two-byte PAUSE instruction.
38343842
emit_int16((unsigned char)0xF3, (unsigned char)0x90); // F3 90 is the PAUSE encoding
38353843
}
@@ -4549,6 +4557,15 @@ void Assembler::vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int
45494557
emit_int16((unsigned char)0xF5, (0xC0 | encode));
45504558
}
45514559

4560+
void Assembler::vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) { // Emit VPMADDUBSW, register-register form.
4561+
assert(vector_len == AVX_128bit? VM_Version::supports_avx() : // required CPU feature depends on the vector width: AVX for 128-bit,
4562+
vector_len == AVX_256bit? VM_Version::supports_avx2() : // AVX2 for 256-bit,
4563+
vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, ""); // AVX512BW for 512-bit; any other vector_len fails the assert
4564+
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4565+
int encode = simd_prefix_and_encode(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // 66 prefix, 0F38 opcode map
4566+
emit_int16(0x04, (0xC0 | encode)); // opcode 0x04, then ModRM with mod=11 (register-direct)
4567+
}
4568+
45524569
void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
45534570
assert(VM_Version::supports_evex(), "");
45544571
assert(VM_Version::supports_avx512_vnni(), "must support vnni");
@@ -4857,6 +4874,15 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
48574874
emit_int16(0x17, (0xC0 | encode));
48584875
}
48594876

4877+
void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // Emit VPTESTMB with an opmask (K) register destination, register-register form.
4878+
assert(VM_Version::supports_avx512vlbw(), "");
4879+
// Encoding: EVEX.NDS.XXX.66.0F38.W0 26 /r
4880+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4881+
attributes.set_is_evex_instruction(); // mark the attributes as an EVEX instruction
4882+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // 66 prefix, 0F38 opcode map
4883+
emit_int16((unsigned char)0x26, (0xC0 | encode)); // opcode 0x26, then ModRM with mod=11 (register-direct)
4884+
}
4885+
48604886
void Assembler::punpcklbw(XMMRegister dst, Address src) {
48614887
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
48624888
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -9410,6 +9436,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
94109436
emit_int16((unsigned char)0xF7, (0xC0 | encode));
94119437
}
94129438

9439+
void Assembler::shrxl(Register dst, Register src1, Register src2) { // Emit the 32-bit form of BMI2 SHRX, register-register form.
9440+
assert(VM_Version::supports_bmi2(), ""); // SHRX is a BMI2 instruction
9441+
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); // vex_w is false here; the shrxq sibling passes true
9442+
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); // note the order: src2 goes in the nds slot, src1 in the src slot; F2 prefix, 0F38 opcode map
9443+
emit_int16((unsigned char)0xF7, (0xC0 | encode)); // opcode 0xF7, then ModRM with mod=11 (register-direct)
9444+
}
9445+
94139446
void Assembler::shrxq(Register dst, Register src1, Register src2) {
94149447
assert(VM_Version::supports_bmi2(), "");
94159448
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,6 +1699,7 @@ class Assembler : public AbstractAssembler {
16991699
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
17001700
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
17011701
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1702+
void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
17021703

17031704
void pause();
17041705

@@ -1812,6 +1813,8 @@ class Assembler : public AbstractAssembler {
18121813
// Multiply add
18131814
void pmaddwd(XMMRegister dst, XMMRegister src);
18141815
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1816+
void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1817+
18151818
// Multiply add accumulate
18161819
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
18171820

@@ -1879,6 +1882,8 @@ class Assembler : public AbstractAssembler {
18791882
void vptest(XMMRegister dst, XMMRegister src);
18801883
void vptest(XMMRegister dst, Address src);
18811884

1885+
void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1886+
18821887
// Vector compare
18831888
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
18841889

@@ -2140,6 +2145,7 @@ class Assembler : public AbstractAssembler {
21402145

21412146
void shlxl(Register dst, Register src1, Register src2);
21422147
void shlxq(Register dst, Register src1, Register src2);
2148+
void shrxl(Register dst, Register src1, Register src2);
21432149
void shrxq(Register dst, Register src1, Register src2);
21442150

21452151
void bzhiq(Register dst, Register src1, Register src2);

0 commit comments

Comments
 (0)