Skip to content

Commit

Permalink
8268276: Base64 Decoding optimization for x86 using AVX-512
Browse files Browse the repository at this point in the history
Co-authored-by: Derek White <drwhite@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Backport-of: c37988d0793b24d98d285530dfda69999a227937
  • Loading branch information
3 people committed Nov 9, 2022
1 parent 3df72a3 commit 07050b0
Show file tree
Hide file tree
Showing 12 changed files with 749 additions and 18 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ NashornProfile.txt
**/JTreport/**
**/JTwork/**
/src/utils/LogCompilation/target/
/.project/
/.settings/
14 changes: 8 additions & 6 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6125,6 +6125,7 @@ class StubGenerator: public StubCodeGenerator {
* c_rarg3 - dest_start
* c_rarg4 - dest_offset
* c_rarg5 - isURL
* c_rarg6 - isMIME
*
*/
address generate_base64_decodeBlock() {
Expand Down Expand Up @@ -6207,12 +6208,13 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", "decodeBlock");
address start = __ pc();

Register src = c_rarg0; // source array
Register soff = c_rarg1; // source start offset
Register send = c_rarg2; // source end offset
Register dst = c_rarg3; // dest array
Register doff = c_rarg4; // position for writing to dest array
Register isURL = c_rarg5; // Base64 or URL character set
Register src = c_rarg0; // source array
Register soff = c_rarg1; // source start offset
Register send = c_rarg2; // source end offset
Register dst = c_rarg3; // dest array
Register doff = c_rarg4; // position for writing to dest array
Register isURL = c_rarg5; // Base64 or URL character set
Register isMIME = c_rarg6; // Decoding MIME block - unused in this implementation

Register length = send; // reuse send as length of source data to process

Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3757,6 +3757,7 @@ class StubGenerator: public StubCodeGenerator {
Register d = R6_ARG4; // destination address
Register dp = R7_ARG5; // destination offset
Register isURL = R8_ARG6; // boolean, if non-zero indicates use of RFC 4648 base64url encoding
Register isMIME = R9_ARG7; // boolean, if non-zero indicates use of RFC 2045 MIME encoding - not used

// Local variables
Register const_ptr = R9; // used for loading constants
Expand Down
33 changes: 33 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3830,6 +3830,14 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16(0x76, (0xC0 | encode));
}

void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512_vbmi(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x7D, (0xC0 | encode));
}

void Assembler::pause() {
emit_int16((unsigned char)0xF3, (unsigned char)0x90);
}
Expand Down Expand Up @@ -4549,6 +4557,15 @@ void Assembler::vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16((unsigned char)0xF5, (0xC0 | encode));
}

void Assembler::vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
vector_len == AVX_256bit? VM_Version::supports_avx2() :
vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x04, (0xC0 | encode));
}

void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512_vnni(), "must support vnni");
Expand Down Expand Up @@ -4857,6 +4874,15 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x17, (0xC0 | encode));
}

void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
// Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x26, (0xC0 | encode));
}

void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
Expand Down Expand Up @@ -9410,6 +9436,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}

void Assembler::shrxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}

void Assembler::shrxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1699,6 +1699,7 @@ class Assembler : public AbstractAssembler {
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

void pause();

Expand Down Expand Up @@ -1812,6 +1813,8 @@ class Assembler : public AbstractAssembler {
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

Expand Down Expand Up @@ -1879,6 +1882,8 @@ class Assembler : public AbstractAssembler {
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);

void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Vector compare
void vptest(XMMRegister dst, XMMRegister src, int vector_len);

Expand Down Expand Up @@ -2140,6 +2145,7 @@ class Assembler : public AbstractAssembler {

void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
void shrxl(Register dst, Register src1, Register src2);
void shrxq(Register dst, Register src1, Register src2);

void bzhiq(Register dst, Register src1, Register src2);
Expand Down
Loading

1 comment on commit 07050b0

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.