Skip to content

Commit c37988d

Browse files
asgibbons authored and Sandhya Viswanathan committed
8268276: Base64 Decoding optimization for x86 using AVX-512
Reviewed-by: erikj, sviswanathan, kvn
1 parent 08ee7ae commit c37988d

File tree

12 files changed

+749
-18
lines changed

12 files changed

+749
-18
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ NashornProfile.txt
1616
**/JTreport/**
1717
**/JTwork/**
1818
/src/utils/LogCompilation/target/
19+
/.project/
20+
/.settings/

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5683,6 +5683,7 @@ class StubGenerator: public StubCodeGenerator {
56835683
* c_rarg3 - dest_start
56845684
* c_rarg4 - dest_offset
56855685
* c_rarg5 - isURL
5686+
* c_rarg6 - isMIME
56865687
*
56875688
*/
56885689
address generate_base64_decodeBlock() {
@@ -5765,12 +5766,13 @@ class StubGenerator: public StubCodeGenerator {
57655766
StubCodeMark mark(this, "StubRoutines", "decodeBlock");
57665767
address start = __ pc();
57675768

5768-
Register src = c_rarg0; // source array
5769-
Register soff = c_rarg1; // source start offset
5770-
Register send = c_rarg2; // source end offset
5771-
Register dst = c_rarg3; // dest array
5772-
Register doff = c_rarg4; // position for writing to dest array
5773-
Register isURL = c_rarg5; // Base64 or URL character set
5769+
Register src = c_rarg0; // source array
5770+
Register soff = c_rarg1; // source start offset
5771+
Register send = c_rarg2; // source end offset
5772+
Register dst = c_rarg3; // dest array
5773+
Register doff = c_rarg4; // position for writing to dest array
5774+
Register isURL = c_rarg5; // Base64 or URL character set
5775+
Register isMIME = c_rarg6; // Decoding MIME block - unused in this implementation
57745776

57755777
Register length = send; // reuse send as length of source data to process
57765778

src/hotspot/cpu/ppc/stubGenerator_ppc.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3757,6 +3757,7 @@ class StubGenerator: public StubCodeGenerator {
37573757
Register d = R6_ARG4; // destination address
37583758
Register dp = R7_ARG5; // destination offset
37593759
Register isURL = R8_ARG6; // boolean, if non-zero indicates use of RFC 4648 base64url encoding
3760+
Register isMIME = R9_ARG7; // boolean, if non-zero indicates use of RFC 2045 MIME encoding - not used
37603761

37613762
// Local variables
37623763
Register const_ptr = R9; // used for loading constants

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3829,6 +3829,14 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int
38293829
emit_int16(0x76, (0xC0 | encode));
38303830
}
38313831

3832+
// Emits the register-to-register form of the EVEX-encoded byte permute
// (VPERMT2B: 66 prefix, 0F 38 opcode map, W0, opcode 0x7D).
//   dst        - destination vector register
//   nds        - second operand, passed in the middle position to vex_prefix_and_encode
//   src        - third operand, passed in the last position to vex_prefix_and_encode
//   vector_len - AVX vector length selector forwarded to InstructionAttr
// Asserts that the CPU reports AVX512_VBMI support.
void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3833+
assert(VM_Version::supports_avx512_vbmi(), "");
3834+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3835+
attributes.set_is_evex_instruction(); // force the EVEX prefix form
3836+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3837+
emit_int16(0x7D, (0xC0 | encode)); // opcode byte, then ModRM with mod=11 (register operands)
3838+
}
3839+
38323840
void Assembler::pause() {
38333841
emit_int16((unsigned char)0xF3, (unsigned char)0x90);
38343842
}
@@ -4548,6 +4556,15 @@ void Assembler::vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int
45484556
emit_int16((unsigned char)0xF5, (0xC0 | encode));
45494557
}
45504558

4559+
// Emits the register-to-register form of VPMADDUBSW
// (66 prefix, 0F 38 opcode map, opcode 0x04).
//   dst        - destination vector register
//   src1, src2 - source vector registers
//   vector_len - AVX_128bit, AVX_256bit or AVX_512bit
// The assert requires AVX for 128-bit, AVX2 for 256-bit and AVX512BW for
// 512-bit vectors; any other vector_len evaluates to 0 and fails the assert.
void Assembler::vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
4560+
assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4561+
vector_len == AVX_256bit? VM_Version::supports_avx2() :
4562+
vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
4563+
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4564+
int encode = simd_prefix_and_encode(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4565+
emit_int16(0x04, (0xC0 | encode)); // opcode byte, then ModRM with mod=11 (register operands)
4566+
}
4567+
45514568
void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
45524569
assert(VM_Version::supports_evex(), "");
45534570
assert(VM_Version::supports_avx512_vnni(), "must support vnni");
@@ -4856,6 +4873,15 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
48564873
emit_int16(0x17, (0xC0 | encode));
48574874
}
48584875

4876+
// Emits the register-to-register form of the EVEX-encoded VPTESTMB, whose
// destination is an opmask (K) register.
//   dst        - destination opmask register
//   nds        - first vector source, passed in the middle position to vex_prefix_and_encode
//   src        - second vector source, passed in the last position to vex_prefix_and_encode
//   vector_len - AVX vector length selector forwarded to InstructionAttr
// Asserts VM_Version::supports_avx512vlbw() regardless of vector_len.
void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4877+
assert(VM_Version::supports_avx512vlbw(), "");
4878+
// Encoding: EVEX.NDS.XXX.66.0F38.W0 26 /r
4879+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4880+
attributes.set_is_evex_instruction(); // force the EVEX prefix form
4881+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4882+
emit_int16((unsigned char)0x26, (0xC0 | encode)); // opcode byte, then ModRM with mod=11 (register operands)
4883+
}
4884+
48594885
void Assembler::punpcklbw(XMMRegister dst, Address src) {
48604886
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
48614887
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -9403,6 +9429,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
94039429
emit_int16((unsigned char)0xF7, (0xC0 | encode));
94049430
}
94059431

9432+
// Emits the 32-bit (vex_w = false) register form of SHRX
// (F2 prefix, 0F 38 opcode map, opcode 0xF7).
//   dst  - destination register
//   src1 - passed last to vex_prefix_and_encode
//   src2 - passed in the middle (nds) position to vex_prefix_and_encode
// NOTE(review): per the Intel SDM, the last operand is the value shifted and
// the middle (VEX.vvvv) operand is the shift count, i.e. dst = src1 >>> src2;
// confirm against vex_prefix_and_encode's parameter order.
// Asserts that the CPU reports BMI2 support.
void Assembler::shrxl(Register dst, Register src1, Register src2) {
9433+
assert(VM_Version::supports_bmi2(), "");
9434+
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
9435+
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
9436+
emit_int16((unsigned char)0xF7, (0xC0 | encode)); // opcode byte, then ModRM with mod=11 (register operands)
9437+
}
9438+
94069439
void Assembler::shrxq(Register dst, Register src1, Register src2) {
94079440
assert(VM_Version::supports_bmi2(), "");
94089441
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,7 @@ class Assembler : public AbstractAssembler {
16971697
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
16981698
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
16991699
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1700+
void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
17001701

17011702
void pause();
17021703

@@ -1810,6 +1811,8 @@ class Assembler : public AbstractAssembler {
18101811
// Multiply add
18111812
void pmaddwd(XMMRegister dst, XMMRegister src);
18121813
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1814+
void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1815+
18131816
// Multiply add accumulate
18141817
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
18151818

@@ -1877,6 +1880,8 @@ class Assembler : public AbstractAssembler {
18771880
void vptest(XMMRegister dst, XMMRegister src);
18781881
void vptest(XMMRegister dst, Address src);
18791882

1883+
void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1884+
18801885
// Vector compare
18811886
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
18821887

@@ -2138,6 +2143,7 @@ class Assembler : public AbstractAssembler {
21382143

21392144
void shlxl(Register dst, Register src1, Register src2);
21402145
void shlxq(Register dst, Register src1, Register src2);
2146+
void shrxl(Register dst, Register src1, Register src2);
21412147
void shrxq(Register dst, Register src1, Register src2);
21422148

21432149
void bzhiq(Register dst, Register src1, Register src2);

0 commit comments

Comments
 (0)