Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8269404: Base64 Encoding optimization enhancements for x86 using AVX-512 #4601

Closed
wants to merge 12 commits into from
@@ -3757,6 +3757,15 @@ void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
emit_int16((unsigned char)0x8D, (0xC0 | encode));
}

// Emits the memory-source form of vpermb: an EVEX-encoded instruction with
// the 66 prefix in the 0F 38 opcode map, opcode byte 0x8D, followed by the
// operand bytes for (dst, src).
//
//   dst        - destination vector register
//   nds        - register passed as the second (non-destructive) source
//   src        - memory operand
//   vector_len - vector length selector forwarded to InstructionAttr
//
// Asserts that the CPU reports AVX512_VBMI support.
void Assembler::vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi(), "");
  // Record the start of the instruction before emitting a memory operand,
  // matching the other Address-taking emitter in this file (vpmaskmovd).
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}

void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() :
vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() :
@@ -3837,6 +3846,14 @@ void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16(0x7D, (0xC0 | encode));
}

// Emits the register-register form of evpmultishiftqb: EVEX-encoded, 66
// prefix, 0F 38 opcode map, W bit set, opcode byte 0x83.
// Asserts that the CPU reports AVX512_VBMI support.
void Assembler::evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi(), "");

  InstructionAttr attributes(vector_len,
                             /* vex_w */ true,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);
  attributes.set_is_evex_instruction();

  // The prefix helper returns the register bits that go into the ModRM byte.
  int modrm_bits = vex_prefix_and_encode(dst->encoding(),
                                         ctl->encoding(),
                                         src->encoding(),
                                         VEX_SIMD_66,
                                         VEX_OPCODE_0F_38,
                                         &attributes);

  // 0xC0 sets ModRM.mod to 11b (register-direct operand).
  emit_int16((unsigned char)0x83, (unsigned char)(0xC0 | modrm_bits));
}

// Emits the fixed two-byte sequence F3 90.
void Assembler::pause() {
  emit_int16((unsigned char)0xF3,
             (unsigned char)0x90);
}
@@ -4135,6 +4152,15 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src, int vec_enc) {
emit_int16((unsigned char)0xD7, (0xC0 | encode));
}

// Emits the memory-source form of vpmaskmovd: 66 prefix, 0F 38 opcode map,
// opcode byte 0x8C, followed by the operand bytes for (dst, src).
// Only the 256-bit vector length on an AVX2-capable CPU is accepted.
void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");

  // Memory operand follows, so record where this instruction starts.
  InstructionMark im(this);

  InstructionAttr attributes(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ false,
                             /* uses_vl */ true);

  vex_prefix(src,
             nds->encoding(),
             dst->encoding(),
             VEX_SIMD_66,
             VEX_OPCODE_0F_38,
             &attributes);

  emit_int8((unsigned char)0x8C);
  emit_operand(dst, src);
}

void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -6564,6 +6590,13 @@ void Assembler::psubq(XMMRegister dst, XMMRegister src) {
emit_int8((0xC0 | encode));
}

// Emits the register-register form of vpsubusb: 66 prefix, 0F opcode map,
// opcode byte 0xD8. Legacy (non-EVEX) encoding is selected by _legacy_mode_bw.
void Assembler::vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");

  InstructionAttr attributes(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ _legacy_mode_bw,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);

  // The prefix helper returns the register bits that go into the ModRM byte.
  int modrm_bits = vex_prefix_and_encode(dst->encoding(),
                                         nds->encoding(),
                                         src->encoding(),
                                         VEX_SIMD_66,
                                         VEX_OPCODE_0F,
                                         &attributes);

  // 0xC0 sets ModRM.mod to 11b (register-direct operand).
  emit_int16((unsigned char)0xD8, (0xC0 | modrm_bits));
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6655,6 +6688,14 @@ void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
emit_int16((unsigned char)0xF4, (0xC0 | encode));
}

// Emits the register-register form of vpmulhuw: 66 prefix, 0F opcode map,
// opcode byte 0xE4.
//
//   dst        - destination vector register
//   nds        - first source register
//   src        - second source register
//   vector_len - AVX_128bit, AVX_256bit or AVX_512bit
//
// Each vector length is accepted only when the matching CPU feature is
// present: AVX for 128-bit, AVX2 for 256-bit, AVX512BW for 512-bit.
void Assembler::vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((vector_len == AVX_128bit && VM_Version::supports_avx()) ||
         (vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
         (vector_len == AVX_512bit && VM_Version::supports_avx512bw()), "");
  // Use _legacy_mode_bw like the other byte/word emitters in this file
  // (vpsubusb, vpsubb, vpmullw) rather than a hard-coded false.
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  // 0xC0 sets ModRM.mod to 11b (register-direct operand).
  emit_int16((unsigned char)0xE4, (0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -1688,6 +1688,7 @@ class Assembler : public AbstractAssembler {
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Memory-source overload of vpermb; its definition asserts AVX512_VBMI support.
void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1698,6 +1699,7 @@ class Assembler : public AbstractAssembler {
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// EVEX-only, register-register form; its definition asserts AVX512_VBMI support.
void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);

void pause();

@@ -1746,6 +1748,7 @@ class Assembler : public AbstractAssembler {

void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
// Memory-source form; its definition asserts AVX2 support and vector_len == AVX_256bit.
void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
@@ -2248,6 +2251,7 @@ class Assembler : public AbstractAssembler {
void psubw(XMMRegister dst, XMMRegister src);
void psubd(XMMRegister dst, XMMRegister src);
void psubq(XMMRegister dst, XMMRegister src);
// Three-operand register form; its definition asserts UseAVX > 0.
void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2268,6 +2272,7 @@ class Assembler : public AbstractAssembler {
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Three-operand register form; its definition asserts that the CPU supports
// the feature set required for the requested vector_len.
void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Minimum of packed integers
void pminsb(XMMRegister dst, XMMRegister src);