Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
8245512: CRC32 optimization using AVX512 instructions
Reviewed-by: kvn
  • Loading branch information
Shravya Rukmannagari committed Jun 5, 2020
1 parent 9a7f519 commit 9d5f388498e18e5823d09b53ed66ff0025e661fc
@@ -6241,6 +6241,17 @@ void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address
emit_int8(imm8);
}

// Ternary logic, quadword form, register-register operands.
//
// Emits an EVEX-only encoding (vex_w set) with the 66 prefix in the 0F 3A
// opcode map: opcode 0x25, a register-direct ModRM byte, then imm8 as the
// trailing immediate byte.
//
//   dst        - destination register (also passed as the first encoded operand)
//   imm8       - immediate byte emitted verbatim after the ModRM byte
//   src2, src3 - second and third register operands
//   vector_len - vector length; anything other than AVX_512bit requires AVX512VL
void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");

  InstructionAttr attributes(vector_len,
                             /* vex_w */ true,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);
  // This instruction has no VEX form, so force the EVEX prefix.
  attributes.set_is_evex_instruction();

  // Prefix bytes are emitted here; the return value carries the reg/rm bits.
  const int reg_rm = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(),
                                           VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);

  // opcode, ModRM with mod == 11 (register-direct), immediate
  emit_int24(0x25, (0xC0 | reg_rm), imm8);
}

// vinserti forms

void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
@@ -6693,6 +6704,21 @@ void Assembler::vpbroadcastq(XMMRegister dst, Address src, int vector_len) {
emit_int8(0x59);
emit_operand(dst, src);
}

// Broadcast a 128-bit memory operand (four 32-bit elements) into every
// 128-bit lane of dst (VBROADCASTI32X4, memory-source form).
//
// Encoding: 66 prefix, 0F 38 opcode map, W0, opcode 0x5A, followed by the
// memory operand.
//
//   dst        - destination vector register; must not be xnoreg
//   src        - memory address of the 128-bit source
//   vector_len - must be wider than AVX_128bit
void Assembler::evbroadcasti32x4(XMMRegister dst, Address src, int vector_len) {
  assert(vector_len != Assembler::AVX_128bit, "");
  // VBROADCASTI32X4 belongs to AVX512F. AVX512DQ is only required by the
  // 32x2 / 64x2 / 32x8 broadcast forms, so asserting DQ here was too strict.
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // The Intel SDM classifies this instruction as Tuple4 with 32-bit elements
  // (Tuple2/64-bit is the 64x2 form). Both describe a 16-byte memory operand.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x5A);
  emit_operand(dst, src);
}

void Assembler::evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len) {
assert(vector_len != Assembler::AVX_128bit, "");
assert(VM_Version::supports_avx512dq(), "");
@@ -7587,6 +7613,15 @@ void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop
emit_int24((unsigned char)0xC2, (0xC0 | encode), (0xF & cop));
}

// Variable byte blend with four register operands.
//
// Emits a legacy-mode (VEX) encoding with the 66 prefix in the 0F 3A opcode
// map: opcode 0x4C, a register-direct ModRM byte, and a trailing byte whose
// high nibble holds the encoding of src2.
//
//   dst, nds, src1 - operands passed to the prefix/ModRM encoder
//   src2           - fourth register operand, carried in the trailing byte
//   vector_len     - must not exceed AVX_256bit
void Assembler::blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(vector_len <= AVX_256bit, "");

  InstructionAttr attributes(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ true,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);

  // Prefix bytes are emitted here; the return value carries the reg/rm bits.
  const int reg_rm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(),
                                           VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);

  // Shift the register number into bits 7:4 and mask off everything else.
  const int src2_byte = (src2->encoding() << 4) & 0xF0;

  // opcode, ModRM with mod == 11 (register-direct), src2 selector byte
  emit_int24(0x4C, (0xC0 | reg_rm), src2_byte);
}

void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(vector_len <= AVX_256bit, "");
@@ -2201,6 +2201,7 @@ class Assembler : public AbstractAssembler {
// Ternary logic instruction.
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);

// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
@@ -2245,6 +2246,7 @@ class Assembler : public AbstractAssembler {
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastq(XMMRegister dst, Address src, int vector_len);

void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);

@@ -2274,6 +2276,7 @@ class Assembler : public AbstractAssembler {
void vzeroupper();

// AVX support for vectorized conditional move (float/double). The compare (cmpp*) and blend (blendvp*) instructions below are used only together, as a coupled pair.
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);

0 comments on commit 9d5f388

Please sign in to comment.