8288047: Accelerate Poly1305 on x86_64 using AVX512 instructions #10582

Closed. This pull request wants to merge 30 commits; the diff below shows the changes from 29 of them.

Commits (30)
e3cfc74
Poly1305 AVX512 intrinsic for x86_64
vpaprotsk Sep 30, 2022
ec6c807
Merge remote-tracking branch 'vpaprotsk/master' into avx512-poly
vpaprotsk Oct 13, 2022
507d6bf
- Fix whitespace and copyright statements
vpaprotsk Oct 13, 2022
7e070d9
missed white-space fix
vpaprotsk Oct 13, 2022
f048f93
further restrict UsePolyIntrinsics with supports_avx512vlbw
vpaprotsk Oct 21, 2022
fb122f3
Merge remote-tracking branch 'origin' into avx512-poly
vpaprotsk Oct 24, 2022
d2a47f1
Merge remote-tracking branch 'origin/master' into avx512-poly
vpaprotsk Oct 24, 2022
de7e138
assembler checks and test case fixes
vpaprotsk Oct 24, 2022
883be10
extra whitespace character
vpaprotsk Oct 24, 2022
78fd8fd
invalidkeyexception and some review comments
vpaprotsk Oct 28, 2022
977e027
address Jamil's review
vpaprotsk Nov 4, 2022
38d9e83
Merge remote-tracking branch 'origin/master' into avx512-poly
vpaprotsk Nov 4, 2022
1841df1
iwanowww review
vpaprotsk Nov 8, 2022
686e061
Merge remote-tracking branch 'origin/master' into avx512-poly
vpaprotsk Nov 8, 2022
120247d
make UsePolyIntrinsics option diagnostic
vpaprotsk Nov 8, 2022
da56045
fix 32-bit build
vpaprotsk Nov 8, 2022
8b1b40f
add getLimbs to interface and reviews
vpaprotsk Nov 9, 2022
abfc68f
fix windows and 32b linux builds
vpaprotsk Nov 10, 2022
2176caf
Sandhya's review
vpaprotsk Nov 10, 2022
196ee35
jcheck
vpaprotsk Nov 10, 2022
835fbe3
live review with Sandhya
vpaprotsk Nov 11, 2022
2a225e4
Vladimir's review
vpaprotsk Nov 11, 2022
a26ac7d
Merge remote-tracking branch 'origin/master' into avx512-poly
vpaprotsk Nov 14, 2022
3fafa11
Merge remote-tracking branch 'origin/master' into avx512-poly
vpaprotsk Nov 15, 2022
8f5942d
Vladimir's review comments
vpaprotsk Nov 15, 2022
58488f4
extra whitespace
vpaprotsk Nov 15, 2022
cbf4938
use noreg properly in poly1305_limbs
vpaprotsk Nov 15, 2022
dbdfd1d
redo register alloc with explicit func params
vpaprotsk Nov 16, 2022
56aed9b
vzeroall, no spill, reg re-map
vpaprotsk Nov 17, 2022
08ea45e
remove early return
vpaprotsk Nov 17, 2022
140 changes: 124 additions & 16 deletions src/hotspot/cpu/x86/assembler_x86.cpp
@@ -5008,6 +5008,40 @@ assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
emit_int16(0x04, (0xC0 | encode));
}

void Assembler::evpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
evpmadd52luq(dst, k0, src1, src2, false, vector_len);
}

void Assembler::evpmadd52luq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
assert(VM_Version::supports_avx512ifma(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}

int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0xB4, (0xC0 | encode));
}

void Assembler::evpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
evpmadd52huq(dst, k0, src1, src2, false, vector_len);
}

void Assembler::evpmadd52huq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
assert(VM_Version::supports_avx512ifma(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}

int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0xB5, (0xC0 | encode));
}
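
These two encodings are the AVX-512 IFMA instructions VPMADD52LUQ (0xB4) and VPMADD52HUQ (0xB5): each 64-bit lane multiplies the low 52 bits of both sources and accumulates either the low or the high half of the 104-bit product into dst, so a wide-limb multiply-accumulate (the core of the Poly1305 modular multiply) never needs to widen beyond 64 bits per lane. A minimal scalar model of one lane, for reference only; the helper names are illustrative, not HotSpot API, and the sketch relies on the GCC/Clang __int128 extension:

#include <cstdint>

static inline uint64_t madd52lo(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t mask52 = (1ULL << 52) - 1;
  unsigned __int128 p = (unsigned __int128)(a & mask52) * (b & mask52);
  return acc + (uint64_t)(p & mask52);   // VPMADD52LUQ: add bits 51:0 of the product
}

static inline uint64_t madd52hi(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t mask52 = (1ULL << 52) - 1;
  unsigned __int128 p = (unsigned __int128)(a & mask52) * (b & mask52);
  return acc + (uint64_t)(p >> 52);      // VPMADD52HUQ: add bits 103:52 of the product
}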

void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512_vnni(), "must support vnni");
@@ -5425,6 +5459,42 @@ void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
emit_int16(0x6C, (0xC0 | encode));
}

void Assembler::evpunpcklqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
evpunpcklqdq(dst, k0, src1, src2, false, vector_len);
}

void Assembler::evpunpcklqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "requires AVX512F");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}

int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x6C, (0xC0 | encode));
}

void Assembler::evpunpckhqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
evpunpckhqdq(dst, k0, src1, src2, false, vector_len);
}

void Assembler::evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "requires AVX512F");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}

int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x6D, (0xC0 | encode));
}
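
The EVEX forms above keep the classic unpack semantics and only add opmask support and the 512-bit width: VPUNPCKLQDQ interleaves the low quadword of each 128-bit lane of the two sources, VPUNPCKHQDQ the high quadword. A small model that treats a ZMM register as eight quadwords (illustrative only, not HotSpot code):

#include <array>
#include <cstdint>

using Zmm = std::array<uint64_t, 8>;   // qwords 0..7 = four 128-bit lanes

static Zmm punpcklqdq_model(const Zmm& a, const Zmm& b) {
  Zmm r{};
  for (int lane = 0; lane < 4; lane++) {
    r[2 * lane]     = a[2 * lane];       // low qword of this lane from src1
    r[2 * lane + 1] = b[2 * lane];       // low qword of this lane from src2
  }
  return r;
}

static Zmm punpckhqdq_model(const Zmm& a, const Zmm& b) {
  Zmm r{};
  for (int lane = 0; lane < 4; lane++) {
    r[2 * lane]     = a[2 * lane + 1];   // high qword of this lane from src1
    r[2 * lane + 1] = b[2 * lane + 1];   // high qword of this lane from src2
  }
  return r;
}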

void Assembler::push(int32_t imm32) {
// in 64bits we push 64bits onto the stack but only
// take a 32bit immediate
@@ -5869,6 +5939,18 @@ void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
}

#ifdef _LP64
void Assembler::shldq(Register dst, Register src, int8_t imm8) {
int encode = prefixq_and_encode(src->encoding(), dst->encoding());
emit_int32(0x0F, (unsigned char)0xA4, (0xC0 | encode), imm8);
}

void Assembler::shrdq(Register dst, Register src, int8_t imm8) {
int encode = prefixq_and_encode(src->encoding(), dst->encoding());
emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
}
#endif
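
These 64-bit double-precision shifts mirror the existing shldl/shrdl above: SHLD shifts dst left and fills the vacated low bits from the top of src, while SHRD shifts dst right and fills the vacated high bits from the bottom of src, which is convenient when slicing a value spread across two registers into limbs that do not fall on 64-bit boundaries. A scalar sketch of the semantics (the hardware masks the count to 0..63 for 64-bit operands):

#include <cstdint>

static inline uint64_t shld64(uint64_t dst, uint64_t src, unsigned imm8) {
  unsigned s = imm8 & 63;
  return s ? (dst << s) | (src >> (64 - s)) : dst;   // high bits of src slide into dst
}

static inline uint64_t shrd64(uint64_t dst, uint64_t src, unsigned imm8) {
  unsigned s = imm8 & 63;
  return s ? (dst >> s) | (src << (64 - s)) : dst;   // low bits of src slide into dst
}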

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
emit_int8((unsigned char)0xA5);
@@ -7740,11 +7822,12 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
emit_operand(dst, src, 0);
}

- void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xDB, (0xC0 | encode));
+ void Assembler::evpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ evpandq(dst, k0, nds, src, false, vector_len);
}

+ void Assembler::evpandq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ evpandq(dst, k0, nds, src, false, vector_len);
+ }

//Variable Shift packed integers logically left.
@@ -7857,13 +7940,13 @@ void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_l
emit_operand(dst, src, 0);
}

- void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xEB, (0xC0 | encode));
+ void Assembler::evporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ evporq(dst, k0, nds, src, false, vector_len);
}

+ void Assembler::evporq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ evporq(dst, k0, nds, src, false, vector_len);
+ }

void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
@@ -8004,7 +8087,8 @@ void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Addres
}

void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_evex(), "requires AVX512F");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -8016,7 +8100,8 @@ void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMReg
}

void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
- assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ assert(VM_Version::supports_evex(), "requires AVX512F");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit);
@@ -8031,7 +8116,8 @@ void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Addres
}

void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_evex(), "requires AVX512F");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -8043,7 +8129,8 @@ void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegi
}

void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
- assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ assert(VM_Version::supports_evex(), "requires AVX512F");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit);
@@ -8201,8 +8288,8 @@ void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address
}

void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
- assert(VM_Version::supports_evex(), "requires EVEX support");
- assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
+ assert(VM_Version::supports_evex(), "requires AVX512F");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -8211,6 +8298,20 @@ void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegis
emit_int8(imm8);
}

void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len) {
assert(VM_Version::supports_evex(), "requires EVEX support");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x25);
emit_operand(dst, src3, 1);
emit_int8(imm8);
}

void Assembler::evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
@@ -13452,6 +13553,13 @@ void Assembler::vzeroupper() {
emit_copy(code_section(), vzup_code, vzup_len);
}

void Assembler::vzeroall() {
assert(VM_Version::supports_avx(), "requires AVX");
InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
(void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x77);
}
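
vzeroall() emits VZEROALL (VEX.256 0F 77, the same opcode path as vzeroupper() above, distinguished only by VEX.L). It clears vector registers 0-15 in full (on AVX-512 hardware this should zero the whole zmm state of those registers; zmm16-31 are not affected), which is presumably why the "vzeroall, no spill, reg re-map" commit introduces it: the stub can wipe key material and intermediate limbs on exit instead of spilling and restoring registers. A scalar model of the register effect (illustrative only):

#include <array>
#include <cstdint>

using Zmm = std::array<uint64_t, 8>;   // one 512-bit register as eight qwords

static void vzeroall_model(std::array<Zmm, 32>& regs) {
  for (int i = 0; i < 16; i++) {
    regs[i].fill(0);                   // zmm0..zmm15 cleared; zmm16..zmm31 left as-is
  }
}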

void Assembler::pushq(Address src) {
InstructionMark im(this);
emit_int16(get_prefixq(src), (unsigned char)0xFF);
22 changes: 20 additions & 2 deletions src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1891,6 +1891,10 @@ class Assembler : public AbstractAssembler {
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void evpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void evpmadd52luq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
void evpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void evpmadd52huq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1990,6 +1994,11 @@ class Assembler : public AbstractAssembler {
// Interleave Low Quadwords
void punpcklqdq(XMMRegister dst, XMMRegister src);

void evpunpcklqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void evpunpcklqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
void evpunpckhqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

// Vector sum of absolute difference.
void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

@@ -2092,6 +2101,10 @@ class Assembler : public AbstractAssembler {
void shldl(Register dst, Register src, int8_t imm8);
void shrdl(Register dst, Register src);
void shrdl(Register dst, Register src, int8_t imm8);
#ifdef _LP64
void shldq(Register dst, Register src, int8_t imm8);
void shrdq(Register dst, Register src, int8_t imm8);
#endif

void shll(Register dst, int imm8);
void shll(Register dst);
@@ -2616,7 +2629,8 @@ class Assembler : public AbstractAssembler {
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
- void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpandq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
@@ -2626,7 +2640,8 @@ class Assembler : public AbstractAssembler {
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
- void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evporq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
@@ -2640,6 +2655,7 @@ class Assembler : public AbstractAssembler {
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);

// Vector compress/expand instructions.
void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
@@ -2753,6 +2769,8 @@ class Assembler : public AbstractAssembler {
// runtime code and native libraries.
void vzeroupper();

void vzeroall();

// Vector double compares
void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8 changes: 4 additions & 4 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -5279,7 +5279,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
// Get the reverse bit sequence of lower nibble of each byte.
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), vec_enc, noreg);
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
- vpandq(dst, xtmp2, src, vec_enc);
+ evpandq(dst, xtmp2, src, vec_enc);
vpshufb(dst, xtmp1, dst, vec_enc);
vpsllq(dst, dst, 4, vec_enc);

@@ -5290,7 +5290,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg

// Perform logical OR operation b/w left shifted reverse bit sequence of lower nibble and
// right shifted reverse bit sequence of upper nibble to obtain the reverse bit sequence of each byte.
- vporq(xtmp2, dst, xtmp2, vec_enc);
+ evporq(xtmp2, dst, xtmp2, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, vec_enc);

} else if(vec_enc == Assembler::AVX_512bit) {
@@ -5345,11 +5345,11 @@ void C2_MacroAssembler::vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, X
void C2_MacroAssembler::vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, Register rtmp, int vec_enc) {
vbroadcast(T_INT, xtmp1, bitmask, rtmp, vec_enc);
- vpandq(dst, xtmp1, src, vec_enc);
+ evpandq(dst, xtmp1, src, vec_enc);
vpsllq(dst, dst, nbits, vec_enc);
vpandn(xtmp1, xtmp1, src, vec_enc);
vpsrlq(xtmp1, xtmp1, nbits, vec_enc);
- vporq(dst, dst, xtmp1, vec_enc);
+ evporq(dst, dst, xtmp1, vec_enc);
}

void C2_MacroAssembler::vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
47 changes: 47 additions & 0 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -1217,6 +1217,19 @@ void MacroAssembler::andptr(Register dst, int32_t imm32) {
LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

#ifdef _LP64
void MacroAssembler::andq(Register dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");

if (reachable(src)) {
andq(dst, as_Address(src));
} else {
lea(rscratch, src);
andq(dst, Address(rscratch, 0));
}
}
#endif

void MacroAssembler::atomic_incl(Address counter_addr) {
lock();
incrementl(counter_addr);
@@ -9105,6 +9118,40 @@ void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMM
fatal("Unexpected type argument %s", type2name(type)); break;
}
}

void MacroAssembler::evpandq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");

if (reachable(src)) {
evpandq(dst, nds, as_Address(src), vector_len);
} else {
lea(rscratch, src);
evpandq(dst, nds, Address(rscratch, 0), vector_len);
}
}

void MacroAssembler::evporq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");

if (reachable(src)) {
evporq(dst, nds, as_Address(src), vector_len);
} else {
lea(rscratch, src);
evporq(dst, nds, Address(rscratch, 0), vector_len);
}
}

void MacroAssembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src3), "missing");

if (reachable(src3)) {
vpternlogq(dst, imm8, src2, as_Address(src3), vector_len);
} else {
lea(rscratch, src3);
vpternlogq(dst, imm8, src2, Address(rscratch, 0), vector_len);
}
}
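
These MacroAssembler overloads follow the usual AddressLiteral pattern: when the constant is RIP-reachable the memory operand is used directly, otherwise the literal's address is first materialized into rscratch. A hypothetical call site from a stub, for illustration only; the mask symbol and register choices are made up and are not part of this change:

// 'poly1305_mask_addr' stands for the address of some 64-byte mask table emitted elsewhere.
__ evpandq(xmm4, xmm4, ExternalAddress(poly1305_mask_addr), Assembler::AVX_512bit, r13 /*rscratch*/);
__ vpternlogq(xmm5, 0x96, xmm6, ExternalAddress(poly1305_mask_addr), Assembler::AVX_512bit, r13);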

#if COMPILER2_OR_JVMCI

void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,