Skip to content

Commit 9cfb0f7

Browse files
ferakoczwangweij
authored and committed
8341527: AVX-512 intrinsic for SHA3
Reviewed-by: sviswanathan
1 parent 4ce19ca commit 9cfb0f7

File tree

9 files changed

+426
-5
lines changed

9 files changed

+426
-5
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3585,6 +3585,23 @@ void Assembler::evmovdqub(Address dst, KRegister mask, XMMRegister src, bool mer
35853585
emit_operand(src, dst, 0);
35863586
}
35873587

3588+
// Unmasked register-to-register overload of evmovdquw.
// Forwards to the masked overload, passing k0 as the mask register and
// merge == false; dst, src and vector_len are passed through unchanged.
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
3589+
// Unmasked instruction
3590+
evmovdquw(dst, k0, src, /*merge*/ false, vector_len);
3591+
}
3592+
3593+
// Masked register-to-register form of evmovdquw.
//   dst        - destination vector register
//   mask       - opmask register recorded in the instruction attributes
//   src        - source vector register
//   merge      - when true, reset_is_clear_context() is called on the attributes
//                (presumably selecting merge-masking instead of zeroing;
//                confirm against InstructionAttr)
//   vector_len - vector length selector; also decides which CPU feature is asserted
// Encoded with the F2 SIMD prefix in the 0F opcode map, vex_w set, opcode byte 0x6F.
void Assembler::evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
3594+
// Lengths up to AVX_256bit require avx512vlbw; longer lengths require avx512bw.
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx512vlbw() : VM_Version::supports_avx512bw(), "");
3595+
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3596+
attributes.set_embedded_opmask_register_specifier(mask);
3597+
attributes.set_is_evex_instruction();
3598+
if (merge) {
3599+
attributes.reset_is_clear_context();
3600+
}
3601+
// No second source operand: the nds slot is passed as 0.
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3602+
// Opcode byte followed by (0xC0 | encode) -- presumably a register-direct ModRM byte.
emit_int16(0x6F, (0xC0 | encode));
3603+
}
3604+
35883605
void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
35893606
// Unmasked instruction
35903607
evmovdquw(dst, k0, src, /*merge*/ false, vector_len);
@@ -8711,6 +8728,15 @@ void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int
87118728
emit_int16((unsigned char)0xF4, (0xC0 | encode));
87128729
}
87138730

8731+
// Emits the three-register form of vpmuldq (dst, nds, src).
// Encoded with the 66 SIMD prefix in the 0F 38 opcode map, opcode byte 0x28.
// The asserted CPU feature depends on vector_len:
//   AVX_128bit -> supports_avx(), AVX_256bit -> supports_avx2(),
//   anything else -> supports_evex().
void Assembler::vpmuldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8732+
assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
8733+
(vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
8734+
// vex_w follows EVEX availability; no opmask register is used (no_mask_reg is true).
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
8735+
// NOTE(review): presumably lets the W bit be dropped when a VEX encoding is chosen -- confirm in InstructionAttr.
attributes.set_rex_vex_w_reverted();
8736+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8737+
emit_int16(0x28, (0xC0 | encode));
8738+
}
8739+
87148740
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
87158741
assert(UseAVX > 0, "requires some form of AVX");
87168742
InstructionMark im(this);
@@ -11246,6 +11272,18 @@ void Assembler::evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Addre
1124611272
emit_operand(dst, src, 0);
1124711273
}
1124811274

11275+
// Emits the masked three-register form of evpmulhw (dst, nds, src under mask).
//   mask  - opmask register recorded in the instruction attributes
//   merge - when true, reset_is_clear_context() is called on the attributes
//           (presumably selecting merge-masking instead of zeroing; confirm
//           against InstructionAttr)
// Requires avx512bw, plus avx512vl unless vector_len is AVX_512bit.
// Encoded with the 66 SIMD prefix in the 0F opcode map, vex_w clear, opcode byte 0xE5.
void Assembler::evpmulhw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
11276+
assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
11277+
InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
11278+
attributes.set_is_evex_instruction();
11279+
attributes.set_embedded_opmask_register_specifier(mask);
11280+
if (merge) {
11281+
attributes.reset_is_clear_context();
11282+
}
11283+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
11284+
emit_int16((unsigned char)0xE5, (0xC0 | encode));
11285+
}
11286+
1124911287
void Assembler::evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
1125011288
assert(VM_Version::supports_evex(), "");
1125111289
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
@@ -16914,3 +16952,28 @@ void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int
1691416952
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1691516953
emit_int16(0x7D, (0xC0 | encode));
1691616954
}
16955+
16956+
// Emits the unmasked three-register form of evpermt2w (dst, nds, src).
// Lengths up to AVX_256bit require avx512vlbw; longer lengths require avx512bw.
// Encoded as an EVEX instruction with the 66 SIMD prefix in the 0F 38 opcode
// map, vex_w set, no opmask register, opcode byte 0x7D.
void Assembler::evpermt2w(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
16957+
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx512vlbw() : VM_Version::supports_avx512bw(), "");
16958+
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
16959+
attributes.set_is_evex_instruction();
16960+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
16961+
emit_int16(0x7D, (0xC0 | encode));
16962+
}
16963+
16964+
// Emits the unmasked three-register form of evpermt2d (dst, nds, src).
// Requires EVEX support, plus avx512vl unless vector_len is AVX_512bit.
// Encoded as an EVEX instruction with the 66 SIMD prefix in the 0F 38 opcode
// map, vex_w clear, no opmask register, opcode byte 0x7E.
void Assembler::evpermt2d(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
16965+
assert(VM_Version::supports_evex() && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl()), "");
16966+
// vex_w is false here; the evpermt2q emitter uses the same opcode byte with vex_w true.
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
16967+
attributes.set_is_evex_instruction();
16968+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
16969+
emit_int16(0x7E, (0xC0 | encode));
16970+
}
16971+
16972+
// Emits the unmasked three-register form of evpermt2q (dst, nds, src).
// Requires EVEX support, plus avx512vl unless vector_len is AVX_512bit.
// Encoded as an EVEX instruction with the 66 SIMD prefix in the 0F 38 opcode
// map, vex_w set, no opmask register, opcode byte 0x7E.
void Assembler::evpermt2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
16973+
assert(VM_Version::supports_evex() && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl()), "");
16974+
// vex_w is true here; the evpermt2d emitter uses the same opcode byte with vex_w false.
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
16975+
attributes.set_is_evex_instruction();
16976+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
16977+
emit_int16(0x7E, (0xC0 | encode));
16978+
}
16979+

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,7 @@ class Assembler : public AbstractAssembler {
17571757
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
17581758
void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
17591759

1760+
void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
17601761
void evmovdquw(XMMRegister dst, Address src, int vector_len);
17611762
void evmovdquw(Address dst, XMMRegister src, int vector_len);
17621763
void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
@@ -1970,6 +1971,9 @@ class Assembler : public AbstractAssembler {
19701971
void evpermi2ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
19711972
void evpermi2pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
19721973
void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1974+
void evpermt2w(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1975+
void evpermt2d(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1976+
void evpermt2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
19731977

19741978
void pause();
19751979

@@ -2741,6 +2745,7 @@ class Assembler : public AbstractAssembler {
27412745
void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
27422746
void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
27432747
void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2748+
void evpmulhw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
27442749
void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
27452750
void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
27462751
void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
@@ -2876,6 +2881,7 @@ class Assembler : public AbstractAssembler {
28762881
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
28772882
void evpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
28782883
void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2884+
void vpmuldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
28792885
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
28802886
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
28812887
void evpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

src/hotspot/cpu/x86/macroAssembler_x86.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,7 @@ class MacroAssembler: public Assembler {
12961296
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
12971297
void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
12981298

1299+
// Register-to-register evmovdquw: forwards directly to Assembler::evmovdquw with the same arguments.
void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
12991300
void evmovdquw(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
13001301
void evmovdquw(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
13011302

@@ -1506,6 +1507,8 @@ class MacroAssembler: public Assembler {
15061507
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); }
15071508
void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
15081509

1510+
// Three-register vpmuldq: forwards directly to Assembler::vpmuldq with the same arguments.
void vpmuldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpmuldq(dst, nds, src, vector_len); }
1511+
15091512
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
15101513
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
15111514

@@ -1515,9 +1518,13 @@ class MacroAssembler: public Assembler {
15151518
void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
15161519
void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
15171520

1521+
void evpsrad(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1522+
void evpsrad(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1523+
15181524
void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
15191525
void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
15201526

1527+
using Assembler::evpsllw;
15211528
void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
15221529
if (!is_varshift) {
15231530
Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
@@ -1562,13 +1569,15 @@ class MacroAssembler: public Assembler {
15621569
Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
15631570
}
15641571
}
1572+
using Assembler::evpsraw;
15651573
// Masked evpsraw wrapper that picks the underlying Assembler emitter from
// is_varshift: false calls Assembler::evpsraw, true calls Assembler::evpsravw.
// All other arguments are forwarded unchanged.
void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
15661574
if (!is_varshift) {
15671575
Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
15681576
} else {
15691577
Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
15701578
}
15711579
}
1580+
using Assembler::evpsrad;
15721581
void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
15731582
if (!is_varshift) {
15741583
Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4032,6 +4032,8 @@ void StubGenerator::generate_compiler_stubs() {
40324032

40334033
generate_chacha_stubs();
40344034

4035+
generate_sha3_stubs();
4036+
40354037
#ifdef COMPILER2
40364038
if ((UseAVX == 2) && EnableX86ECoreOpts) {
40374039
generate_string_indexof(StubRoutines::_string_indexof_array);

src/hotspot/cpu/x86/stubGenerator_x86_64.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,10 @@ class StubGenerator: public StubCodeGenerator {
497497
address generate_intpoly_montgomeryMult_P256();
498498
address generate_intpoly_assign();
499499

500+
// SHA3 stubs
501+
void generate_sha3_stubs();
502+
address generate_sha3_implCompress(bool multiBlock, const char *name);
503+
500504
// BASE64 stubs
501505

502506
address base64_shuffle_addr();

0 commit comments

Comments
 (0)