Skip to content

Commit

Permalink
8283694: Improve bit manipulation and boolean to integer conversion operations on x86_64
Browse files Browse the repository at this point in the history

Reviewed-by: kvn, dlong
  • Loading branch information
merykitty authored and Vladimir Kozlov committed Jun 3, 2022
1 parent a0219da commit 0b35460
Show file tree
Hide file tree
Showing 8 changed files with 598 additions and 39 deletions.
131 changes: 127 additions & 4 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2456,6 +2456,15 @@ void Assembler::lzcntl(Register dst, Register src) {
emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
}

// LZCNT with a memory source, 32-bit form: emits F3, the prefix produced by
// prefix(src, dst), the opcode bytes 0F BD, and finally the operand bytes.
// On hardware without LZCNT the same byte sequence is executed as BSR (see the
// assert message), so the feature check must hold before emitting.
void Assembler::lzcntl(Register dst, Address src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  InstructionMark mark(this);
  // F3 goes out first, ahead of whatever prefix(src, dst) emits.
  emit_int8((unsigned char)0xF3);
  prefix(src, dst);
  // Two-byte opcode 0F BD, then the operand encoding for (dst, src).
  emit_int16(0x0F, (unsigned char)0xBD);
  emit_operand(dst, src);
}

// Emit mfence instruction
void Assembler::mfence() {
NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
Expand Down Expand Up @@ -5877,13 +5886,31 @@ void Assembler::tzcntl(Register dst, Register src) {
0xC0 | encode);
}

// TZCNT with a memory source, 32-bit form: emits F3, the prefix produced by
// prefix(src, dst), the opcode bytes 0F BC, and finally the operand bytes.
// Requires BMI1.
void Assembler::tzcntl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
  InstructionMark mark(this);
  // F3 goes out first, ahead of whatever prefix(src, dst) emits.
  emit_int8((unsigned char)0xF3);
  prefix(src, dst);
  // Two-byte opcode 0F BC, then the operand encoding for (dst, src).
  emit_int16(0x0F, (unsigned char)0xBC);
  emit_operand(dst, src);
}

// TZCNT register-to-register, q (64-bit) form: emits F3, the prefix produced
// by prefixq_and_encode(), then 0F BC and a register-direct ModRM byte.
// Requires BMI1.
void Assembler::tzcntq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
  emit_int8((unsigned char)0xF3);
  // prefixq_and_encode() returns the register bits that are OR-ed into the
  // ModRM byte below; 0xC0 selects the register-direct addressing mode.
  const int reg_bits = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | reg_bits));
}

// TZCNT with a memory source, q (64-bit) form: emits F3, the prefix produced
// by prefixq(src, dst), the opcode bytes 0F BC, and finally the operand bytes.
// Requires BMI1.
void Assembler::tzcntq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
  InstructionMark mark(this);
  // F3 goes out first, ahead of whatever prefixq(src, dst) emits.
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  // Two-byte opcode 0F BC, then the operand encoding for (dst, src).
  emit_int16(0x0F, (unsigned char)0xBC);
  emit_operand(dst, src);
}

void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
Expand Down Expand Up @@ -11646,34 +11673,102 @@ void Assembler::pdep(Register dst, Register src1, Register src2) {
emit_int16((unsigned char)0xF5, (0xC0 | encode));
}

// SARX register form, 32-bit (vex_w = false): VEX prefix with SIMD prefix F3
// in the 0F 38 opcode map, opcode F7, register-direct ModRM. Requires BMI2.
// NOTE(review): src2 is handed to vex_prefix_and_encode() as the middle
// register argument and src1 as the last one - presumably the shift count and
// the shifted value respectively; confirm against the helper's contract.
void Assembler::sarxl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attr(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(),
                                             VEX_SIMD_F3, VEX_OPCODE_0F_38, &attr);
  // 0xC0 selects the register-direct ModRM form.
  emit_int16((unsigned char)0xF7, (0xC0 | reg_bits));
}

// SARX memory form, 32-bit (vex_w = false): src1 is the memory operand, src2's
// encoding is passed as the second vex_prefix() argument. VEX prefix with SIMD
// prefix F3 in the 0F 38 opcode map, opcode F7. Requires BMI2.
void Assembler::sarxl(Register dst, Address src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attr);
  // Opcode byte, then the operand encoding for (dst, src1).
  emit_int8((unsigned char)0xF7);
  emit_operand(dst, src1);
}

// SARX register form, 64-bit (vex_w = true): VEX prefix with SIMD prefix F3
// in the 0F 38 opcode map, opcode F7, register-direct ModRM. Requires BMI2.
// NOTE(review): src2 is handed to vex_prefix_and_encode() as the middle
// register argument and src1 as the last one - presumably the shift count and
// the shifted value respectively; confirm against the helper's contract.
void Assembler::sarxq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(),
                                             VEX_SIMD_F3, VEX_OPCODE_0F_38, &attr);
  // 0xC0 selects the register-direct ModRM form.
  emit_int16((unsigned char)0xF7, (0xC0 | reg_bits));
}

// SARX memory form, 64-bit (vex_w = true): src1 is the memory operand, src2's
// encoding is passed as the second vex_prefix() argument. VEX prefix with SIMD
// prefix F3 in the 0F 38 opcode map, opcode F7. Requires BMI2.
void Assembler::sarxq(Register dst, Address src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attr);
  // Opcode byte, then the operand encoding for (dst, src1).
  emit_int8((unsigned char)0xF7);
  emit_operand(dst, src1);
}

// SHLX register form, 32-bit (vex_w = false): VEX prefix with SIMD prefix 66
// in the 0F 38 opcode map, opcode F7, register-direct ModRM. Requires BMI2.
// NOTE(review): src2 is handed to vex_prefix_and_encode() as the middle
// register argument and src1 as the last one - presumably the shift count and
// the shifted value respectively; confirm against the helper's contract.
void Assembler::shlxl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attr(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  // 0xC0 selects the register-direct ModRM form.
  emit_int16((unsigned char)0xF7, (0xC0 | reg_bits));
}

// SHLX memory form, 32-bit (vex_w = false): src1 is the memory operand, src2's
// encoding is passed as the second vex_prefix() argument. VEX prefix with SIMD
// prefix 66 in the 0F 38 opcode map, opcode F7. Requires BMI2.
void Assembler::shlxl(Register dst, Address src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  // Opcode byte, then the operand encoding for (dst, src1).
  emit_int8((unsigned char)0xF7);
  emit_operand(dst, src1);
}

// SHLX register form, 64-bit (vex_w = true): VEX prefix with SIMD prefix 66
// in the 0F 38 opcode map, opcode F7, register-direct ModRM. Requires BMI2.
// NOTE(review): src2 is handed to vex_prefix_and_encode() as the middle
// register argument and src1 as the last one - presumably the shift count and
// the shifted value respectively; confirm against the helper's contract.
void Assembler::shlxq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  // 0xC0 selects the register-direct ModRM form.
  emit_int16((unsigned char)0xF7, (0xC0 | reg_bits));
}

// SHLX memory form, 64-bit (vex_w = true): src1 is the memory operand, src2's
// encoding is passed as the second vex_prefix() argument. VEX prefix with SIMD
// prefix 66 in the 0F 38 opcode map, opcode F7. Requires BMI2.
void Assembler::shlxq(Register dst, Address src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  // Opcode byte, then the operand encoding for (dst, src1).
  emit_int8((unsigned char)0xF7);
  emit_operand(dst, src1);
}

// SHRX register form, 32-bit (vex_w = false): VEX prefix with SIMD prefix F2
// in the 0F 38 opcode map, opcode F7, register-direct ModRM. Requires BMI2.
// NOTE(review): src2 is handed to vex_prefix_and_encode() as the middle
// register argument and src1 as the last one - presumably the shift count and
// the shifted value respectively; confirm against the helper's contract.
void Assembler::shrxl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attr(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(),
                                             VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // 0xC0 selects the register-direct ModRM form.
  emit_int16((unsigned char)0xF7, (0xC0 | reg_bits));
}

// SHRX memory form, 32-bit (vex_w = false): src1 is the memory operand, src2's
// encoding is passed as the second vex_prefix() argument. VEX prefix with SIMD
// prefix F2 in the 0F 38 opcode map, opcode F7. Requires BMI2.
void Assembler::shrxl(Register dst, Address src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // Opcode byte, then the operand encoding for (dst, src1).
  emit_int8((unsigned char)0xF7);
  emit_operand(dst, src1);
}

// SHRX register form, 64-bit (vex_w = true): VEX prefix with SIMD prefix F2
// in the 0F 38 opcode map, opcode F7, register-direct ModRM. Requires BMI2.
// NOTE(review): src2 is handed to vex_prefix_and_encode() as the middle
// register argument and src1 as the last one - presumably the shift count and
// the shifted value respectively; confirm against the helper's contract.
void Assembler::shrxq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(),
                                             VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // 0xC0 selects the register-direct ModRM form.
  emit_int16((unsigned char)0xF7, (0xC0 | reg_bits));
}

// SHRX memory form, 64-bit (vex_w = true): src1 is the memory operand, src2's
// encoding is passed as the second vex_prefix() argument. VEX prefix with SIMD
// prefix F2 in the 0F 38 opcode map, opcode F7. Requires BMI2.
void Assembler::shrxq(Register dst, Address src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // Opcode byte, then the operand encoding for (dst, src1).
  emit_int8((unsigned char)0xF7);
  emit_operand(dst, src1);
}

void Assembler::evpmovq2m(KRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512vldq(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down Expand Up @@ -12719,6 +12814,15 @@ void Assembler::lzcntq(Register dst, Register src) {
emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
}

// LZCNT with a memory source, q (64-bit) form: emits F3, the prefix produced
// by prefixq(src, dst), the opcode bytes 0F BD, and finally the operand bytes.
// On hardware without LZCNT the same byte sequence is executed as BSR (see the
// assert message), so the feature check must hold before emitting.
void Assembler::lzcntq(Register dst, Address src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  InstructionMark mark(this);
  // F3 goes out first, ahead of whatever prefixq(src, dst) emits.
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  // Two-byte opcode 0F BD, then the operand encoding for (dst, src).
  emit_int16(0x0F, (unsigned char)0xBD);
  emit_operand(dst, src);
}

void Assembler::movdq(XMMRegister dst, Register src) {
// table D-1 says MMX/SSE2
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
Expand Down Expand Up @@ -13113,21 +13217,40 @@ void Assembler::rcrq(Register dst, int imm8) {
}
}


// NOTE(review): this span is a flattened diff hunk. The two signature lines
// (rorxq, then rorxl) and the two InstructionAttr lines (vex_w true, then
// vex_w false) look like the removed/added sides of the same change, i.e. the
// former rorxq body being turned into rorxl with vex_w = false. As written it
// is not one compilable definition - confirm against the real source file.
// The body encodes RORX register form: VEX prefix with SIMD prefix F2 in the
// 0F 3A opcode map, opcode F0, register-direct ModRM, then the imm8 byte.
void Assembler::rorxq(Register dst, Register src, int imm8) {
void Assembler::rorxl(Register dst, Register src, int imm8) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
}

void Assembler::rorxd(Register dst, Register src, int imm8) {
// RORX memory form, 32-bit (vex_w = false): rotates the value read from src
// right by imm8 and writes it to dst. Encoding: VEX prefix with SIMD prefix F2
// in the 0F 3A opcode map, opcode F0, memory operand, then one immediate byte.
// Requires BMI2.
void Assembler::rorxl(Register dst, Address src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  // An imm8 byte follows the memory operand, so the instruction ends one byte
  // after the displacement. A RIP-relative displacement is measured from the
  // end of the instruction; pass that trailing length as the correction so the
  // computed displacement is not off by one.
  // NOTE(review): relies on emit_operand's third (rip_relative_correction)
  // parameter as used by other imm8-suffixed memory forms - confirm.
  emit_operand(dst, src, 1);
  emit_int8(imm8);
}

// RORX register form, 64-bit (vex_w = true): VEX prefix with SIMD prefix F2 in
// the 0F 3A opcode map, opcode F0, register-direct ModRM, then the imm8 byte.
// The middle register argument of vex_prefix_and_encode() is unused (0).
// Requires BMI2.
void Assembler::rorxq(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attr(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attr);
  // Opcode, register-direct ModRM (0xC0 | regs), rotate count.
  emit_int24((unsigned char)0xF0, (0xC0 | reg_bits), imm8);
}

// RORX memory form, 64-bit (vex_w = true): rotates the value read from src
// right by imm8 and writes it to dst. Encoding: VEX prefix with SIMD prefix F2
// in the 0F 3A opcode map, opcode F0, memory operand, then one immediate byte.
// Requires BMI2.
void Assembler::rorxq(Register dst, Address src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  // An imm8 byte follows the memory operand, so the instruction ends one byte
  // after the displacement. A RIP-relative displacement is measured from the
  // end of the instruction; pass that trailing length as the correction so the
  // computed displacement is not off by one.
  // NOTE(review): relies on emit_operand's third (rip_relative_correction)
  // parameter as used by other imm8-suffixed memory forms - confirm.
  emit_operand(dst, src, 1);
  emit_int8(imm8);
}

#ifdef _LP64
void Assembler::salq(Address dst, int imm8) {
InstructionMark im(this);
Expand Down
16 changes: 15 additions & 1 deletion src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1443,9 +1443,11 @@ class Assembler : public AbstractAssembler {
void size_prefix();

// LZCNT - count leading zeros. Each width has a register-source and a
// memory-source (Address) overload.
void lzcntl(Register dst, Register src);
void lzcntl(Register dst, Address src);

#ifdef _LP64
// q-suffixed forms are only declared for _LP64 builds.
void lzcntq(Register dst, Register src);
void lzcntq(Register dst, Address src);
#endif

enum Membar_mask_bits {
Expand Down Expand Up @@ -1995,8 +1997,10 @@ class Assembler : public AbstractAssembler {
void rolq(Register dst, int imm8);
void rorq(Register dst);
void rorq(Register dst, int imm8);
// RORX (BMI2) - rotate right by an immediate, with register and memory sources.
// NOTE(review): this span is a flattened diff hunk; the rorxd declaration below
// appears to be the removed side of the rorxd -> rorxl rename - confirm against
// the real header.
void rorxl(Register dst, Register src, int imm8);
void rorxl(Register dst, Address src, int imm8);
void rorxq(Register dst, Register src, int imm8);
void rorxd(Register dst, Register src, int imm8);
void rorxq(Register dst, Address src, int imm8);
#endif

void sahf();
Expand Down Expand Up @@ -2132,7 +2136,9 @@ class Assembler : public AbstractAssembler {

// BMI - count trailing zeros
// l/q select the 32-/64-bit form; each has a register-source and a
// memory-source (Address) overload.
void tzcntl(Register dst, Register src);
void tzcntl(Register dst, Address src);
void tzcntq(Register dst, Register src);
void tzcntq(Register dst, Address src);

// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
Expand Down Expand Up @@ -2208,10 +2214,18 @@ class Assembler : public AbstractAssembler {
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);

// BMI2 shifts: SARX, SHLX and SHRX, in 32-bit (l) and 64-bit (q) forms.
// src1 may be a register or a memory operand (Address); src2 is always a register.
void sarxl(Register dst, Register src1, Register src2);
void sarxl(Register dst, Address src1, Register src2);
void sarxq(Register dst, Register src1, Register src2);
void sarxq(Register dst, Address src1, Register src2);
void shlxl(Register dst, Register src1, Register src2);
void shlxl(Register dst, Address src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
void shlxq(Register dst, Address src1, Register src2);
void shrxl(Register dst, Register src1, Register src2);
void shrxl(Register dst, Address src1, Register src2);
void shrxq(Register dst, Register src1, Register src2);
void shrxq(Register dst, Address src1, Register src2);

void bzhiq(Register dst, Register src1, Register src2);
void pext(Register dst, Register src1, Register src2);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/x86/macroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ class MacroAssembler: public Assembler {

void verify_tlab();

// NOTE(review): these two lines look like the removed (non-static) and added
// (static) sides of a one-line diff; presumably only the static declaration
// remains in the real header - confirm.
Condition negate_condition(Condition cond);
static Condition negate_condition(Condition cond);

// Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit
// operands. In general the names are modified to avoid hiding the instruction in Assembler
Expand Down
24 changes: 12 additions & 12 deletions src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,26 +525,26 @@ void MacroAssembler::sha256_AVX2_one_round_compute(
addl(reg_old_h, reg_y2); // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; --
}
movl(reg_y2, reg_f); // reg_y2 = reg_f ; CH
rorxd(reg_y0, reg_e, 25); // reg_y0 = reg_e >> 25 ; S1A
rorxd(reg_y1, reg_e, 11); // reg_y1 = reg_e >> 11 ; S1B
rorxl(reg_y0, reg_e, 25); // reg_y0 = reg_e >> 25 ; S1A
rorxl(reg_y1, reg_e, 11); // reg_y1 = reg_e >> 11 ; S1B
xorl(reg_y2, reg_g); // reg_y2 = reg_f^reg_g ; CH

xorl(reg_y0, reg_y1); // reg_y0 = (reg_e>>25) ^ (reg_h>>11) ; S1
rorxd(reg_y1, reg_e, 6); // reg_y1 = (reg_e >> 6) ; S1
rorxl(reg_y1, reg_e, 6); // reg_y1 = (reg_e >> 6) ; S1
andl(reg_y2, reg_e); // reg_y2 = (reg_f^reg_g)&reg_e ; CH

if (iter%4 > 0) {
addl(reg_old_h, reg_y3); // reg_h = t1 + S0 + MAJ ; --
}

xorl(reg_y0, reg_y1); // reg_y0 = (reg_e>>25) ^ (reg_e>>11) ^ (reg_e>>6) ; S1
rorxd(reg_T1, reg_a, 13); // reg_T1 = reg_a >> 13 ; S0B
rorxl(reg_T1, reg_a, 13); // reg_T1 = reg_a >> 13 ; S0B
xorl(reg_y2, reg_g); // reg_y2 = CH = ((reg_f^reg_g)&reg_e)^reg_g ; CH
rorxd(reg_y1, reg_a, 22); // reg_y1 = reg_a >> 22 ; S0A
rorxl(reg_y1, reg_a, 22); // reg_y1 = reg_a >> 22 ; S0A
movl(reg_y3, reg_a); // reg_y3 = reg_a ; MAJA

xorl(reg_y1, reg_T1); // reg_y1 = (reg_a>>22) ^ (reg_a>>13) ; S0
rorxd(reg_T1, reg_a, 2); // reg_T1 = (reg_a >> 2) ; S0
rorxl(reg_T1, reg_a, 2); // reg_T1 = (reg_a >> 2) ; S0
addl(reg_h, Address(rsp, rdx, Address::times_1, 4*iter)); // reg_h = k + w + reg_h ; --
orl(reg_y3, reg_c); // reg_y3 = reg_a|reg_c ; MAJA

Expand Down Expand Up @@ -598,27 +598,27 @@ void MacroAssembler::sha256_AVX2_one_round_and_sched(
int iter)
{
movl(rcx, reg_a); // rcx = reg_a ; MAJA
rorxd(r13, reg_e, 25); // r13 = reg_e >> 25 ; S1A
rorxd(r14, reg_e, 11); // r14 = reg_e >> 11 ; S1B
rorxl(r13, reg_e, 25); // r13 = reg_e >> 25 ; S1A
rorxl(r14, reg_e, 11); // r14 = reg_e >> 11 ; S1B
addl(reg_h, Address(rsp, rdx, Address::times_1, 4*iter));
orl(rcx, reg_c); // rcx = reg_a|reg_c ; MAJA

movl(r15, reg_f); // r15 = reg_f ; CH
rorxd(r12, reg_a, 13); // r12 = reg_a >> 13 ; S0B
rorxl(r12, reg_a, 13); // r12 = reg_a >> 13 ; S0B
xorl(r13, r14); // r13 = (reg_e>>25) ^ (reg_e>>11) ; S1
xorl(r15, reg_g); // r15 = reg_f^reg_g ; CH

rorxd(r14, reg_e, 6); // r14 = (reg_e >> 6) ; S1
rorxl(r14, reg_e, 6); // r14 = (reg_e >> 6) ; S1
andl(r15, reg_e); // r15 = (reg_f^reg_g)&reg_e ; CH

xorl(r13, r14); // r13 = (reg_e>>25) ^ (reg_e>>11) ^ (reg_e>>6) ; S1
rorxd(r14, reg_a, 22); // r14 = reg_a >> 22 ; S0A
rorxl(r14, reg_a, 22); // r14 = reg_a >> 22 ; S0A
addl(reg_d, reg_h); // reg_d = k + w + reg_h + reg_d ; --

andl(rcx, reg_b); // rcx = (reg_a|reg_c)&reg_b ; MAJA
xorl(r14, r12); // r14 = (reg_a>>22) ^ (reg_a>>13) ; S0

rorxd(r12, reg_a, 2); // r12 = (reg_a >> 2) ; S0
rorxl(r12, reg_a, 2); // r12 = (reg_a >> 2) ; S0
xorl(r15, reg_g); // r15 = CH = ((reg_f^reg_g)&reg_e)^reg_g ; CH

xorl(r14, r12); // r14 = (reg_a>>22) ^ (reg_a>>13) ^ (reg_a>>2) ; S0
Expand Down
Loading

1 comment on commit 0b35460

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.