Skip to content

Commit

Permalink
8248830: C2: Optimize Rotate API on x86
Browse files Browse the repository at this point in the history
Improved existing scalar rotate operations, added support for vector rotate operations using new AVX512 instructions.

Reviewed-by: vlivanov, kvn
  • Loading branch information
Jatin Bhateja committed Aug 8, 2020
1 parent 084e15b commit 3ed5683
Show file tree
Hide file tree
Showing 19 changed files with 1,467 additions and 275 deletions.
133 changes: 124 additions & 9 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Expand Up @@ -4311,6 +4311,58 @@ void Assembler::roll(Register dst, int imm8) {
}
}

// Emit a 32-bit rotate-left of dst by the count held in CL.
// Bytes emitted: optional prefix (from prefix_and_encode), 0xD3, then a
// register-direct ModRM whose reg field is /0 (0xC0 | register encoding).
void Assembler::roll(Register dst) {
  const int modrm = 0xC0 | prefix_and_encode(dst->encoding());
  emit_int16((unsigned char)0xD3, modrm);
}

// Emit a 32-bit rotate-right of dst by an immediate count.
//   imm8 == 1 : short form, 0xD1 followed by ModRM (/1)
//   otherwise : 0xC1, ModRM (/1), then imm8 as the trailing byte
void Assembler::rorl(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int modrm = 0xC8 | prefix_and_encode(dst->encoding());
  if (imm8 != 1) {
    // General form carries the count as an explicit immediate byte.
    emit_int24((unsigned char)0xC1, modrm, imm8);
    return;
  }
  // Rotate-by-one has a dedicated opcode with no immediate.
  emit_int16((unsigned char)0xD1, modrm);
}

// Emit a 32-bit rotate-right of dst by the count held in CL.
// Bytes emitted: optional prefix (from prefix_and_encode), 0xD3, then a
// register-direct ModRM whose reg field is /1 (0xC8 | register encoding).
void Assembler::rorl(Register dst) {
  const int modrm = 0xC8 | prefix_and_encode(dst->encoding());
  emit_int16((unsigned char)0xD3, modrm);
}

#ifdef _LP64
// Emit a 64-bit rotate-right of dst by the count held in CL.
// Same opcode/ModRM as rorl(Register); the 64-bit operand size comes from
// the prefix emitted by prefixq_and_encode.
void Assembler::rorq(Register dst) {
  const int modrm = 0xC8 | prefixq_and_encode(dst->encoding());
  emit_int16((unsigned char)0xD3, modrm);
}

// Emit a 64-bit rotate-right of dst by an immediate count.
//   imm8 == 1 : short form, 0xD1 followed by ModRM (/1)
//   otherwise : 0xC1, ModRM (/1), then imm8 as the trailing byte
void Assembler::rorq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int modrm = 0xC8 | prefixq_and_encode(dst->encoding());
  if (imm8 != 1) {
    // General form carries the count as an explicit immediate byte.
    emit_int24((unsigned char)0xC1, modrm, imm8);
    return;
  }
  // Rotate-by-one has a dedicated opcode with no immediate.
  emit_int16((unsigned char)0xD1, modrm);
}

// Emit a 64-bit rotate-left of dst by the count held in CL.
// Bytes emitted: prefix (from prefixq_and_encode), 0xD3, then a
// register-direct ModRM whose reg field is /0 (0xC0 | register encoding).
void Assembler::rolq(Register dst) {
  const int modrm = 0xC0 | prefixq_and_encode(dst->encoding());
  emit_int16((unsigned char)0xD3, modrm);
}

// Emit a 64-bit rotate-left of dst by an immediate count.
//   imm8 == 1 : short form, 0xD1 followed by ModRM (/0)
//   otherwise : 0xC1, ModRM (/0), then imm8 as the trailing byte
void Assembler::rolq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int modrm = 0xC0 | prefixq_and_encode(dst->encoding());
  if (imm8 != 1) {
    // General form carries the count as an explicit immediate byte.
    emit_int24((unsigned char)0xC1, modrm, imm8);
    return;
  }
  // Rotate-by-one has a dedicated opcode with no immediate.
  emit_int16((unsigned char)0xD1, modrm);
}
#endif

void Assembler::sahf() {
#ifdef _LP64
// Not supported in 64bit mode
Expand Down Expand Up @@ -6226,6 +6278,78 @@ void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vecto
emit_operand(dst, src);
}

// EVEX-encoded packed rotate-left of 32-bit lanes by an immediate count.
// Opcode 0x72 in the 0F map with a 66 prefix and W=0. The first argument to
// vex_prefix_and_encode is xmm1's encoding, which selects the /1 opcode
// extension; dst and src are passed as the second and third operands.
// Only the low 8 bits of 'shift' are emitted.
void Assembler::evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ false,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int opcode_ext = xmm1->encoding();
  const int modrm = 0xC0 | vex_prefix_and_encode(opcode_ext, dst->encoding(), src->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F, &attr);
  emit_int24(0x72, modrm, shift & 0xFF);
}

// EVEX-encoded packed rotate-left of 64-bit lanes by an immediate count.
// Identical to evprold except that vex_w is set (W=1). Opcode 0x72, /1
// extension supplied through xmm1's encoding; low 8 bits of 'shift' emitted.
void Assembler::evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ true,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int opcode_ext = xmm1->encoding();
  const int modrm = 0xC0 | vex_prefix_and_encode(opcode_ext, dst->encoding(), src->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F, &attr);
  emit_int24(0x72, modrm, shift & 0xFF);
}

// EVEX-encoded packed rotate-right of 32-bit lanes by an immediate count.
// Opcode 0x72 in the 0F map with a 66 prefix and W=0. The first argument to
// vex_prefix_and_encode is xmm0's encoding, which selects the /0 opcode
// extension. Only the low 8 bits of 'shift' are emitted.
void Assembler::evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ false,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int opcode_ext = xmm0->encoding();
  const int modrm = 0xC0 | vex_prefix_and_encode(opcode_ext, dst->encoding(), src->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F, &attr);
  emit_int24(0x72, modrm, shift & 0xFF);
}

// EVEX-encoded packed rotate-right of 64-bit lanes by an immediate count.
// Identical to evprord except that vex_w is set (W=1). Opcode 0x72, /0
// extension supplied through xmm0's encoding; low 8 bits of 'shift' emitted.
void Assembler::evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ true,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int opcode_ext = xmm0->encoding();
  const int modrm = 0xC0 | vex_prefix_and_encode(opcode_ext, dst->encoding(), src->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F, &attr);
  emit_int24(0x72, modrm, shift & 0xFF);
}

// EVEX-encoded packed rotate-left of 32-bit lanes by per-lane variable counts.
// Opcode 0x15 in the 0F38 map with a 66 prefix and W=0; operands are passed
// to vex_prefix_and_encode in the order dst, src, shift.
void Assembler::evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ false,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int modrm = 0xC0 | vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int16(0x15, (unsigned char)modrm);
}

// EVEX-encoded packed rotate-left of 64-bit lanes by per-lane variable counts.
// Identical to evprolvd except that vex_w is set (W=1). Opcode 0x15, 0F38 map.
void Assembler::evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ true,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int modrm = 0xC0 | vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int16(0x15, (unsigned char)modrm);
}

// EVEX-encoded packed rotate-right of 32-bit lanes by per-lane variable counts.
// Opcode 0x14 in the 0F38 map with a 66 prefix and W=0; operands are passed
// to vex_prefix_and_encode in the order dst, src, shift.
void Assembler::evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ false,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int modrm = 0xC0 | vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int16(0x14, (unsigned char)modrm);
}

// EVEX-encoded packed rotate-right of 64-bit lanes by per-lane variable counts.
// Identical to evprorvd except that vex_w is set (W=1). Opcode 0x14, 0F38 map.
void Assembler::evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attr(vector_len,
                       /* vex_w */ true,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ true);
  attr.set_is_evex_instruction();
  const int modrm = 0xC0 | vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                                 VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int16(0x14, (unsigned char)modrm);
}

void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
assert(VM_Version::supports_evex(), "requires EVEX support");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
Expand Down Expand Up @@ -8905,15 +9029,6 @@ void Assembler::rcrq(Register dst, int imm8) {
}
}

// 64-bit rotate-right of dst by an immediate count.
// Uses the one-byte-shorter 0xD1 form when the count is exactly 1 and the
// 0xC1 form with a trailing immediate byte for every other count.
void Assembler::rorq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int reg_modrm = 0xC8 | prefixq_and_encode(dst->encoding());
  const bool rotate_by_one = (imm8 == 1);
  if (rotate_by_one) {
    emit_int16((unsigned char)0xD1, reg_modrm);
  } else {
    emit_int24((unsigned char)0xC1, reg_modrm, imm8);
  }
}

void Assembler::rorxq(Register dst, Register src, int imm8) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
Expand Down
19 changes: 19 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Expand Up @@ -1827,9 +1827,18 @@ class Assembler : public AbstractAssembler {

void ret(int imm16);

// 32-bit rotate left. The single-argument form emits opcode 0xD3 (count
// taken from CL); the imm8 form encodes the count as an immediate.
void roll(Register dst);

void roll(Register dst, int imm8);

// 32-bit rotate right. The single-argument form emits opcode 0xD3 (count
// taken from CL); the imm8 form emits 0xD1 for a count of 1 and 0xC1 plus
// an immediate byte otherwise.
void rorl(Register dst);

void rorl(Register dst, int imm8);

#ifdef _LP64
// 64-bit rotates (only declared under _LP64). The single-argument forms
// emit opcode 0xD3 (count taken from CL); the imm8 forms emit 0xD1 for a
// count of 1 and 0xC1 plus an immediate byte otherwise.
void rolq(Register dst);
void rolq(Register dst, int imm8);
void rorq(Register dst);
void rorq(Register dst, int imm8);
void rorxq(Register dst, Register src, int imm8);
void rorxd(Register dst, Register src, int imm8);
Expand Down Expand Up @@ -2205,6 +2214,16 @@ class Assembler : public AbstractAssembler {
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);

// Vector rotate left/right instructions (EVEX encoded; the definitions
// assert EVEX support, and AVX512VL support for vector lengths other than
// AVX_512bit).
// evpro{l,r}v{d,q}: rotate 32-bit (d) or 64-bit (q) lanes by the per-lane
// counts held in the 'shift' vector register.
void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// evpro{l,r}{d,q}: rotate 32-bit (d) or 64-bit (q) lanes by an immediate
// count; only the low 8 bits of 'shift' are encoded.
void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);

// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
Expand Down
84 changes: 84 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Expand Up @@ -870,6 +870,57 @@ void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, i
}
}

// Emit a vector rotate by an immediate count.
//   opcode : Op_RotateLeftV or Op_RotateRightV (anything else trips an assert)
//   etype  : T_INT selects the 32-bit lane form, T_LONG the 64-bit lane form
// Dispatches to evprold / evprolq / evprord / evprorq accordingly.
void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
                                     int shift, int vector_len) {
  if (opcode == Op_RotateLeftV) {
    if (etype == T_INT) {
      evprold(dst, src, shift, vector_len);
      return;
    }
    assert(etype == T_LONG, "expected type T_LONG");
    evprolq(dst, src, shift, vector_len);
    return;
  }

  assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV");
  if (etype == T_INT) {
    evprord(dst, src, shift, vector_len);
    return;
  }
  assert(etype == T_LONG, "expected type T_LONG");
  evprorq(dst, src, shift, vector_len);
}

// Emit a vector rotate whose per-lane counts come from the 'shift' register.
//   opcode : Op_RotateLeftV or Op_RotateRightV (anything else trips an assert)
//   etype  : T_INT selects the 32-bit lane form, T_LONG the 64-bit lane form
// Dispatches to evprolvd / evprolvq / evprorvd / evprorvq accordingly.
void C2_MacroAssembler::vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
                                     XMMRegister shift, int vector_len) {
  const bool rotate_left = (opcode == Op_RotateLeftV);
  if (!rotate_left) {
    assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV");
  }

  if (etype == T_INT) {
    if (rotate_left) {
      evprolvd(dst, src, shift, vector_len);
    } else {
      evprorvd(dst, src, shift, vector_len);
    }
    return;
  }

  assert(etype == T_LONG, "expected type T_LONG");
  if (rotate_left) {
    evprolvq(dst, src, shift, vector_len);
  } else {
    evprorvq(dst, src, shift, vector_len);
  }
}

// Two-operand shift of dst by an immediate count, for int vectors.
// Op_RShiftVI -> psrad, Op_LShiftVI -> pslld, otherwise psrld (the opcode
// is then asserted to be Op_URShiftVI).
void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) {
  switch (opcode) {
    case Op_RShiftVI:
      psrad(dst, shift);
      break;
    case Op_LShiftVI:
      pslld(dst, shift);
      break;
    default:
      assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
      psrld(dst, shift);
      break;
  }
}

void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
if (opcode == Op_RShiftVI) {
psrad(dst, src);
Expand All @@ -881,6 +932,17 @@ void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
}
}

// Three-operand shift of nds into dst by an immediate count, for int vectors.
// Op_RShiftVI -> vpsrad, Op_LShiftVI -> vpslld, otherwise vpsrld (the opcode
// is then asserted to be Op_URShiftVI).
void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  switch (opcode) {
    case Op_RShiftVI:
      vpsrad(dst, nds, shift, vector_len);
      break;
    case Op_LShiftVI:
      vpslld(dst, nds, shift, vector_len);
      break;
    default:
      assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
      vpsrld(dst, nds, shift, vector_len);
      break;
  }
}

void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
if (opcode == Op_RShiftVI) {
vpsrad(dst, nds, src, vector_len);
Expand Down Expand Up @@ -925,6 +987,17 @@ void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
}
}

// Two-operand shift of dst by an immediate count, for long vectors.
// Op_LShiftVL -> psllq; both Op_RShiftVL and Op_URShiftVL -> psrlq.
// NOTE(review): the arithmetic case deliberately emits a logical shift
// (srl stands in for sra on pre-AVX512 systems); any sign fix-up is not
// done here and presumably happens in the caller - confirm.
void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) {
  switch (opcode) {
    case Op_RShiftVL:
      psrlq(dst, shift);  // using srl to implement sra on pre-avx512 systems
      break;
    case Op_LShiftVL:
      psllq(dst, shift);
      break;
    default:
      assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
      psrlq(dst, shift);
      break;
  }
}

void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
if (opcode == Op_RShiftVL) {
evpsraq(dst, nds, src, vector_len);
Expand All @@ -936,6 +1009,17 @@ void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XM
}
}

// Three-operand shift of nds into dst by an immediate count, for long vectors.
// Op_RShiftVL -> evpsraq, Op_LShiftVL -> vpsllq, otherwise vpsrlq (the opcode
// is then asserted to be Op_URShiftVL).
void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  switch (opcode) {
    case Op_RShiftVL:
      evpsraq(dst, nds, shift, vector_len);
      break;
    case Op_LShiftVL:
      vpsllq(dst, nds, shift, vector_len);
      break;
    default:
      assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
      vpsrlq(dst, nds, shift, vector_len);
      break;
  }
}

// Reductions for vectors of ints, longs, floats, and doubles.

void C2_MacroAssembler::reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src) {
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Expand Up @@ -74,11 +74,18 @@
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
// Vector shift helpers. 'opcode' is an ideal shift opcode that selects the
// instruction to emit. The *_imm variants take the shift count as an
// immediate instead of a register; the overloads with 'nds' and
// 'vector_len' emit the three-operand (v-prefixed) instructions.
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
void vshiftd_imm(int opcode, XMMRegister dst, int shift);
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

// Vector rotate helpers. 'opcode' must be Op_RotateLeftV or Op_RotateRightV
// and 'etype' T_INT or T_LONG; vprotate_imm rotates by an immediate count,
// vprotate_var by the counts held in the 'shift' vector register.
void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Reductions for vectors of ints, longs, floats, and doubles.

Expand Down

0 comments on commit 3ed5683

Please sign in to comment.