Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
8248830: C2: Optimize Rotate API on x86
Improved existing scalar rotate operations and added support for vector rotate operations using new AVX512 instructions.

Reviewed-by: vlivanov, kvn
  • Loading branch information
Jatin Bhateja committed Aug 8, 2020
1 parent 1159a67 commit 67f5341ffe149cc847b2ed028a410efafc10e416
Show file tree
Hide file tree
Showing 19 changed files with 1,467 additions and 275 deletions.
@@ -4311,6 +4311,58 @@ void Assembler::roll(Register dst, int imm8) {
}
}

// ROL r32, CL (D3 /0): rotate the 32-bit register `dst` left. This form takes
// no explicit count operand; per the x86 encoding the count comes from CL.
void Assembler::roll(Register dst) {
  const int rm_bits = prefix_and_encode(dst->encoding());
  // Register-direct ModRM byte: 0xC0 = mod 11, opcode extension /0, r/m = dst.
  const int modrm = 0xC0 | rm_bits;
  emit_int16((unsigned char)0xD3, modrm);
}

// ROR r32, imm8: rotate the 32-bit register `dst` right by the constant `imm8`.
//
// The count of a 32-bit rotate is validated directly with isShiftCount(imm8).
// The `imm8 >> 1` form is what the 64-bit variants (rorq/rolq) use to admit
// their wider count range; applying it here would wrongly accept counts that
// are out of range for a 32-bit operand.
void Assembler::rorl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    // Short encoding for rotate-by-one: D1 /1 (no immediate byte).
    emit_int16((unsigned char)0xD1, (0xC8 | encode));
  } else {
    // General encoding: C1 /1 ib.
    emit_int24((unsigned char)0xC1, (0xC8 | encode), imm8);
  }
}

// ROR r32, CL (D3 /1): rotate the 32-bit register `dst` right. This form takes
// no explicit count operand; per the x86 encoding the count comes from CL.
void Assembler::rorl(Register dst) {
  const int rm_bits = prefix_and_encode(dst->encoding());
  // Register-direct ModRM byte: 0xC8 = mod 11, opcode extension /1, r/m = dst.
  const int modrm = 0xC8 | rm_bits;
  emit_int16((unsigned char)0xD3, modrm);
}

#ifdef _LP64
// ROR r64, CL (D3 /1 behind the prefix produced by prefixq_and_encode):
// rotate the 64-bit register `dst` right; the count is implicit (CL).
void Assembler::rorq(Register dst) {
  const int rm_bits = prefixq_and_encode(dst->encoding());
  // Register-direct ModRM byte: 0xC8 = mod 11, opcode extension /1, r/m = dst.
  const int modrm = 0xC8 | rm_bits;
  emit_int16((unsigned char)0xD3, modrm);
}

// ROR r64, imm8: rotate the 64-bit register `dst` right by the constant `imm8`.
// The count is checked as `imm8 >> 1`, which lets the 64-bit form accept a
// range twice as wide as the one isShiftCount() admits on its own.
void Assembler::rorq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int rm_bits = prefixq_and_encode(dst->encoding());
  const int modrm = 0xC8 | rm_bits;  // mod 11, /1 (ROR), r/m = dst
  if (imm8 != 1) {
    // General encoding: C1 /1 ib.
    emit_int24((unsigned char)0xC1, modrm, imm8);
    return;
  }
  // Short encoding for rotate-by-one: D1 /1 (no immediate byte).
  emit_int16((unsigned char)0xD1, modrm);
}

// ROL r64, CL (D3 /0 behind the prefix produced by prefixq_and_encode):
// rotate the 64-bit register `dst` left; the count is implicit (CL).
void Assembler::rolq(Register dst) {
  const int rm_bits = prefixq_and_encode(dst->encoding());
  // Register-direct ModRM byte: 0xC0 = mod 11, opcode extension /0, r/m = dst.
  const int modrm = 0xC0 | rm_bits;
  emit_int16((unsigned char)0xD3, modrm);
}

// ROL r64, imm8: rotate the 64-bit register `dst` left by the constant `imm8`.
// The count is checked as `imm8 >> 1`, which lets the 64-bit form accept a
// range twice as wide as the one isShiftCount() admits on its own.
void Assembler::rolq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int rm_bits = prefixq_and_encode(dst->encoding());
  const int modrm = 0xC0 | rm_bits;  // mod 11, /0 (ROL), r/m = dst
  if (imm8 != 1) {
    // General encoding: C1 /0 ib.
    emit_int24((unsigned char)0xC1, modrm, imm8);
    return;
  }
  // Short encoding for rotate-by-one: D1 /0 (no immediate byte).
  emit_int16((unsigned char)0xD1, modrm);
}
#endif

void Assembler::sahf() {
#ifdef _LP64
// Not supported in 64bit mode
@@ -6226,6 +6278,78 @@ void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vecto
emit_operand(dst, src);
}

// VPROLD (EVEX.66.0F.W0 72 /1 ib): rotate the 32-bit elements of `src` left by
// the immediate `shift`, writing the result to `dst`. Only the low 8 bits of
// `shift` are emitted.
void Assembler::evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // xmm1 is passed as the first encoding; presumably it lands in ModRM.reg and
  // so supplies the /1 opcode extension -- confirm against vex_prefix_and_encode().
  const int rm_bits = vex_prefix_and_encode(xmm1->encoding(), dst->encoding(), src->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  const int modrm = 0xC0 | rm_bits;
  const int imm = shift & 0xFF;
  emit_int24(0x72, modrm, imm);
}

// VPROLQ (EVEX.66.0F.W1 72 /1 ib): rotate the 64-bit elements of `src` left by
// the immediate `shift`, writing the result to `dst`. Only the low 8 bits of
// `shift` are emitted.
void Assembler::evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // xmm1 is passed as the first encoding; presumably it lands in ModRM.reg and
  // so supplies the /1 opcode extension -- confirm against vex_prefix_and_encode().
  const int rm_bits = vex_prefix_and_encode(xmm1->encoding(), dst->encoding(), src->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  const int modrm = 0xC0 | rm_bits;
  const int imm = shift & 0xFF;
  emit_int24(0x72, modrm, imm);
}

// VPRORD (EVEX.66.0F.W0 72 /0 ib): rotate the 32-bit elements of `src` right by
// the immediate `shift`, writing the result to `dst`. Only the low 8 bits of
// `shift` are emitted.
void Assembler::evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // xmm0 is passed as the first encoding; presumably it lands in ModRM.reg and
  // so supplies the /0 opcode extension -- confirm against vex_prefix_and_encode().
  const int rm_bits = vex_prefix_and_encode(xmm0->encoding(), dst->encoding(), src->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  const int modrm = 0xC0 | rm_bits;
  const int imm = shift & 0xFF;
  emit_int24(0x72, modrm, imm);
}

// VPRORQ (EVEX.66.0F.W1 72 /0 ib): rotate the 64-bit elements of `src` right by
// the immediate `shift`, writing the result to `dst`. Only the low 8 bits of
// `shift` are emitted.
void Assembler::evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // xmm0 is passed as the first encoding; presumably it lands in ModRM.reg and
  // so supplies the /0 opcode extension -- confirm against vex_prefix_and_encode().
  const int rm_bits = vex_prefix_and_encode(xmm0->encoding(), dst->encoding(), src->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  const int modrm = 0xC0 | rm_bits;
  const int imm = shift & 0xFF;
  emit_int24(0x72, modrm, imm);
}

// VPROLVD (EVEX.66.0F38.W0 15 /r): variable rotate left of the 32-bit elements
// of `src`, with the rotate counts supplied by the vector register `shift`;
// the result is written to `dst`.
void Assembler::evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int rm_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // Opcode byte followed by a register-direct ModRM byte.
  emit_int16(0x15, (unsigned char)(0xC0 | rm_bits));
}

// VPROLVQ (EVEX.66.0F38.W1 15 /r): variable rotate left of the 64-bit elements
// of `src`, with the rotate counts supplied by the vector register `shift`;
// the result is written to `dst`.
void Assembler::evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int rm_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // Opcode byte followed by a register-direct ModRM byte.
  emit_int16(0x15, (unsigned char)(0xC0 | rm_bits));
}

// VPRORVD (EVEX.66.0F38.W0 14 /r): variable rotate right of the 32-bit elements
// of `src`, with the rotate counts supplied by the vector register `shift`;
// the result is written to `dst`.
void Assembler::evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int rm_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // Opcode byte followed by a register-direct ModRM byte.
  emit_int16(0x14, (unsigned char)(0xC0 | rm_bits));
}

// VPRORVQ (EVEX.66.0F38.W1 14 /r): variable rotate right of the 64-bit elements
// of `src`, with the rotate counts supplied by the vector register `shift`;
// the result is written to `dst`.
void Assembler::evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int rm_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                            VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // Opcode byte followed by a register-direct ModRM byte.
  emit_int16(0x14, (unsigned char)(0xC0 | rm_bits));
}

void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
assert(VM_Version::supports_evex(), "requires EVEX support");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
@@ -8905,15 +9029,6 @@ void Assembler::rcrq(Register dst, int imm8) {
}
}

// ROR r64, imm8: rotate the 64-bit register `dst` right by the constant `imm8`.
// The count is checked as `imm8 >> 1`, which lets the 64-bit form accept a
// range twice as wide as the one isShiftCount() admits on its own.
void Assembler::rorq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int rm_bits = prefixq_and_encode(dst->encoding());
  const int modrm = 0xC8 | rm_bits;  // mod 11, /1 (ROR), r/m = dst
  if (imm8 != 1) {
    // General encoding: C1 /1 ib.
    emit_int24((unsigned char)0xC1, modrm, imm8);
    return;
  }
  // Short encoding for rotate-by-one: D1 /1 (no immediate byte).
  emit_int16((unsigned char)0xD1, modrm);
}

void Assembler::rorxq(Register dst, Register src, int imm8) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
@@ -1827,9 +1827,18 @@ class Assembler : public AbstractAssembler {

void ret(int imm16);

// 32-bit rotate left (roll) / rotate right (rorl).
// The one-operand overloads emit the D3 opcode form, which carries no count
// operand (per the x86 encoding the count is taken from CL).
// The two-operand overloads rotate by the constant imm8.
void roll(Register dst);

void roll(Register dst, int imm8);

void rorl(Register dst);

void rorl(Register dst, int imm8);

#ifdef _LP64
// 64-bit rotate left (rolq) / rotate right (rorq); only declared under _LP64.
// One-operand overloads: D3 opcode form with an implicit count.
// Two-operand overloads: rotate by the constant imm8.
void rolq(Register dst);
void rolq(Register dst, int imm8);
void rorq(Register dst);
void rorq(Register dst, int imm8);
void rorxq(Register dst, Register src, int imm8);
void rorxd(Register dst, Register src, int imm8);
@@ -2205,6 +2214,16 @@ class Assembler : public AbstractAssembler {
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);

// Vector rotate left/right instructions (EVEX-encoded; the implementations
// assert AVX-512VL support for any vector_len other than AVX_512bit).
//   evpro{l,r}v{d,q}: variable rotate, counts supplied by the `shift` vector register.
//   evpro{l,r}{d,q}:  rotate by the immediate `shift` (low 8 bits are encoded).
// The d / q suffix selects 32-bit / 64-bit elements (EVEX.W = 0 / 1).
void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);

// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
@@ -870,6 +870,57 @@ void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, i
}
}

// Emits a vector rotate by the immediate `shift`.
//   opcode: Op_RotateLeftV or Op_RotateRightV (anything else trips an assert).
//   etype:  T_INT selects the d-form instruction, T_LONG the q-form
//           (anything else trips an assert).
void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
                                     int shift, int vector_len) {
  const bool is_int = (etype == T_INT);

  if (opcode != Op_RotateLeftV) {
    // Rotate right.
    assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV");
    if (is_int) {
      evprord(dst, src, shift, vector_len);
      return;
    }
    assert(etype == T_LONG, "expected type T_LONG");
    evprorq(dst, src, shift, vector_len);
    return;
  }

  // Rotate left.
  if (is_int) {
    evprold(dst, src, shift, vector_len);
    return;
  }
  assert(etype == T_LONG, "expected type T_LONG");
  evprolq(dst, src, shift, vector_len);
}

// Emits a vector rotate whose counts come from the vector register `shift`.
//   opcode: Op_RotateLeftV or Op_RotateRightV (anything else trips an assert).
//   etype:  T_INT selects the d-form instruction, T_LONG the q-form
//           (anything else trips an assert).
void C2_MacroAssembler::vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
                                     XMMRegister shift, int vector_len) {
  const bool is_int = (etype == T_INT);

  if (opcode != Op_RotateLeftV) {
    // Rotate right.
    assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV");
    if (is_int) {
      evprorvd(dst, src, shift, vector_len);
      return;
    }
    assert(etype == T_LONG, "expected type T_LONG");
    evprorvq(dst, src, shift, vector_len);
    return;
  }

  // Rotate left.
  if (is_int) {
    evprolvd(dst, src, shift, vector_len);
    return;
  }
  assert(etype == T_LONG, "expected type T_LONG");
  evprolvq(dst, src, shift, vector_len);
}

// Two-operand (in-place) int vector shift by the immediate `shift`.
// Op_RShiftVI -> psrad, Op_LShiftVI -> pslld, otherwise psrld; the fallback
// asserts that the opcode is Op_URShiftVI.
void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) {
  switch (opcode) {
    case Op_RShiftVI:
      psrad(dst, shift);
      break;
    case Op_LShiftVI:
      pslld(dst, shift);
      break;
    default:
      assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
      psrld(dst, shift);
      break;
  }
}

void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
if (opcode == Op_RShiftVI) {
psrad(dst, src);
@@ -881,6 +932,17 @@ void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
}
}

// Three-operand int vector shift by the immediate `shift`: `dst` receives the
// shifted `nds`. Op_RShiftVI -> vpsrad, Op_LShiftVI -> vpslld, otherwise
// vpsrld; the fallback asserts that the opcode is Op_URShiftVI.
void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  switch (opcode) {
    case Op_RShiftVI:
      vpsrad(dst, nds, shift, vector_len);
      break;
    case Op_LShiftVI:
      vpslld(dst, nds, shift, vector_len);
      break;
    default:
      assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
      vpsrld(dst, nds, shift, vector_len);
      break;
  }
}

void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
if (opcode == Op_RShiftVI) {
vpsrad(dst, nds, src, vector_len);
@@ -925,6 +987,17 @@ void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
}
}

// Two-operand (in-place) long vector shift by the immediate `shift`.
// Op_LShiftVL -> psllq; both Op_RShiftVL and Op_URShiftVL -> psrlq.
void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) {
  if (opcode == Op_LShiftVL) {
    psllq(dst, shift);
    return;
  }
  if (opcode != Op_RShiftVL) {
    assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
  }
  // The arithmetic right shift is emitted as a logical one: srl stands in for
  // sra on pre-AVX-512 systems.
  // NOTE(review): any sign fix-up has to happen outside this helper -- confirm
  // against the callers.
  psrlq(dst, shift);
}

void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
if (opcode == Op_RShiftVL) {
evpsraq(dst, nds, src, vector_len);
@@ -936,6 +1009,17 @@ void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XM
}
}

// Three-operand long vector shift by the immediate `shift`: `dst` receives the
// shifted `nds`. Op_RShiftVL -> evpsraq, Op_LShiftVL -> vpsllq, otherwise
// vpsrlq; the fallback asserts that the opcode is Op_URShiftVL.
void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  switch (opcode) {
    case Op_RShiftVL:
      evpsraq(dst, nds, shift, vector_len);
      break;
    case Op_LShiftVL:
      vpsllq(dst, nds, shift, vector_len);
      break;
    default:
      assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
      vpsrlq(dst, nds, shift, vector_len);
      break;
  }
}

// Reductions for vectors of ints, longs, floats, and doubles.

void C2_MacroAssembler::reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src) {
@@ -74,11 +74,18 @@
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
// Vector shift helpers selected by C2 opcode. The *_imm overloads take the
// shift count as an immediate `int` instead of an XMM register; the overloads
// with `nds` and `vector_len` are the three-operand forms.
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
void vshiftd_imm(int opcode, XMMRegister dst, int shift);
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

// Vector rotate helpers. `opcode` is Op_RotateLeftV or Op_RotateRightV and
// `etype` is T_INT or T_LONG; vprotate_imm rotates by an immediate count,
// vprotate_var by counts held in the `shift` vector register.
void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Reductions for vectors of ints, longs, floats, and doubles.

0 comments on commit 67f5341

Please sign in to comment.