Skip to content

Commit

Permalink
8266317: Vector API enhancements
Browse files Browse the repository at this point in the history
Co-authored-by: Paul Sandoz <psandoz@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Reviewed-by: jbhateja, vlivanov
  • Loading branch information
Paul Sandoz and Sandhya Viswanathan committed Jun 3, 2021
1 parent eb385c0 commit 5982cfc
Show file tree
Hide file tree
Showing 121 changed files with 9,827 additions and 932 deletions.
5 changes: 5 additions & 0 deletions src/hotspot/cpu/aarch64/matcher_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@
return false;
}

// Does the CPU support vector unsigned comparison instructions?
// Always answers false here, regardless of the vector length (vlen) and element type (bt).
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
}

// Some microarchitectures have mask registers used on vectors
static const bool has_predicated_vectors(void) {
return UseSVE > 0;
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/arm/matcher_arm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@
return false; // not supported
}

// Does the CPU support vector unsigned comparison instructions?
// Always answers false here, regardless of the vector length (vlen) and element type (bt).
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
}

// Some microarchitectures have mask registers used on vectors
static constexpr bool has_predicated_vectors(void) {
return false;
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/ppc/matcher_ppc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@
return false;
}

// Does the CPU support vector unsigned comparison instructions?
// Always answers false here, regardless of the vector length (vlen) and element type (bt).
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
}

// Some microarchitectures have mask registers used on vectors
static constexpr bool has_predicated_vectors(void) {
return false;
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/s390/matcher_s390.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@
return false;
}

// Does the CPU support vector unsigned comparison instructions?
// Always answers false here, regardless of the vector length (vlen) and element type (bt).
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
}

// Some microarchitectures have mask registers used on vectors
static constexpr bool has_predicated_vectors(void) {
return false;
Expand Down
50 changes: 24 additions & 26 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3916,16 +3916,6 @@ void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, Compa
emit_int24(0x3E, (0xC0 | encode), vcc);
}

// Opmask-predicated, register-register form of the word compare emitted with opcode 0x3E.
//   kdst       - mask register receiving the comparison result
//   mask       - opmask register embedded in the EVEX prefix
//   nds, src   - the two vector operands being compared
//   vcc        - comparison predicate, emitted as the trailing immediate byte
//   vector_len - vector length encoding passed to InstructionAttr
// Requires AVX-512 VL+BW support (asserted).
void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
// NOTE(review): presumably selects merge-masking rather than zero-masking for the
// embedded opmask -- confirm against InstructionAttr.
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
// Three bytes after the prefix: opcode, register-direct ModRM (0xC0 | encode), predicate immediate.
emit_int24(0x3E, (0xC0 | encode), vcc);
}

void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
Expand Down Expand Up @@ -9163,7 +9153,7 @@ void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int
}

void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib (signed), 1E /r ib (unsigned)
Expand All @@ -9172,11 +9162,12 @@ void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x1F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib (signed), 1E /r ib (unsigned)
Expand All @@ -9188,13 +9179,14 @@ void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x1F);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}

void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib (signed), 1E /r ib (unsigned)
Expand All @@ -9203,11 +9195,12 @@ void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x1F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib (signed), 1E /r ib (unsigned)
Expand All @@ -9219,13 +9212,14 @@ void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x1F);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}

void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9235,11 +9229,12 @@ void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x3F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9252,13 +9247,14 @@ void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x3F);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}

void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9268,11 +9264,12 @@ void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x3F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9285,7 +9282,8 @@ void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x3F);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}
Expand Down
17 changes: 8 additions & 9 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1721,7 +1721,6 @@ class Assembler : public AbstractAssembler {
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

void pcmpeqw(XMMRegister dst, XMMRegister src);
Expand Down Expand Up @@ -2495,27 +2494,27 @@ class Assembler : public AbstractAssembler {
// Vector integer compares
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

// Vector long compares
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

// Vector byte compares
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

// Vector short compares
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);

Expand Down
86 changes: 82 additions & 4 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2143,25 +2143,103 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
// Type-dispatched masked compare of src1 against the memory operand adr.
// Selects the byte/word/dword/qword compare helper from typ and forwards
// kdmask, ksmask, comparison, vector_len and scratch unchanged. Calls that
// carry a signedness argument always pass true (signed compare).
// Any other element type trips the assert in the default case.
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
switch(typ) {
case T_BYTE:
evpcmpb(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
case T_SHORT:
evpcmpw(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
// T_FLOAT shares the dword compare with T_INT.
case T_INT:
case T_FLOAT:
evpcmpd(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
// T_DOUBLE shares the qword compare with T_LONG.
case T_LONG:
case T_DOUBLE:
evpcmpq(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
default:
assert(false,"Should not reach here.");
break;
}
}

// Unsigned vector compare of src1 and src2 for T_BYTE, T_SHORT and T_INT elements,
// built from zero-extension plus a compare at twice the element width.
//   dst           - receives the per-element comparison result
//   comparison    - predicate forwarded to vpcmpCCW
//   vlen_in_bytes - size of the input vectors; all instructions are emitted with the
//                   encoding for twice this size because the operands are widened
//   vtmp1, vtmp2  - temporaries holding the widened copies of src1 and src2
//   scratch       - forwarded to vpcmpCCW
// NOTE(review): widening presumably lets a signed compare on the zero-extended values
// give the unsigned ordering of the original elements -- confirm against vpcmpCCW.
void C2_MacroAssembler::vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison,
int vlen_in_bytes, XMMRegister vtmp1, XMMRegister vtmp2, Register scratch) {
int vlen_enc = vector_length_encoding(vlen_in_bytes*2);
switch (typ) {
case T_BYTE:
// bytes -> words, compare as words, then pack the word results back to bytes
vpmovzxbw(vtmp1, src1, vlen_enc);
vpmovzxbw(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
vpacksswb(dst, dst, dst, vlen_enc);
break;
case T_SHORT:
// words -> dwords, compare as dwords, then pack the dword results back to words
vpmovzxwd(vtmp1, src1, vlen_enc);
vpmovzxwd(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
vpackssdw(dst, dst, dst, vlen_enc);
break;
case T_INT:
// dwords -> qwords, compare as qwords, then shuffle with immediate 8
// (dwords 0 and 2 of each 128-bit lane move to the low two dword slots)
vpmovzxdq(vtmp1, src1, vlen_enc);
vpmovzxdq(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
vpermilps(dst, dst, 8, vlen_enc);
break;
default:
assert(false, "Should not reach here");
}
// Only for 16-byte inputs: immediate 0x8 moves qwords 0 and 2 into the low two qword slots.
// NOTE(review): presumably gathers the per-lane narrowed results into the low 128 bits -- confirm.
if (vlen_in_bytes == 16) {
vpermpd(dst, dst, 0x8, vlen_enc);
}
}

// Unsigned vector compare of src1 and src2 for T_BYTE, T_SHORT and T_INT elements,
// done in two halves: the low part of each source is zero-extended and compared into
// dst, then the upper 128 bits (vextracti128 index 1) are zero-extended and compared
// into vtmp3, and finally the two partial results are narrowed and merged into dst.
//   comparison    - predicate forwarded to vpcmpCCW
//   vlen_in_bytes - input vector size; used directly (not doubled) for the encoding
//   vtmp1, vtmp2  - temporaries holding the widened halves of src1 and src2
//   vtmp3         - temporary holding the compare result for the upper half
//   scratch       - forwarded to vpcmpCCW
// NOTE(review): the name suggests this is the 32-byte (256-bit) variant -- confirm with callers.
void C2_MacroAssembler::vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch) {
int vlen_enc = vector_length_encoding(vlen_in_bytes);
switch (typ) {
case T_BYTE:
// low half: bytes -> words, compare as words into dst
vpmovzxbw(vtmp1, src1, vlen_enc);
vpmovzxbw(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// upper half: extract upper 128 bits, widen, compare as words into vtmp3
vextracti128(vtmp1, src1, 1);
vextracti128(vtmp2, src2, 1);
vpmovzxbw(vtmp1, vtmp1, vlen_enc);
vpmovzxbw(vtmp2, vtmp2, vlen_enc);
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// pack both word results to bytes, then reorder qwords as 0,2,1,3 (immediate 0xd8)
vpacksswb(dst, dst, vtmp3, vlen_enc);
vpermpd(dst, dst, 0xd8, vlen_enc);
break;
case T_SHORT:
// low half: words -> dwords, compare as dwords into dst
vpmovzxwd(vtmp1, src1, vlen_enc);
vpmovzxwd(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
// upper half: extract upper 128 bits, widen, compare as dwords into vtmp3
vextracti128(vtmp1, src1, 1);
vextracti128(vtmp2, src2, 1);
vpmovzxwd(vtmp1, vtmp1, vlen_enc);
vpmovzxwd(vtmp2, vtmp2, vlen_enc);
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
// pack both dword results to words, then reorder qwords as 0,2,1,3 (immediate 0xd8)
vpackssdw(dst, dst, vtmp3, vlen_enc);
vpermpd(dst, dst, 0xd8, vlen_enc);
break;
case T_INT:
// low half: dwords -> qwords, compare as qwords, then shuffle/permute dst with immediate 8
vpmovzxdq(vtmp1, src1, vlen_enc);
vpmovzxdq(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
vpshufd(dst, dst, 8, vlen_enc);
vpermq(dst, dst, 8, vlen_enc);
// upper half: same sequence into vtmp3, but permuted with 0x80 instead of 8
vextracti128(vtmp1, src1, 1);
vextracti128(vtmp2, src2, 1);
vpmovzxdq(vtmp1, vtmp1, vlen_enc);
vpmovzxdq(vtmp2, vtmp2, vlen_enc);
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
vpshufd(vtmp3, vtmp3, 8, vlen_enc);
vpermq(vtmp3, vtmp3, 0x80, vlen_enc);
// blend mask 0xf0: dwords 4-7 come from vtmp3, dwords 0-3 stay from dst
vpblendd(dst, dst, vtmp3, 0xf0, vlen_enc);
break;
default:
assert(false, "Should not reach here");
}
}

void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
switch(typ) {
case T_BYTE:
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);

// vector compare
void vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
XMMRegister vtmp1, XMMRegister vtmp2, Register scratch);
void vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch);

// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

// dst = src1 reduce(op, src2) using vtmp as temps
Expand Down
Loading

1 comment on commit 5982cfc

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.