Skip to content

Commit

Permalink
8293409: [vectorapi] Intrinsify VectorSupport.indexVector
Browse files Browse the repository at this point in the history
Reviewed-by: eliu, jbhateja
  • Loading branch information
Xiaohong Gong committed Oct 19, 2022
1 parent 3f3d63d commit 857b0f9
Show file tree
Hide file tree
Showing 14 changed files with 382 additions and 30 deletions.
19 changes: 14 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
Original file line number Diff line number Diff line change
Expand Up @@ -457,22 +457,31 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{

// vector load const

instruct vloadconB(vReg dst, immI0 src) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
instruct vloadcon(vReg dst, immI0 src) %{
match(Set dst (VectorLoadConst src));
format %{ "vloadconB $dst, $src\t# load/generate iota indices" %}
format %{ "vloadcon $dst, $src\t# load/generate iota indices" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (UseSVE == 0) {
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes <= 16, "must be");
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16 bytes.
int offset = exact_log2(type2aelembytes(bt)) << 4;
if (is_floating_point_type(bt)) {
offset += 32;
}
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
if (length_in_bytes == 16) {
__ ldrq($dst$$FloatRegister, rscratch1);
} else {
__ ldrd($dst$$FloatRegister, rscratch1);
}
} else {
__ sve_index($dst$$FloatRegister, __ B, 0, 1);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_index($dst$$FloatRegister, size, 0, 1);
if (is_floating_point_type(bt)) {
__ sve_scvtf($dst$$FloatRegister, size, ptrue, $dst$$FloatRegister, size);
}
}
%}
ins_pipe(pipe_slow);
Expand Down
19 changes: 14 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
Original file line number Diff line number Diff line change
Expand Up @@ -396,22 +396,31 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{

// vector load const

instruct vloadconB(vReg dst, immI0 src) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
instruct vloadcon(vReg dst, immI0 src) %{
match(Set dst (VectorLoadConst src));
format %{ "vloadconB $dst, $src\t# load/generate iota indices" %}
format %{ "vloadcon $dst, $src\t# load/generate iota indices" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (UseSVE == 0) {
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes <= 16, "must be");
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16 bytes.
int offset = exact_log2(type2aelembytes(bt)) << 4;
if (is_floating_point_type(bt)) {
offset += 32;
}
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
if (length_in_bytes == 16) {
__ ldrq($dst$$FloatRegister, rscratch1);
} else {
__ ldrd($dst$$FloatRegister, rscratch1);
}
} else {
__ sve_index($dst$$FloatRegister, __ B, 0, 1);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_index($dst$$FloatRegister, size, 0, 1);
if (is_floating_point_type(bt)) {
__ sve_scvtf($dst$$FloatRegister, size, ptrue, $dst$$FloatRegister, size);
}
}
%}
ins_pipe(pipe_slow);
Expand Down
20 changes: 19 additions & 1 deletion src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,24 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
// B
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
// H
__ emit_data64(0x0003000200010000, relocInfo::none);
__ emit_data64(0x0007000600050004, relocInfo::none);
// S
__ emit_data64(0x0000000100000000, relocInfo::none);
__ emit_data64(0x0000000300000002, relocInfo::none);
// D
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000001, relocInfo::none);
// S - FP
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
// D - FP
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
return start;
}

Expand Down Expand Up @@ -7846,7 +7862,9 @@ class StubGenerator: public StubCodeGenerator {
SharedRuntime::
throw_NullPointerException_at_call));

StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
if (UseSVE == 0) {
StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
}

// arraycopy stubs used by compilers
generate_arraycopy_stubs();
Expand Down
9 changes: 7 additions & 2 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1691,8 +1691,13 @@ void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, Inte
}
}

void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt) {
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 64 bytes.
int offset = exact_log2(type2aelembytes(bt)) << 6;
if (is_floating_point_type(bt)) {
offset += 128;
}
ExternalAddress addr(StubRoutines::x86::vector_iota_indices() + offset);
if (vlen_in_bytes <= 4) {
movdl(dst, addr);
} else if (vlen_in_bytes == 8) {
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);

void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
void load_iota_indices(XMMRegister dst, int vlen_in_bytes);
void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);

// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

Expand Down
48 changes: 46 additions & 2 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ address StubGenerator::generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();

// B
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
__ emit_data64(0x1716151413121110, relocInfo::none);
Expand All @@ -820,7 +820,51 @@ address StubGenerator::generate_iota_indices(const char *stub_name) {
__ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
__ emit_data64(0x3736353433323130, relocInfo::none);
__ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);

// W
__ emit_data64(0x0003000200010000, relocInfo::none);
__ emit_data64(0x0007000600050004, relocInfo::none);
__ emit_data64(0x000B000A00090008, relocInfo::none);
__ emit_data64(0x000F000E000D000C, relocInfo::none);
__ emit_data64(0x0013001200110010, relocInfo::none);
__ emit_data64(0x0017001600150014, relocInfo::none);
__ emit_data64(0x001B001A00190018, relocInfo::none);
__ emit_data64(0x001F001E001D001C, relocInfo::none);
// D
__ emit_data64(0x0000000100000000, relocInfo::none);
__ emit_data64(0x0000000300000002, relocInfo::none);
__ emit_data64(0x0000000500000004, relocInfo::none);
__ emit_data64(0x0000000700000006, relocInfo::none);
__ emit_data64(0x0000000900000008, relocInfo::none);
__ emit_data64(0x0000000B0000000A, relocInfo::none);
__ emit_data64(0x0000000D0000000C, relocInfo::none);
__ emit_data64(0x0000000F0000000E, relocInfo::none);
// Q
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000001, relocInfo::none);
__ emit_data64(0x0000000000000002, relocInfo::none);
__ emit_data64(0x0000000000000003, relocInfo::none);
__ emit_data64(0x0000000000000004, relocInfo::none);
__ emit_data64(0x0000000000000005, relocInfo::none);
__ emit_data64(0x0000000000000006, relocInfo::none);
__ emit_data64(0x0000000000000007, relocInfo::none);
// D - FP
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
__ emit_data64(0x40A0000040800000, relocInfo::none); // 4.0f, 5.0f
__ emit_data64(0x40E0000040C00000, relocInfo::none); // 6.0f, 7.0f
__ emit_data64(0x4110000041000000, relocInfo::none); // 8.0f, 9.0f
__ emit_data64(0x4130000041200000, relocInfo::none); // 10.0f, 11.0f
__ emit_data64(0x4150000041400000, relocInfo::none); // 12.0f, 13.0f
__ emit_data64(0x4170000041600000, relocInfo::none); // 14.0f, 15.0f
// Q - FP
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
__ emit_data64(0x4000000000000000, relocInfo::none); // 2.0d
__ emit_data64(0x4008000000000000, relocInfo::none); // 3.0d
__ emit_data64(0x4010000000000000, relocInfo::none); // 4.0d
__ emit_data64(0x4014000000000000, relocInfo::none); // 5.0d
__ emit_data64(0x4018000000000000, relocInfo::none); // 6.0d
__ emit_data64(0x401c000000000000, relocInfo::none); // 7.0d
return start;
}

Expand Down
18 changes: 6 additions & 12 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -8400,12 +8400,12 @@ instruct vmaskcast_avx(vec dst, vec src) %{
//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
match(Set dst (VectorLoadConst src));
format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
ins_encode %{
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes);
BasicType bt = Matcher::vector_element_basic_type(this);
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
%}
ins_pipe( pipe_slow );
%}
Expand All @@ -8417,14 +8417,11 @@ instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
ins_encode %{
assert($src2$$constant == 1, "required");
int vlen = Matcher::vector_length(this);
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
__ load_iota_indices($dst$$XMMRegister, vlen);
if (elem_bt != T_BYTE) {
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
Expand All @@ -8436,14 +8433,11 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
ins_encode %{
assert($src2$$constant == 1, "required");
int vlen = Matcher::vector_length(this);
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
__ load_iota_indices($dst$$XMMRegister, vlen);
if (elem_bt != T_BYTE) {
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
Expand Down
14 changes: 13 additions & 1 deletion src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,18 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$CompressExpandOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_compress_expand_op_name, "compressExpandOp") \
\
do_intrinsic(_IndexVector, jdk_internal_vm_vector_VectorSupport, index_vector_op_name, index_vector_op_sig, F_S) \
do_signature(index_vector_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$IndexOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(index_vector_op_name, "indexVector") \
\
/* (2) Bytecode intrinsics */ \
\
do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_RN) \
Expand Down Expand Up @@ -1286,7 +1298,7 @@ enum class vmIntrinsicID : int {
__IGNORE_CLASS, __IGNORE_NAME, __IGNORE_SIGNATURE, __IGNORE_ALIAS)

ID_LIMIT,
LAST_COMPILER_INLINE = _VectorCompressExpand,
LAST_COMPILER_INLINE = _IndexVector,
FIRST_MH_SIG_POLY = _invokeGeneric,
FIRST_MH_STATIC = _linkToVirtual,
LAST_MH_SIG_POLY = _linkToNative,
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_VectorInsert:
case vmIntrinsics::_VectorExtract:
case vmIntrinsics::_VectorMaskOp:
case vmIntrinsics::_IndexVector:
return EnableVectorSupport;
case vmIntrinsics::_blackhole:
break;
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_extract();
case vmIntrinsics::_VectorCompressExpand:
return inline_vector_compress_expand();
case vmIntrinsics::_IndexVector:
return inline_index_vector();

case vmIntrinsics::_getObjectSize:
return inline_getObjectSize();
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ class LibraryCallKit : public GraphKit {
bool inline_vector_extract();
bool inline_vector_insert();
bool inline_vector_compress_expand();
bool inline_index_vector();

Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);

Expand Down

1 comment on commit 857b0f9

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.