Skip to content

Commit e846a1d

Browse files
committed
8304450: [vectorapi] Refactor VectorShuffle implementation
Reviewed-by: psandoz, xgong, jbhateja, vlivanov
1 parent 3f36dd8 commit e846a1d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2521
-2075
lines changed

src/hotspot/cpu/aarch64/aarch64_vector.ad

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,10 @@ source %{
315315
}
316316
}
317317

318+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
319+
return false;
320+
}
321+
318322
// Assert that the given node is not a variable shift.
319323
bool assert_not_var_shift(const Node* n) {
320324
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -6065,41 +6069,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
60656069
ins_pipe(pipe_slow);
60666070
%}
60676071

6068-
// ------------------------------ Vector shuffle -------------------------------
6069-
6070-
instruct loadshuffle(vReg dst, vReg src) %{
6071-
match(Set dst (VectorLoadShuffle src));
6072-
format %{ "loadshuffle $dst, $src" %}
6073-
ins_encode %{
6074-
BasicType bt = Matcher::vector_element_basic_type(this);
6075-
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
6076-
if (bt == T_BYTE) {
6077-
if ($dst$$FloatRegister != $src$$FloatRegister) {
6078-
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
6079-
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6080-
$src$$FloatRegister, $src$$FloatRegister);
6081-
} else {
6082-
assert(UseSVE > 0, "must be sve");
6083-
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
6084-
}
6085-
}
6086-
} else {
6087-
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
6088-
// 4S/8S, 4I, 4F
6089-
__ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
6090-
if (type2aelembytes(bt) == 4) {
6091-
__ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
6092-
}
6093-
} else {
6094-
assert(UseSVE > 0, "must be sve");
6095-
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
6096-
$src$$FloatRegister, __ B);
6097-
}
6098-
}
6099-
%}
6100-
ins_pipe(pipe_slow);
6101-
%}
6102-
61036072
// ------------------------------ Vector rearrange -----------------------------
61046073

61056074
// Here is an example that rearranges a NEON vector with 4 ints:
@@ -6122,6 +6091,7 @@ instruct loadshuffle(vReg dst, vReg src) %{
61226091
// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
61236092
// to implement rearrange.
61246093

6094+
// Maybe move the shuffle preparation to VectorLoadShuffle
61256095
instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
61266096
predicate(UseSVE == 0 &&
61276097
(Matcher::vector_element_basic_type(n) == T_SHORT ||

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,10 @@ source %{
305305
}
306306
}
307307

308+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
309+
return false;
310+
}
311+
308312
// Assert that the given node is not a variable shift.
309313
bool assert_not_var_shift(const Node* n) {
310314
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -4418,41 +4422,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
44184422
ins_pipe(pipe_slow);
44194423
%}
44204424

4421-
// ------------------------------ Vector shuffle -------------------------------
4422-
4423-
instruct loadshuffle(vReg dst, vReg src) %{
4424-
match(Set dst (VectorLoadShuffle src));
4425-
format %{ "loadshuffle $dst, $src" %}
4426-
ins_encode %{
4427-
BasicType bt = Matcher::vector_element_basic_type(this);
4428-
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
4429-
if (bt == T_BYTE) {
4430-
if ($dst$$FloatRegister != $src$$FloatRegister) {
4431-
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4432-
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
4433-
$src$$FloatRegister, $src$$FloatRegister);
4434-
} else {
4435-
assert(UseSVE > 0, "must be sve");
4436-
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
4437-
}
4438-
}
4439-
} else {
4440-
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4441-
// 4S/8S, 4I, 4F
4442-
__ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
4443-
if (type2aelembytes(bt) == 4) {
4444-
__ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
4445-
}
4446-
} else {
4447-
assert(UseSVE > 0, "must be sve");
4448-
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
4449-
$src$$FloatRegister, __ B);
4450-
}
4451-
}
4452-
%}
4453-
ins_pipe(pipe_slow);
4454-
%}
4455-
44564425
// ------------------------------ Vector rearrange -----------------------------
44574426

44584427
// Here is an example that rearranges a NEON vector with 4 ints:
@@ -4475,6 +4444,7 @@ instruct loadshuffle(vReg dst, vReg src) %{
44754444
// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
44764445
// to implement rearrange.
44774446

4447+
// Maybe move the shuffle preparation to VectorLoadShuffle
44784448
instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
44794449
predicate(UseSVE == 0 &&
44804450
(Matcher::vector_element_basic_type(n) == T_SHORT ||

src/hotspot/cpu/arm/arm.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
10251025
return false;
10261026
}
10271027

1028+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
1029+
return false;
1030+
}
1031+
10281032
const RegMask* Matcher::predicate_reg_mask(void) {
10291033
return NULL;
10301034
}

src/hotspot/cpu/ppc/ppc.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2189,6 +2189,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
21892189
return false;
21902190
}
21912191

2192+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
2193+
return false;
2194+
}
2195+
21922196
const RegMask* Matcher::predicate_reg_mask(void) {
21932197
return NULL;
21942198
}

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,6 +1870,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
18701870
return false;
18711871
}
18721872

1873+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
1874+
return false;
1875+
}
1876+
18731877
const RegMask* Matcher::predicate_reg_mask(void) {
18741878
return NULL;
18751879
}

src/hotspot/cpu/s390/s390.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
15291529
return false;
15301530
}
15311531

1532+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
1533+
return false;
1534+
}
1535+
15321536
const RegMask* Matcher::predicate_reg_mask(void) {
15331537
return NULL;
15341538
}

src/hotspot/cpu/x86/x86.ad

Lines changed: 18 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2171,6 +2171,19 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
21712171
return false;
21722172
}
21732173

2174+
// Return true if Vector::rearrange needs preparation of the shuffle argument
2175+
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
2176+
switch (elem_bt) {
2177+
case T_BYTE: return false;
2178+
case T_SHORT: return !VM_Version::supports_avx512bw();
2179+
case T_INT: return !VM_Version::supports_avx();
2180+
case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
2181+
default:
2182+
ShouldNotReachHere();
2183+
return false;
2184+
}
2185+
}
2186+
21742187
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
21752188
assert(Matcher::is_generic_vector(generic_opnd), "not generic");
21762189
bool legacy = (generic_opnd->opcode() == LEGVEC);
@@ -8406,17 +8419,6 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
84068419
//-------------------------------- Rearrange ----------------------------------
84078420

84088421
// LoadShuffle/Rearrange for Byte
8409-
8410-
instruct loadShuffleB(vec dst) %{
8411-
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
8412-
match(Set dst (VectorLoadShuffle dst));
8413-
format %{ "vector_load_shuffle $dst, $dst" %}
8414-
ins_encode %{
8415-
// empty
8416-
%}
8417-
ins_pipe( pipe_slow );
8418-
%}
8419-
84208422
instruct rearrangeB(vec dst, vec shuffle) %{
84218423
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
84228424
Matcher::vector_length(n) < 32);
@@ -8483,7 +8485,7 @@ instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
84838485

84848486
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
84858487
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8486-
Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
8488+
!VM_Version::supports_avx512bw());
84878489
match(Set dst (VectorLoadShuffle src));
84888490
effect(TEMP dst, TEMP vtmp);
84898491
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
@@ -8494,7 +8496,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
84948496
if (UseAVX == 0) {
84958497
assert(vlen_in_bytes <= 16, "required");
84968498
// Multiply each shuffle by two to get byte index
8497-
__ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
8499+
__ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
84988500
__ psllw($vtmp$$XMMRegister, 1);
84998501

85008502
// Duplicate to create 2 copies of byte index
@@ -8509,8 +8511,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
85098511
assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
85108512
int vlen_enc = vector_length_encoding(this);
85118513
// Multiply each shuffle by two to get byte index
8512-
__ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
8513-
__ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
8514+
__ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
85148515

85158516
// Duplicate to create 2 copies of byte index
85168517
__ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
@@ -8557,21 +8558,6 @@ instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe
85578558
ins_pipe( pipe_slow );
85588559
%}
85598560

8560-
instruct loadShuffleS_evex(vec dst, vec src) %{
8561-
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8562-
VM_Version::supports_avx512bw());
8563-
match(Set dst (VectorLoadShuffle src));
8564-
format %{ "vector_load_shuffle $dst, $src" %}
8565-
ins_encode %{
8566-
int vlen_enc = vector_length_encoding(this);
8567-
if (!VM_Version::supports_avx512vl()) {
8568-
vlen_enc = Assembler::AVX_512bit;
8569-
}
8570-
__ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8571-
%}
8572-
ins_pipe( pipe_slow );
8573-
%}
8574-
85758561
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
85768562
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
85778563
VM_Version::supports_avx512bw());
@@ -8602,7 +8588,7 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
86028588
// only byte shuffle instruction available on these platforms
86038589

86048590
// Duplicate and multiply each shuffle by 4
8605-
__ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
8591+
__ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
86068592
__ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
86078593
__ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
86088594
__ psllw($vtmp$$XMMRegister, 2);
@@ -8631,18 +8617,6 @@ instruct rearrangeI(vec dst, vec shuffle) %{
86318617
ins_pipe( pipe_slow );
86328618
%}
86338619

8634-
instruct loadShuffleI_avx(vec dst, vec src) %{
8635-
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8636-
UseAVX > 0);
8637-
match(Set dst (VectorLoadShuffle src));
8638-
format %{ "vector_load_shuffle $dst, $src" %}
8639-
ins_encode %{
8640-
int vlen_enc = vector_length_encoding(this);
8641-
__ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8642-
%}
8643-
ins_pipe( pipe_slow );
8644-
%}
8645-
86468620
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
86478621
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
86488622
UseAVX > 0);
@@ -8672,8 +8646,7 @@ instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
86728646
// only double word shuffle instruction available on these platforms
86738647

86748648
// Multiply each shuffle by two to get double word index
8675-
__ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
8676-
__ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
8649+
__ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
86778650

86788651
// Duplicate each double word shuffle
86798652
__ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
@@ -8699,20 +8672,6 @@ instruct rearrangeL(vec dst, vec src, vec shuffle) %{
86998672
ins_pipe( pipe_slow );
87008673
%}
87018674

8702-
instruct loadShuffleL_evex(vec dst, vec src) %{
8703-
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
8704-
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
8705-
match(Set dst (VectorLoadShuffle src));
8706-
format %{ "vector_load_shuffle $dst, $src" %}
8707-
ins_encode %{
8708-
assert(UseAVX > 2, "required");
8709-
8710-
int vlen_enc = vector_length_encoding(this);
8711-
__ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8712-
%}
8713-
ins_pipe( pipe_slow );
8714-
%}
8715-
87168675
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
87178676
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
87188677
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));

src/hotspot/share/classfile/vmIntrinsics.hpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -963,24 +963,6 @@ class methodHandle;
963963
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
964964
do_name(vector_frombits_coerced_name, "fromBitsCoerced") \
965965
\
966-
do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \
967-
do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;" \
968-
"Ljava/lang/Class;" \
969-
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
970-
"IIII" \
971-
"Ljdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)" \
972-
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
973-
do_name(vector_shuffle_step_iota_name, "shuffleIota") \
974-
\
975-
do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \
976-
do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;" \
977-
"Ljava/lang/Class;" \
978-
"Ljava/lang/Class;" \
979-
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
980-
"ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)" \
981-
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
982-
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
983-
\
984966
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
985967
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
986968
"Ljava/lang/Class;" \

src/hotspot/share/opto/c2compiler.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,8 +754,6 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method) {
754754
case vmIntrinsics::_VectorBinaryOp:
755755
case vmIntrinsics::_VectorTernaryOp:
756756
case vmIntrinsics::_VectorFromBitsCoerced:
757-
case vmIntrinsics::_VectorShuffleIota:
758-
case vmIntrinsics::_VectorShuffleToVector:
759757
case vmIntrinsics::_VectorLoadOp:
760758
case vmIntrinsics::_VectorLoadMaskedOp:
761759
case vmIntrinsics::_VectorStoreOp:

src/hotspot/share/opto/graphKit.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,7 @@ class GraphKit : public Phase {
904904

905905
// Vector API support (implemented in vectorIntrinsics.cpp)
906906
Node* box_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception = false);
907-
Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool shuffle_to_vector = false);
907+
Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem);
908908
Node* vector_shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem);
909909
};
910910

0 commit comments

Comments
 (0)