Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 5 additions & 35 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,10 @@ source %{
}
}

// Platform hook queried with a vector element type and lane count.
// This implementation ignores both arguments and always returns false.
// NOTE(review): presumably "false" means rearrange can consume the shuffle
// without a separate VectorLoadShuffle preparation step - confirm against callers.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}

// Assert that the given node is not a variable shift.
bool assert_not_var_shift(const Node* n) {
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
Expand Down Expand Up @@ -6065,41 +6069,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector shuffle -------------------------------

// Matches VectorLoadShuffle and produces in $dst a vector whose lanes have
// this node's element type, built from the byte lanes of $src.
instruct loadshuffle(vReg dst, vReg src) %{
match(Set dst (VectorLoadShuffle src));
format %{ "loadshuffle $dst, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (bt == T_BYTE) {
// Byte elements need no widening: at most a register-to-register copy,
// and nothing at all when dst and src are the same register.
if ($dst$$FloatRegister != $src$$FloatRegister) {
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// ORR of src with itself copies src into dst.
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister, $src$$FloatRegister);
} else {
assert(UseSVE > 0, "must be sve");
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
}
}
} else {
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// 4S/8S, 4I, 4F
// First widen byte lanes to halfword lanes ...
__ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
// ... then widen once more when the element type is 4 bytes wide.
if (type2aelembytes(bt) == 4) {
__ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
}
} else {
assert(UseSVE > 0, "must be sve");
// Extend from byte lanes directly to the lane size selected by bt.
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
$src$$FloatRegister, __ B);
}
}
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector rearrange -----------------------------

// Here is an example that rearranges a NEON vector with 4 ints:
Expand All @@ -6122,6 +6091,7 @@ instruct loadshuffle(vReg dst, vReg src) %{
// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
// to implement rearrange.

// TODO: consider moving the shuffle preparation into VectorLoadShuffle.
instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 0 &&
(Matcher::vector_element_basic_type(n) == T_SHORT ||
Expand Down
40 changes: 5 additions & 35 deletions src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ source %{
}
}

// Platform hook queried with a vector element type and lane count.
// This implementation ignores both arguments and always returns false.
// NOTE(review): presumably "false" means rearrange can consume the shuffle
// without a separate VectorLoadShuffle preparation step - confirm against callers.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}

// Assert that the given node is not a variable shift.
bool assert_not_var_shift(const Node* n) {
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
Expand Down Expand Up @@ -4418,41 +4422,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector shuffle -------------------------------

// Matches VectorLoadShuffle and produces in $dst a vector whose lanes have
// this node's element type, built from the byte lanes of $src.
instruct loadshuffle(vReg dst, vReg src) %{
match(Set dst (VectorLoadShuffle src));
format %{ "loadshuffle $dst, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (bt == T_BYTE) {
// Byte elements need no widening: at most a register-to-register copy,
// and nothing at all when dst and src are the same register.
if ($dst$$FloatRegister != $src$$FloatRegister) {
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// ORR of src with itself copies src into dst.
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister, $src$$FloatRegister);
} else {
assert(UseSVE > 0, "must be sve");
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
}
}
} else {
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// 4S/8S, 4I, 4F
// First widen byte lanes to halfword lanes ...
__ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
// ... then widen once more when the element type is 4 bytes wide.
if (type2aelembytes(bt) == 4) {
__ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
}
} else {
assert(UseSVE > 0, "must be sve");
// Extend from byte lanes directly to the lane size selected by bt.
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
$src$$FloatRegister, __ B);
}
}
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector rearrange -----------------------------

// Here is an example that rearranges a NEON vector with 4 ints:
Expand All @@ -4475,6 +4444,7 @@ instruct loadshuffle(vReg dst, vReg src) %{
// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
// to implement rearrange.

// TODO: consider moving the shuffle preparation into VectorLoadShuffle.
instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 0 &&
(Matcher::vector_element_basic_type(n) == T_SHORT ||
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/arm/arm.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

// Platform hook queried with a vector element type and lane count.
// This implementation ignores both arguments and always returns false.
// NOTE(review): presumably "false" means no separate shuffle preparation
// step is requested on this platform - confirm against callers.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}

// Returns the register mask for vector predicate registers.
// This implementation always returns NULL.
// NOTE(review): presumably because this platform defines no predicate
// registers - confirm.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/ppc/ppc.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2189,6 +2189,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

// Platform hook queried with a vector element type and lane count.
// This implementation ignores both arguments and always returns false.
// NOTE(review): presumably "false" means no separate shuffle preparation
// step is requested on this platform - confirm against callers.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}

// Returns the register mask for vector predicate registers.
// This implementation always returns NULL.
// NOTE(review): presumably because this platform defines no predicate
// registers - confirm.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

// Platform hook queried with a vector element type and lane count.
// This implementation ignores both arguments and always returns false.
// NOTE(review): presumably "false" means no separate shuffle preparation
// step is requested on this platform - confirm against callers.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}

// Returns the register mask for vector predicate registers.
// This implementation always returns NULL.
// NOTE(review): presumably because this platform defines no predicate
// registers - confirm.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/s390/s390.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1529,6 +1529,10 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

// Platform hook queried with a vector element type and lane count.
// This implementation ignores both arguments and always returns false.
// NOTE(review): presumably "false" means no separate shuffle preparation
// step is requested on this platform - confirm against callers.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}

// Returns the register mask for vector predicate registers.
// This implementation always returns NULL.
// NOTE(review): presumably because this platform defines no predicate
// registers - confirm.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
Expand Down
77 changes: 18 additions & 59 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2171,6 +2171,19 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

// Tells whether Vector::rearrange requires its shuffle argument to be
// prepared beforehand, given the shuffle element type and lane count.
//   T_BYTE  - never.
//   T_SHORT - only when AVX512BW is unavailable.
//   T_INT   - only when AVX is unavailable.
//   T_LONG  - only for fewer than 8 lanes when AVX512VL is unavailable.
// Any other element type is a caller error.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
  if (elem_bt == T_BYTE) {
    return false;
  }
  if (elem_bt == T_SHORT) {
    return !VM_Version::supports_avx512bw();
  }
  if (elem_bt == T_INT) {
    return !VM_Version::supports_avx();
  }
  if (elem_bt == T_LONG) {
    // Preparation is unnecessary for 8-lane vectors or whenever AVX512VL exists.
    return !(vlen >= 8 || VM_Version::supports_avx512vl());
  }
  ShouldNotReachHere();
  return false;
}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
assert(Matcher::is_generic_vector(generic_opnd), "not generic");
bool legacy = (generic_opnd->opcode() == LEGVEC);
Expand Down Expand Up @@ -8406,17 +8419,6 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

// VectorLoadShuffle for byte elements: the node is matched with $dst as both
// input and result, and no machine instructions are emitted for it.
instruct loadShuffleB(vec dst) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
match(Set dst (VectorLoadShuffle dst));
format %{ "vector_load_shuffle $dst, $dst" %}
ins_encode %{
// empty
%}
ins_pipe( pipe_slow );
%}

instruct rearrangeB(vec dst, vec shuffle) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
Matcher::vector_length(n) < 32);
Expand Down Expand Up @@ -8483,7 +8485,7 @@ instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
!VM_Version::supports_avx512bw());
match(Set dst (VectorLoadShuffle src));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
Expand All @@ -8494,7 +8496,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
if (UseAVX == 0) {
assert(vlen_in_bytes <= 16, "required");
// Multiply each shuffle by two to get byte index
__ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
__ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
__ psllw($vtmp$$XMMRegister, 1);

// Duplicate to create 2 copies of byte index
Expand All @@ -8509,8 +8511,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
int vlen_enc = vector_length_encoding(this);
// Multiply each shuffle by two to get byte index
__ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
__ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

// Duplicate to create 2 copies of byte index
__ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
Expand Down Expand Up @@ -8557,21 +8558,6 @@ instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe
ins_pipe( pipe_slow );
%}

// VectorLoadShuffle for short elements when AVX512BW is supported:
// zero-extends the byte lanes of $src into word lanes of $dst.
instruct loadShuffleS_evex(vec dst, vec src) %{
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
VM_Version::supports_avx512bw());
match(Set dst (VectorLoadShuffle src));
format %{ "vector_load_shuffle $dst, $src" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
// Without AVX512VL the 512-bit encoding is always used.
// NOTE(review): presumably because shorter EVEX vector lengths require
// AVX512VL - confirm.
if (!VM_Version::supports_avx512vl()) {
vlen_enc = Assembler::AVX_512bit;
}
__ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
VM_Version::supports_avx512bw());
Expand Down Expand Up @@ -8602,7 +8588,7 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
// only byte shuffle instruction available on these platforms

// Duplicate and multiply each shuffle by 4
__ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
__ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
__ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
__ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
__ psllw($vtmp$$XMMRegister, 2);
Expand Down Expand Up @@ -8631,18 +8617,6 @@ instruct rearrangeI(vec dst, vec shuffle) %{
ins_pipe( pipe_slow );
%}

// VectorLoadShuffle for int/float elements when UseAVX > 0:
// zero-extends the byte lanes of $src into doubleword lanes of $dst.
instruct loadShuffleI_avx(vec dst, vec src) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
UseAVX > 0);
match(Set dst (VectorLoadShuffle src));
format %{ "vector_load_shuffle $dst, $src" %}
ins_encode %{
// Encoding width follows this node's vector length.
int vlen_enc = vector_length_encoding(this);
__ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
UseAVX > 0);
Expand Down Expand Up @@ -8672,8 +8646,7 @@ instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
// only double word shuffle instruction available on these platforms

// Multiply each shuffle by two to get double word index
__ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
__ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

// Duplicate each double word shuffle
__ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
Expand All @@ -8699,20 +8672,6 @@ instruct rearrangeL(vec dst, vec src, vec shuffle) %{
ins_pipe( pipe_slow );
%}

// VectorLoadShuffle for long/double elements, selected for 8-lane vectors or
// whenever AVX512VL is supported: zero-extends the byte lanes of $src into
// quadword lanes of $dst.
instruct loadShuffleL_evex(vec dst, vec src) %{
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
match(Set dst (VectorLoadShuffle src));
format %{ "vector_load_shuffle $dst, $src" %}
ins_encode %{
// This rule is only expected to be selected with UseAVX > 2.
assert(UseAVX > 2, "required");

// Encoding width follows this node's vector length.
int vlen_enc = vector_length_encoding(this);
__ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
Expand Down
18 changes: 0 additions & 18 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -963,24 +963,6 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_frombits_coerced_name, "fromBitsCoerced") \
\
do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \
do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"IIII" \
"Ljdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
do_name(vector_shuffle_step_iota_name, "shuffleIota") \
\
do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \
do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
\
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
Expand Down
2 changes: 0 additions & 2 deletions src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -754,8 +754,6 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_VectorBinaryOp:
case vmIntrinsics::_VectorTernaryOp:
case vmIntrinsics::_VectorFromBitsCoerced:
case vmIntrinsics::_VectorShuffleIota:
case vmIntrinsics::_VectorShuffleToVector:
case vmIntrinsics::_VectorLoadOp:
case vmIntrinsics::_VectorLoadMaskedOp:
case vmIntrinsics::_VectorStoreOp:
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/graphKit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ class GraphKit : public Phase {

// Vector API support (implemented in vectorIntrinsics.cpp)
Node* box_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception = false);
Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool shuffle_to_vector = false);
Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem);
Node* vector_shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem);
};

Expand Down
4 changes: 0 additions & 4 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,12 +694,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_nary_operation(3);
case vmIntrinsics::_VectorFromBitsCoerced:
return inline_vector_frombits_coerced();
case vmIntrinsics::_VectorShuffleIota:
return inline_vector_shuffle_iota();
case vmIntrinsics::_VectorMaskOp:
return inline_vector_mask_operation();
case vmIntrinsics::_VectorShuffleToVector:
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorLoadOp:
return inline_vector_mem_operation(/*is_store=*/false);
case vmIntrinsics::_VectorLoadMaskedOp:
Expand Down
2 changes: 0 additions & 2 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,6 @@ class LibraryCallKit : public GraphKit {
// Vector API support
bool inline_vector_nary_operation(int n);
bool inline_vector_frombits_coerced();
bool inline_vector_shuffle_to_vector();
bool inline_vector_shuffle_iota();
bool inline_vector_mask_operation();
bool inline_vector_mem_operation(bool is_store);
bool inline_vector_mem_masked_operation(bool is_store);
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/matcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ class Matcher : public PhaseTransform {

static const bool vector_needs_partial_operations(Node* node, const TypeVect* vt);

static const bool vector_needs_load_shuffle(BasicType elem_bt, int vlen);

static const RegMask* predicate_reg_mask(void);
static const TypeVectMask* predicate_reg_type(const Type* elemTy, int length);

Expand Down
Loading