36 changes: 29 additions & 7 deletions src/hotspot/cpu/aarch64/aarch64.ad
@@ -1,5 +1,5 @@
//
// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@@ -2385,6 +2385,20 @@ uint Matcher::vector_ideal_reg(int len) {
return 0;
}

// Vector ideal reg size corresponding to the specified len in bytes
uint Matcher::vector_ideal_reg_size(int len) {
assert(MaxVectorSize >= len, "");
uint ideal_reg = vector_ideal_reg(len);
switch (ideal_reg) {
case Op_VecD: return 8;
case Op_VecX: return 16;
case Op_VecA: return MaxVectorSize;
default:
ShouldNotReachHere();
return 0;
}
}
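As a rough, hedged illustration (assuming an SVE machine with MaxVectorSize = 32; not taken from the patch), the helper maps a payload length in bytes to the size of the register kind that holds it:

Matcher::vector_ideal_reg_size(8);   // Op_VecD -> 8
Matcher::vector_ideal_reg_size(16);  // Op_VecX -> 16
Matcher::vector_ideal_reg_size(32);  // Op_VecA -> MaxVectorSize = 32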

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
assert(Matcher::is_generic_vector(generic_opnd), "not generic");
switch (ideal_reg) {
@@ -2631,12 +2645,13 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
// into registers?
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {

// Loads and stores with indirect memory input (e.g., volatile loads and
// stores) do not subsume the input into complex addressing expressions. If
// the addressing expression is input to at least one such load or store, do
// not clone the addressing expression. Query needs_acquiring_load and
// needs_releasing_store as a proxy for indirect memory input, as it is not
// possible to directly query for indirect memory input at this stage.
// Loads and stores with indirect memory input (e.g., volatile loads/stores,
// and vector gather_loads/scatter_stores) do not subsume the input into
// complex addressing expressions. If the addressing expression is input
// to at least one such load or store, do not clone the addressing expression.
// Query needs_acquiring_load and needs_releasing_store as a proxy for
// indirect memory input, as it is not possible to directly query for indirect
// memory input at this stage.
for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
Node* n = m->fast_out(i);
if (n->is_Load() && needs_acquiring_load(n)) {
@@ -2645,6 +2660,13 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack,
if (n->is_Store() && needs_releasing_store(n)) {
return false;
}

if (n->is_LoadVectorGather() ||
n->is_StoreVectorScatter() ||
n->is_LoadVectorGatherMasked() ||
n->is_StoreVectorScatterMasked()) {
return false;
}
}

if (clone_base_plus_offset_address(m, mstack, address_visited)) {
171 changes: 164 additions & 7 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -168,22 +168,21 @@ source %{
case Op_MaskAll:
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_LoadVectorGather:
case Op_LoadVectorGatherMasked:
case Op_StoreVectorMasked:
case Op_StoreVectorScatter:
case Op_StoreVectorScatterMasked:
case Op_PopulateIndex:
case Op_CompressM:
case Op_CompressV:
// Temporarily disable vector mask widen support for NEON,
// as we do not have the use case now.
case Op_VectorMaskWiden:
if (UseSVE == 0) {
return false;
}
break;
case Op_LoadVectorGather:
case Op_LoadVectorGatherMasked:
if (UseSVE == 0 || is_subword_type(bt)) {
return false;
}
break;
case Op_MulAddVS2VI:
if (length_in_bytes != 16) {
return false;
@@ -325,6 +324,11 @@ source %{
return false;
}

// SVE always needs the vector index for gather/scatter.
bool Matcher::gather_scatter_needs_vector_index(BasicType elem_bt, int vlen) {
return true;
}

// Assert that the given node is not a variable shift.
bool assert_not_var_shift(const Node* n) {
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -5075,6 +5079,35 @@ instruct extractD(vReg dst, vReg src, immI idx) %{
ins_pipe(pipe_slow);
%}

// ---------------------------- Vector Slice ------------------------

instruct vslice_neon(vReg dst, vReg src1, vReg src2, immI index) %{
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
match(Set dst (VectorSlice (Binary src1 src2) index));
format %{ "vslice_neon $dst, $src1, $src2, $index" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
uint scale = type2aelembytes(Matcher::vector_element_basic_type(this));
__ ext($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src1$$FloatRegister, $src2$$FloatRegister,
((uint)$index$$constant * scale));
%}
ins_pipe(pipe_slow);
%}

instruct vslice_sve(vReg dst_src1, vReg src2, immI index) %{
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
match(Set dst_src1 (VectorSlice (Binary dst_src1 src2) index));
format %{ "vslice_sve $dst_src1, $dst_src1, $src2, $index" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint scale = type2aelembytes(Matcher::vector_element_basic_type(this));
__ sve_ext($dst_src1$$FloatRegister, $src2$$FloatRegister,
((uint)$index$$constant * scale));
%}
ins_pipe(pipe_slow);
%}
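A hedged scalar sketch of what both slice rules compute (illustrative names, not HotSpot code): the result takes vlen lanes from the concatenation of src1 and src2 starting at the constant lane index, which ext/sve_ext realize at byte granularity, i.e. at offset index * scale bytes.

template <typename T>
void vector_slice(T* dst, const T* src1, const T* src2, int vlen, int origin) {
  for (int i = 0; i < vlen; i++) {
    // Lanes [origin, vlen) come from src1; the remainder continues into src2.
    dst[i] = (origin + i < vlen) ? src1[origin + i] : src2[origin + i - vlen];
  }
}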

// ------------------------------ Vector mask load/store -----------------------

// vector load mask
@@ -5738,6 +5771,32 @@ instruct vmaskcast_narrow_sve(pReg dst, pReg src, pReg ptmp) %{
ins_pipe(pipe_slow);
%}

// Vector mask widen to twice size
//
// Unpack elements from the lowest or highest half of the source
// predicate and place in elements of twice their size within the
// destination predicate.

instruct vmaskwiden_lo_sve(pReg dst, pReg src) %{
predicate(UseSVE > 0 && n->as_VectorMaskWiden()->is_lo());
match(Set dst (VectorMaskWiden src));
format %{ "vmaskwiden_lo_sve $dst, $src" %}
ins_encode %{
__ sve_punpklo($dst$$PRegister, $src$$PRegister);
%}
ins_pipe(pipe_slow);
%}

instruct vmaskwiden_hi_sve(pReg dst, pReg src) %{
predicate(UseSVE > 0 && !n->as_VectorMaskWiden()->is_lo());
match(Set dst (VectorMaskWiden src));
format %{ "vmaskwiden_hi_sve $dst, $src" %}
ins_encode %{
__ sve_punpkhi($dst$$PRegister, $src$$PRegister);
%}
ins_pipe(pipe_slow);
%}
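A hedged scalar sketch (illustrative only) of the widening that punpklo/punpkhi perform on a predicate: half of the source mask lanes are selected, and each selected bit becomes the mask bit of one destination lane of twice the element size.

void mask_widen(bool* dst, const bool* src, int src_lanes, bool lo) {
  int base = lo ? 0 : src_lanes / 2;   // punpklo takes the low half, punpkhi the high half
  for (int i = 0; i < src_lanes / 2; i++) {
    dst[i] = src[base + i];            // dst has src_lanes / 2 lanes, each twice as wide
  }
}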

// vector mask reinterpret

instruct vmask_reinterpret_same_esize(pReg dst_src) %{
@@ -6471,6 +6530,55 @@ instruct rearrange(vReg dst, vReg src, vReg shuffle) %{

// ------------------------------ Vector Load Gather ---------------------------

instruct gather_load_subword_le128(vReg dst, indirect mem, vReg idx) %{
predicate(UseSVE > 0 &&
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 2 &&
Matcher::vector_length_in_bytes(n->as_LoadVectorGather()->in(3)) <= 16);
match(Set dst (LoadVectorGather mem idx));
effect(TEMP_DEF dst);
format %{ "gather_load_subword_le128 $dst, $mem, $idx\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (bt == T_BYTE) {
__ sve_ld1b_gather($dst$$FloatRegister, ptrue,
as_Register($mem$$base), $idx$$FloatRegister);
__ xtn($dst$$FloatRegister, __ T4H, $dst$$FloatRegister, __ T4S);
__ xtn($dst$$FloatRegister, __ T8B, $dst$$FloatRegister, __ T8H);
} else {
assert(bt == T_SHORT, "unsupported type");
__ sve_ld1h_gather($dst$$FloatRegister, ptrue,
as_Register($mem$$base), $idx$$FloatRegister);
__ xtn($dst$$FloatRegister, __ T4H, $dst$$FloatRegister, __ T4S);
}
%}
ins_pipe(pipe_slow);
%}

instruct gather_load_subword_gt128(vReg dst, indirect mem, vReg idx, vReg vtmp) %{
predicate(UseSVE > 0 &&
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 2 &&
Matcher::vector_length_in_bytes(n->as_LoadVectorGather()->in(3)) > 16);
match(Set dst (LoadVectorGather mem idx));
effect(TEMP_DEF dst, TEMP vtmp);
format %{ "gather_load_subword_gt128 $dst, $mem, $idx\t# vector (sve). KILL $vtmp" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_dup($vtmp$$FloatRegister, __ S, 0);
if (bt == T_BYTE) {
__ sve_ld1b_gather($dst$$FloatRegister, ptrue,
as_Register($mem$$base), $idx$$FloatRegister);
__ sve_uzp1($dst$$FloatRegister, __ H, $dst$$FloatRegister, $vtmp$$FloatRegister);
__ sve_uzp1($dst$$FloatRegister, __ B, $dst$$FloatRegister, $vtmp$$FloatRegister);
} else {
assert(bt == T_SHORT, "unsupported type");
__ sve_ld1h_gather($dst$$FloatRegister, ptrue,
as_Register($mem$$base), $idx$$FloatRegister);
__ sve_uzp1($dst$$FloatRegister, __ H, $dst$$FloatRegister, $vtmp$$FloatRegister);
}
%}
ins_pipe(pipe_slow);
%}
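A hedged scalar sketch (illustrative only, not HotSpot code) of the subword gather both rules implement: SVE gather loads cannot target B/H elements directly, so each byte/short is gathered into a 32-bit lane with ld1b/ld1h (scalar base plus vector index), and the S lanes are then narrowed back to the subword element size, with xtn when the payload fits in 128 bits and with sve_uzp1 against a zeroed temporary otherwise.

#include <cstdint>

void gather_bytes(int8_t* dst, const int8_t* base, const int32_t* idx, int vlen) {
  for (int i = 0; i < vlen; i++) {
    int32_t wide = base[idx[i]];   // ld1b gather: one byte loaded into each S lane
    dst[i] = (int8_t) wide;        // xtn / uzp1: narrow the S lanes down to B lanes
  }
}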

instruct gather_loadS(vReg dst, indirect mem, vReg idx) %{
predicate(UseSVE > 0 &&
type2aelembytes(Matcher::vector_element_basic_type(n)) == 4);
@@ -6481,7 +6589,7 @@ instruct gather_loadS(vReg dst, indirect mem, vReg idx) %{
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ sve_ld1w_gather($dst$$FloatRegister, ptrue,
as_Register($mem$$base), $idx$$FloatRegister);
%}
%}
ins_pipe(pipe_slow);
%}

@@ -6501,6 +6609,55 @@ instruct gather_loadD(vReg dst, indirect mem, vReg idx, vReg tmp) %{
ins_pipe(pipe_slow);
%}

instruct gather_load_subword_masked_le128(vReg dst, indirect mem, vReg idx, pRegGov pg) %{
predicate(UseSVE > 0 &&
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 2 &&
Matcher::vector_length_in_bytes(n->as_LoadVectorGatherMasked()->in(3)->in(1)) <= 16);
match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
effect(TEMP_DEF dst);
format %{ "gather_load_subword_masked_le128 $dst, $pg, $mem, $idx\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (bt == T_BYTE) {
__ sve_ld1b_gather($dst$$FloatRegister, $pg$$PRegister,
as_Register($mem$$base), $idx$$FloatRegister);
__ xtn($dst$$FloatRegister, __ T4H, $dst$$FloatRegister, __ T4S);
__ xtn($dst$$FloatRegister, __ T8B, $dst$$FloatRegister, __ T8H);
} else {
assert(bt == T_SHORT, "unsupported type");
__ sve_ld1h_gather($dst$$FloatRegister, $pg$$PRegister,
as_Register($mem$$base), $idx$$FloatRegister);
__ xtn($dst$$FloatRegister, __ T4H, $dst$$FloatRegister, __ T4S);
}
%}
ins_pipe(pipe_slow);
%}

instruct gather_load_subword_masked_gt128(vReg dst, indirect mem, vReg idx, vReg vtmp, pRegGov pg) %{
predicate(UseSVE > 0 &&
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 2 &&
Matcher::vector_length_in_bytes(n->as_LoadVectorGatherMasked()->in(3)->in(1)) > 16);
match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
effect(TEMP_DEF dst, TEMP vtmp);
format %{ "gather_load_subword_masked_gt128 $dst, $pg, $mem, $idx\t# vector (sve). KILL $vtmp" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_dup($vtmp$$FloatRegister, __ S, 0);
if (bt == T_BYTE) {
__ sve_ld1b_gather($dst$$FloatRegister, $pg$$PRegister,
as_Register($mem$$base), $idx$$FloatRegister);
__ sve_uzp1($dst$$FloatRegister, __ H, $dst$$FloatRegister, $vtmp$$FloatRegister);
__ sve_uzp1($dst$$FloatRegister, __ B, $dst$$FloatRegister, $vtmp$$FloatRegister);
} else {
assert(bt == T_SHORT, "unsupported type");
__ sve_ld1h_gather($dst$$FloatRegister, $pg$$PRegister,
as_Register($mem$$base), $idx$$FloatRegister);
__ sve_uzp1($dst$$FloatRegister, __ H, $dst$$FloatRegister, $vtmp$$FloatRegister);
}
%}
ins_pipe(pipe_slow);
%}

instruct gather_loadS_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{
predicate(UseSVE > 0 &&
type2aelembytes(Matcher::vector_element_basic_type(n)) == 4);