Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 97 additions & 21 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@ source %{
}

bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
// Do not auto-vectorize these FP operations, as neither NEON nor SVE/SVE2 support them directly:
// 1. The non_strict_order SVE implementation for 256-bit wide vectors does recursive folding
// and doesn't conform to the JLS, Section Evaluation Order.
// 2. A strictly ordered SVE implementation for 256-bit wide vectors isn't currently
// profitable performance-wise.
// 3. The strictly ordered NEON implementation for 64-bit and 128-bit wide vectors isn't
// profitable performance-wise.
if (opcode == Op_MulReductionVD || opcode == Op_MulReductionVF) {
return false;
}

if (UseSVE == 0) {
// These operations are not profitable to be vectorized on NEON, because no direct
// NEON instructions support them. But the match rule support for them is profitable for
Expand All @@ -139,7 +150,6 @@ source %{
// They are not suitable for auto-vectorization because the result would not conform
// to the JLS, Section Evaluation Order.
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
opcode == Op_MulVL) {
return false;
}
Expand Down Expand Up @@ -205,9 +215,9 @@ source %{
case Op_MulReductionVF:
case Op_MulReductionVI:
case Op_MulReductionVL:
// No vector multiply reduction instructions, but we do
// emit scalar instructions for 64/128-bit vectors.
if (length_in_bytes != 8 && length_in_bytes != 16) {
// No vector multiply reduction instructions, but we do emit ASIMD instructions for
// 64/128-bit vectors. For 256-bit vectors it's a combination of SVE and ASIMD instructions.
if (length_in_bytes < 8 || length_in_bytes > 32) {
return false;
}
break;
Expand Down Expand Up @@ -3482,56 +3492,122 @@ instruct reduce_addD_masked(vRegD dst_src1, vReg src2, pRegGov pg) %{

// ------------------------------ Vector reduction mul -------------------------

instruct reduce_mulI(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
vReg tmp1, vReg tmp2) %{
instruct reduce_mulI_le128b(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
vReg tmp1, vReg tmp2) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 8 ||
Matcher::vector_length_in_bytes(n->in(2)) == 16);
match(Set dst (MulReductionVI isrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "reduce_mulI $dst, $isrc, $vsrc\t# vector (64/128 bits). KILL $tmp1, $tmp2" %}
format %{ "reduce_mulI_le128b $dst, $isrc, $vsrc\t# vector (64/128 bits). KILL $tmp1, $tmp2" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
__ reduce_mul_integral_le128b($dst$$Register, bt, $isrc$$Register,
$vsrc$$FloatRegister, length_in_bytes,
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulI_256b(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
vReg tmp1, vReg tmp2, vReg tmp3) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32);
match(Set dst (MulReductionVI isrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{ "reduce_mulI_256b $dst, $isrc, $vsrc\t# vector (256 bits). KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
__ neon_reduce_mul_integral($dst$$Register, bt, $isrc$$Register,
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_mul_integral_256b($dst$$Register, bt, $isrc$$Register,
$vsrc$$FloatRegister, length_in_bytes,
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc) %{
instruct reduce_mulL_128b(iRegLNoSp dst, iRegL isrc, vReg vsrc) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 16);
match(Set dst (MulReductionVL isrc vsrc));
effect(TEMP_DEF dst);
format %{ "reduce_mulL $dst, $isrc, $vsrc\t# 2L" %}
format %{ "reduce_mulL_128b $dst, $isrc, $vsrc\t# 2L" %}
ins_encode %{
__ neon_reduce_mul_integral($dst$$Register, T_LONG, $isrc$$Register,
$vsrc$$FloatRegister, 16, fnoreg, fnoreg);
__ reduce_mul_integral_le128b($dst$$Register, T_LONG, $isrc$$Register, $vsrc$$FloatRegister, 16,
fnoreg, fnoreg);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulF(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
instruct reduce_mulL_256b(iRegLNoSp dst, iRegL isrc, vReg vsrc, vReg tmp1) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32);
match(Set dst (MulReductionVL isrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1);
format %{ "reduce_mulL_256b $dst, $isrc, $vsrc\t# 4L. KILL $tmp1" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_mul_integral_256b($dst$$Register, T_LONG, $isrc$$Register,
$vsrc$$FloatRegister, length_in_bytes,
$tmp1$$FloatRegister, fnoreg, fnoreg);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulF_le128b(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) <= 16);
match(Set dst (MulReductionVF fsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp);
format %{ "reduce_mulF $dst, $fsrc, $vsrc\t# 2F/4F. KILL $tmp" %}
format %{ "reduce_mulF_le128b $dst, $fsrc, $vsrc\t# 2F/4F. KILL $tmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
__ neon_reduce_mul_fp($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
__ reduce_mul_fp_le128b($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_non_strict_order_mulF_256b(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp1, vReg tmp2) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32 && !n->as_Reduction()->requires_strict_order());
match(Set dst (MulReductionVF fsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "reduce_non_strict_order_mulF_256b $dst, $fsrc, $vsrc\t# 8F. KILL $tmp1, $tmp2" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_non_strict_order_mul_fp_256b($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp1$$FloatRegister,
$tmp2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulD(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp) %{
instruct reduce_mulD_128b(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 16);
match(Set dst (MulReductionVD dsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp);
format %{ "reduce_mulD $dst, $dsrc, $vsrc\t# 2D. KILL $tmp" %}
format %{ "reduce_mulD_128b $dst, $dsrc, $vsrc\t# 2D. KILL $tmp" %}
ins_encode %{
__ neon_reduce_mul_fp($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
$vsrc$$FloatRegister, 16, $tmp$$FloatRegister);
__ reduce_mul_fp_le128b($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
$vsrc$$FloatRegister, 16, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_non_strict_order_mulD_256b(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp1, vReg tmp2) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32 && !n->as_Reduction()->requires_strict_order());
match(Set dst (MulReductionVD dsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "reduce_non_strict_order_mulD_256b $dst, $dsrc, $vsrc\t# 4D. KILL $tmp1, $tmp2" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_non_strict_order_mul_fp_256b($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp1$$FloatRegister,
$tmp2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
Expand Down
118 changes: 97 additions & 21 deletions src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,17 @@ source %{
}

bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
// Do not auto-vectorize these FP operations, as neither NEON nor SVE/SVE2 support them directly:
// 1. The non_strict_order SVE implementation for 256-bit wide vectors does recursive folding
// and doesn't conform to the JLS, Section Evaluation Order.
// 2. A strictly ordered SVE implementation for 256-bit wide vectors isn't currently
// profitable performance-wise.
// 3. The strictly ordered NEON implementation for 64-bit and 128-bit wide vectors isn't
// profitable performance-wise.
if (opcode == Op_MulReductionVD || opcode == Op_MulReductionVF) {
return false;
}

if (UseSVE == 0) {
// These operations are not profitable to be vectorized on NEON, because no direct
// NEON instructions support them. But the match rule support for them is profitable for
Expand All @@ -129,7 +140,6 @@ source %{
// They are not suitable for auto-vectorization because the result would not conform
// to the JLS, Section Evaluation Order.
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
opcode == Op_MulVL) {
return false;
}
Expand Down Expand Up @@ -195,9 +205,9 @@ source %{
case Op_MulReductionVF:
case Op_MulReductionVI:
case Op_MulReductionVL:
// No vector multiply reduction instructions, but we do
// emit scalar instructions for 64/128-bit vectors.
if (length_in_bytes != 8 && length_in_bytes != 16) {
// No vector multiply reduction instructions, but we do emit ASIMD instructions for
// 64/128-bit vectors. For 256-bit vectors it's a combination of SVE and ASIMD instructions.
if (length_in_bytes < 8 || length_in_bytes > 32) {
return false;
}
break;
Expand Down Expand Up @@ -2109,56 +2119,122 @@ REDUCE_ADD_FP_PREDICATE(D, D)

// ------------------------------ Vector reduction mul -------------------------

instruct reduce_mulI(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
vReg tmp1, vReg tmp2) %{
instruct reduce_mulI_le128b(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
vReg tmp1, vReg tmp2) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 8 ||
Matcher::vector_length_in_bytes(n->in(2)) == 16);
match(Set dst (MulReductionVI isrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "reduce_mulI $dst, $isrc, $vsrc\t# vector (64/128 bits). KILL $tmp1, $tmp2" %}
format %{ "reduce_mulI_le128b $dst, $isrc, $vsrc\t# vector (64/128 bits). KILL $tmp1, $tmp2" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
__ reduce_mul_integral_le128b($dst$$Register, bt, $isrc$$Register,
$vsrc$$FloatRegister, length_in_bytes,
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulI_256b(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
vReg tmp1, vReg tmp2, vReg tmp3) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32);
match(Set dst (MulReductionVI isrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{ "reduce_mulI_256b $dst, $isrc, $vsrc\t# vector (256 bits). KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
__ neon_reduce_mul_integral($dst$$Register, bt, $isrc$$Register,
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_mul_integral_256b($dst$$Register, bt, $isrc$$Register,
$vsrc$$FloatRegister, length_in_bytes,
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc) %{
instruct reduce_mulL_128b(iRegLNoSp dst, iRegL isrc, vReg vsrc) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 16);
match(Set dst (MulReductionVL isrc vsrc));
effect(TEMP_DEF dst);
format %{ "reduce_mulL $dst, $isrc, $vsrc\t# 2L" %}
format %{ "reduce_mulL_128b $dst, $isrc, $vsrc\t# 2L" %}
ins_encode %{
__ neon_reduce_mul_integral($dst$$Register, T_LONG, $isrc$$Register,
$vsrc$$FloatRegister, 16, fnoreg, fnoreg);
__ reduce_mul_integral_le128b($dst$$Register, T_LONG, $isrc$$Register, $vsrc$$FloatRegister, 16,
fnoreg, fnoreg);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulF(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
instruct reduce_mulL_256b(iRegLNoSp dst, iRegL isrc, vReg vsrc, vReg tmp1) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32);
match(Set dst (MulReductionVL isrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1);
format %{ "reduce_mulL_256b $dst, $isrc, $vsrc\t# 4L. KILL $tmp1" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_mul_integral_256b($dst$$Register, T_LONG, $isrc$$Register,
$vsrc$$FloatRegister, length_in_bytes,
$tmp1$$FloatRegister, fnoreg, fnoreg);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulF_le128b(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) <= 16);
match(Set dst (MulReductionVF fsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp);
format %{ "reduce_mulF $dst, $fsrc, $vsrc\t# 2F/4F. KILL $tmp" %}
format %{ "reduce_mulF_le128b $dst, $fsrc, $vsrc\t# 2F/4F. KILL $tmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
__ neon_reduce_mul_fp($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
__ reduce_mul_fp_le128b($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_non_strict_order_mulF_256b(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp1, vReg tmp2) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32 && !n->as_Reduction()->requires_strict_order());

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32 && !n->as_Reduction()->requires_strict_order());
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32 &&
!n->as_Reduction()->requires_strict_order());

match(Set dst (MulReductionVF fsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "reduce_non_strict_order_mulF_256b $dst, $fsrc, $vsrc\t# 8F. KILL $tmp1, $tmp2" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_non_strict_order_mul_fp_256b($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp1$$FloatRegister,
$tmp2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_mulD(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp) %{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please consider that reduce_mulF_gt128b and reduce_mulD_gt128b might be similar enough that they should be combined in the same way as other patterns in this file.

instruct reduce_mulD_128b(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 16);
match(Set dst (MulReductionVD dsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp);
format %{ "reduce_mulD $dst, $dsrc, $vsrc\t# 2D. KILL $tmp" %}
format %{ "reduce_mulD_128b $dst, $dsrc, $vsrc\t# 2D. KILL $tmp" %}
ins_encode %{
__ neon_reduce_mul_fp($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
$vsrc$$FloatRegister, 16, $tmp$$FloatRegister);
__ reduce_mul_fp_le128b($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
$vsrc$$FloatRegister, 16, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_non_strict_order_mulD_256b(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp1, vReg tmp2) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) == 32 && !n->as_Reduction()->requires_strict_order());
match(Set dst (MulReductionVD dsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "reduce_non_strict_order_mulD_256b $dst, $dsrc, $vsrc\t# 4D. KILL $tmp1, $tmp2" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ reduce_non_strict_order_mul_fp_256b($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp1$$FloatRegister,
$tmp2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
Expand Down
9 changes: 9 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4064,6 +4064,15 @@ template<typename R, typename... Rx>
INSN(sve_brkb, 0b10); // Break before first true condition
#undef INSN

// SVE Integer Misc - Unpredicated

// SVE constructive prefix (unpredicated)
// MOVPRFX (unpredicated): constructive prefix that copies Zn into Zd so the
// following destructive SVE instruction can behave constructively.
// Encoding (per the review thread: SVE Integer Misc - Unpredicated group):
//   31..24 = 0b00000100, 23..22 = 0b00, 21 = 0b1, 20..16 = 0b00000,
//   15..10 = 0b101111, 9..5 = Zn, 4..0 = Zd
// NOTE(review): must immediately precede the prefixed instruction to be
// architecturally guaranteed to fuse — TODO confirm call sites honor this.
void sve_movprfx(FloatRegister Zd, FloatRegister Zn) {
starti;
f(0b00000100, 31, 24), f(0b00, 23, 22), f(0b1, 21), f(0b00000, 20, 16);
f(0b101111, 15, 10), rf(Zn, 5), rf(Zd, 0);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pattern should be in a section SVE Integer Reduction, C4.1.37. I'm not sure if any other instructions in that group are defined yet, but if not please start the section.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, the unpredicated version should be in the SVE Integer Misc - Unpredicated section.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you asking to move it to another existing section in the file or create a new one? If it's the former, could you point me to the section in the file - I can see neither sve_ftssel nor sve_fexpa defined. If the latter, in Arm ARM C4.1.41 SVE Integer Misc - Unpredicated is followed by C4.1.42 SVE Element Count, so the patch places sve_movprfx definition right before sve_cnt*; I also don't see an opportunity to define an INSN for this section as encodings of the instructions within the section do not follow a single pattern.

If it's something else completely, please elaborate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please try to organize things the same way as the Decode section of the ARM.

Insert a new section called SVE Integer Misc - Unpredicated after SVE bitwise shift by immediate (predicated) and put this pattern there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please try to organize things the same way as the Decode section of the ARM.

Do you refer to C4: A64 Instruction Set Encoding?

Insert a new section called SVE Integer Misc - Unpredicated after SVE bitwise shift by immediate (predicated) and put this pattern there.

I assume you might have misinterpreted predicated SVE bitwise shift for unpredicated.

In the C4: A64 Instruction Set Encoding, C4.1.41 SVE Integer Misc - Unpredicated follows C4.1.40 SVE Bitwise Shift - Unpredicated, which is not implemented by src/hotspot/cpu/aarch64/assembler_aarch64.hpp as far as I can tell. The suggested SVE bitwise shift by immediate (predicated) falls into C4.1.34 SVE Bitwise Shift - Predicated. If this change is to follow the ordering in C4: A64 Instruction Set Encoding, the nearest preceding implemented instruction class for sve_movprfx (from C4.1.41) would be SVE stack frame adjustment, which falls into C4.1.38 SVE Stack Allocation. The nearest following implemented instruction class would be SVE element count (inconveniently named something else in the source file), which falls into C4.1.42 SVE Element Count. The two instruction classes don't follow each other in the file, unfortunately, so it's one or the other. Currently it's the latter.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume you might have misinterpreted predicated SVE bitwise shift for unpredicated.

It's possible. The point is to make sure that any new instruction is in a section corresponding to its section in the Decoding tables. Please make your best guess as to where that should be, and we'll discuss it.

Copy link
Contributor Author

@mikabl-arm mikabl-arm Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To (at least partially) conform to the ordering in C4: A64 Instruction Set Encoding, it should be placed either right after SVE stack frame adjustment or right before SVE element count as described above. The patch does the latter.

I've started the section, please check 4aed1f6 and resolve the thread if you find it suitable.

// Element count and increment scalar (SVE)
#define INSN(NAME, TYPE) \
void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \
Expand Down
Loading