Skip to content

Commit cbee0bc

Browse files
Hao Sun, Ningsheng Jian
Hao Sun
authored and
Ningsheng Jian
committed
8292587: AArch64: Support SVE fabd instruction
Reviewed-by: njian, fgao, ngasson
1 parent 68645eb commit cbee0bc

File tree

7 files changed

+277
-43
lines changed

7 files changed

+277
-43
lines changed

src/hotspot/cpu/aarch64/aarch64_vector.ad

+34-3
Original file line number | Diff line number | Diff line change
@@ -1655,18 +1655,49 @@ instruct vabsD_masked(vReg dst_src, pRegGov pg) %{
16551655

16561656
// vector fabs diff
16571657

1658-
instruct vfabd(vReg dst, vReg src1, vReg src2) %{
1659-
predicate(Matcher::vector_length_in_bytes(n) <= 16);
1658+
instruct vfabd_neon(vReg dst, vReg src1, vReg src2) %{
1659+
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
1660+
!n->as_Vector()->is_predicated_vector());
16601661
match(Set dst (AbsVF (SubVF src1 src2)));
16611662
match(Set dst (AbsVD (SubVD src1 src2)));
1662-
format %{ "vfabd $dst, $src1, $src2\t# vector <= 128 bits" %}
1663+
format %{ "vfabd_neon $dst, $src1, $src2" %}
16631664
ins_encode %{
16641665
__ fabd($dst$$FloatRegister, get_arrangement(this),
16651666
$src1$$FloatRegister, $src2$$FloatRegister);
16661667
%}
16671668
ins_pipe(pipe_slow);
16681669
%}
16691670

1671+
// Unpredicated SVE rule for the floating-point absolute difference
// |dst_src1 - src2|. It is selected only when NEON is not used for this vector
// length and the node is not a predicated (masked) vector node. It matches
// Abs(Sub(a, b)) for both float (AbsVF/SubVF) and double (AbsVD/SubVD) vectors
// and emits a single sve_fabd that uses ptrue as its predicate operand.
// The first source doubles as the destination (dst_src1), so the emitted
// instruction overwrites its first input register.
instruct vfabd_sve(vReg dst_src1, vReg src2) %{
1672+
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
1673+
!n->as_Vector()->is_predicated_vector());
1674+
match(Set dst_src1 (AbsVF (SubVF dst_src1 src2)));
1675+
match(Set dst_src1 (AbsVD (SubVD dst_src1 src2)));
1676+
format %{ "vfabd_sve $dst_src1, $dst_src1, $src2" %}
1677+
ins_encode %{
1678+
assert(UseSVE > 0, "must be sve");
1679+
BasicType bt = Matcher::vector_element_basic_type(this);
1680+
__ sve_fabd($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
1681+
ptrue, $src2$$FloatRegister);
1682+
%}
1683+
ins_pipe(pipe_slow);
1684+
%}
1685+
1686+
// vector fabs diff - predicated
1687+
1688+
// Predicated SVE rule for the floating-point absolute difference. It requires
// UseSVE > 0 and matches a masked Sub whose result feeds a masked Abs, where
// both operations use the same predicate operand pg, for float (AbsVF/SubVF)
// and double (AbsVD/SubVD) vectors. It emits a single sve_fabd with pg as the
// predicate operand. The first source doubles as the destination (dst_src1),
// so the emitted instruction overwrites its first input register.
instruct vfabd_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
1689+
predicate(UseSVE > 0);
1690+
match(Set dst_src1 (AbsVF (SubVF (Binary dst_src1 src2) pg) pg));
1691+
match(Set dst_src1 (AbsVD (SubVD (Binary dst_src1 src2) pg) pg));
1692+
format %{ "vfabd_masked $dst_src1, $pg, $dst_src1, $src2" %}
1693+
ins_encode %{
1694+
BasicType bt = Matcher::vector_element_basic_type(this);
1695+
__ sve_fabd($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
1696+
$pg$$PRegister, $src2$$FloatRegister);
1697+
%}
1698+
ins_pipe(pipe_slow);
1699+
%}
1700+
16701701
// ------------------------------ Vector neg -----------------------------------
16711702

16721703
// vector neg

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

+34-3
Original file line number | Diff line number | Diff line change
@@ -871,18 +871,49 @@ UNARY_OP_PREDICATE_WITH_SIZE(vabsD, AbsVD, sve_fabs, D)
871871

872872
// vector fabs diff
873873

874-
instruct vfabd(vReg dst, vReg src1, vReg src2) %{
875-
predicate(Matcher::vector_length_in_bytes(n) <= 16);
874+
instruct vfabd_neon(vReg dst, vReg src1, vReg src2) %{
875+
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
876+
!n->as_Vector()->is_predicated_vector());
876877
match(Set dst (AbsVF (SubVF src1 src2)));
877878
match(Set dst (AbsVD (SubVD src1 src2)));
878-
format %{ "vfabd $dst, $src1, $src2\t# vector <= 128 bits" %}
879+
format %{ "vfabd_neon $dst, $src1, $src2" %}
879880
ins_encode %{
880881
__ fabd($dst$$FloatRegister, get_arrangement(this),
881882
$src1$$FloatRegister, $src2$$FloatRegister);
882883
%}
883884
ins_pipe(pipe_slow);
884885
%}
885886

887+
// Unpredicated SVE rule for the floating-point absolute difference
// |dst_src1 - src2|. It is selected only when NEON is not used for this vector
// length and the node is not a predicated (masked) vector node. It matches
// Abs(Sub(a, b)) for both float (AbsVF/SubVF) and double (AbsVD/SubVD) vectors
// and emits a single sve_fabd that uses ptrue as its predicate operand.
// The first source doubles as the destination (dst_src1), so the emitted
// instruction overwrites its first input register.
instruct vfabd_sve(vReg dst_src1, vReg src2) %{
888+
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
889+
!n->as_Vector()->is_predicated_vector());
890+
match(Set dst_src1 (AbsVF (SubVF dst_src1 src2)));
891+
match(Set dst_src1 (AbsVD (SubVD dst_src1 src2)));
892+
format %{ "vfabd_sve $dst_src1, $dst_src1, $src2" %}
893+
ins_encode %{
894+
assert(UseSVE > 0, "must be sve");
895+
BasicType bt = Matcher::vector_element_basic_type(this);
896+
__ sve_fabd($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
897+
ptrue, $src2$$FloatRegister);
898+
%}
899+
ins_pipe(pipe_slow);
900+
%}
901+
902+
// vector fabs diff - predicated
903+
904+
// Predicated SVE rule for the floating-point absolute difference. It requires
// UseSVE > 0 and matches a masked Sub whose result feeds a masked Abs, where
// both operations use the same predicate operand pg, for float (AbsVF/SubVF)
// and double (AbsVD/SubVD) vectors. It emits a single sve_fabd with pg as the
// predicate operand. The first source doubles as the destination (dst_src1),
// so the emitted instruction overwrites its first input register.
instruct vfabd_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
905+
predicate(UseSVE > 0);
906+
match(Set dst_src1 (AbsVF (SubVF (Binary dst_src1 src2) pg) pg));
907+
match(Set dst_src1 (AbsVD (SubVD (Binary dst_src1 src2) pg) pg));
908+
format %{ "vfabd_masked $dst_src1, $pg, $dst_src1, $src2" %}
909+
ins_encode %{
910+
BasicType bt = Matcher::vector_element_basic_type(this);
911+
__ sve_fabd($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
912+
$pg$$PRegister, $src2$$FloatRegister);
913+
%}
914+
ins_pipe(pipe_slow);
915+
%}
916+
886917
// ------------------------------ Vector neg -----------------------------------
887918

888919
// vector neg

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

+1
Original file line number | Diff line number | Diff line change
@@ -3142,6 +3142,7 @@ void mvnw(Register Rd, Register Rm,
31423142
sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \
31433143
}
31443144

3145+
INSN(sve_fabd, 0b01100101, 0b001000100); // floating-point absolute difference
31453146
INSN(sve_fabs, 0b00000100, 0b011100101);
31463147
INSN(sve_fadd, 0b01100101, 0b000000100);
31473148
INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd

test/hotspot/gtest/aarch64/aarch64-asmtest.py

+1
Original file line number | Diff line number | Diff line change
@@ -1918,6 +1918,7 @@ def generate(kind, names):
19181918
["bic", "ZZZ"],
19191919
["uzp1", "ZZZ"],
19201920
["uzp2", "ZZZ"],
1921+
["fabd", "ZPZ", "m", "dn"],
19211922
# SVE2 instructions
19221923
["bext", "ZZZ"],
19231924
["bdep", "ZZZ"],

test/hotspot/gtest/aarch64/asmtest.out.h

+36-35
Original file line number | Diff line number | Diff line change
@@ -1207,19 +1207,20 @@
12071207
__ sve_bic(z1, z10, z19); // bic z1.d, z10.d, z19.d
12081208
__ sve_uzp1(z19, __ H, z23, z23); // uzp1 z19.h, z23.h, z23.h
12091209
__ sve_uzp2(z30, __ S, z17, z19); // uzp2 z30.s, z17.s, z19.s
1210-
__ sve_bext(z20, __ H, z4, z20); // bext z20.h, z4.h, z20.h
1211-
__ sve_bdep(z30, __ D, z22, z30); // bdep z30.d, z22.d, z30.d
1210+
__ sve_fabd(z20, __ S, p1, z20); // fabd z20.s, p1/m, z20.s, z20.s
1211+
__ sve_bext(z30, __ D, z22, z30); // bext z30.d, z22.d, z30.d
1212+
__ sve_bdep(z25, __ H, z17, z17); // bdep z25.h, z17.h, z17.h
12121213

12131214
// SVEReductionOp
1214-
__ sve_andv(v25, __ H, p4, z17); // andv h25, p4, z17.h
1215-
__ sve_orv(v11, __ S, p3, z28); // orv s11, p3, z28.s
1216-
__ sve_eorv(v5, __ H, p0, z13); // eorv h5, p0, z13.h
1217-
__ sve_smaxv(v2, __ H, p1, z10); // smaxv h2, p1, z10.h
1218-
__ sve_sminv(v19, __ H, p1, z25); // sminv h19, p1, z25.h
1219-
__ sve_fminv(v2, __ S, p0, z29); // fminv s2, p0, z29.s
1220-
__ sve_fmaxv(v20, __ D, p1, z20); // fmaxv d20, p1, z20.d
1221-
__ sve_fadda(v28, __ S, p3, z13); // fadda s28, p3, s28, z13.s
1222-
__ sve_uaddv(v13, __ H, p7, z1); // uaddv d13, p7, z1.h
1215+
__ sve_andv(v11, __ S, p3, z28); // andv s11, p3, z28.s
1216+
__ sve_orv(v5, __ H, p0, z13); // orv h5, p0, z13.h
1217+
__ sve_eorv(v2, __ H, p1, z10); // eorv h2, p1, z10.h
1218+
__ sve_smaxv(v19, __ H, p1, z25); // smaxv h19, p1, z25.h
1219+
__ sve_sminv(v2, __ B, p0, z29); // sminv b2, p0, z29.b
1220+
__ sve_fminv(v20, __ D, p1, z20); // fminv d20, p1, z20.d
1221+
__ sve_fmaxv(v28, __ S, p3, z13); // fmaxv s28, p3, z13.s
1222+
__ sve_fadda(v13, __ S, p7, z1); // fadda s13, p7, s13, z1.s
1223+
__ sve_uaddv(v27, __ D, p0, z3); // uaddv d27, p0, z3.d
12231224

12241225
__ bind(forth);
12251226

@@ -1238,30 +1239,30 @@
12381239
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
12391240
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
12401241
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
1241-
0x14000000, 0x17ffffd7, 0x140003fe, 0x94000000,
1242-
0x97ffffd4, 0x940003fb, 0x3400000a, 0x34fffa2a,
1243-
0x34007f0a, 0x35000008, 0x35fff9c8, 0x35007ea8,
1244-
0xb400000b, 0xb4fff96b, 0xb4007e4b, 0xb500001d,
1245-
0xb5fff91d, 0xb5007dfd, 0x10000013, 0x10fff8b3,
1246-
0x10007d93, 0x90000013, 0x36300016, 0x3637f836,
1247-
0x36307d16, 0x3758000c, 0x375ff7cc, 0x37587cac,
1242+
0x14000000, 0x17ffffd7, 0x140003ff, 0x94000000,
1243+
0x97ffffd4, 0x940003fc, 0x3400000a, 0x34fffa2a,
1244+
0x34007f2a, 0x35000008, 0x35fff9c8, 0x35007ec8,
1245+
0xb400000b, 0xb4fff96b, 0xb4007e6b, 0xb500001d,
1246+
0xb5fff91d, 0xb5007e1d, 0x10000013, 0x10fff8b3,
1247+
0x10007db3, 0x90000013, 0x36300016, 0x3637f836,
1248+
0x36307d36, 0x3758000c, 0x375ff7cc, 0x37587ccc,
12481249
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
12491250
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
12501251
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
12511252
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
1252-
0x54007a80, 0x54000001, 0x54fff541, 0x54007a21,
1253-
0x54000002, 0x54fff4e2, 0x540079c2, 0x54000002,
1254-
0x54fff482, 0x54007962, 0x54000003, 0x54fff423,
1255-
0x54007903, 0x54000003, 0x54fff3c3, 0x540078a3,
1256-
0x54000004, 0x54fff364, 0x54007844, 0x54000005,
1257-
0x54fff305, 0x540077e5, 0x54000006, 0x54fff2a6,
1258-
0x54007786, 0x54000007, 0x54fff247, 0x54007727,
1259-
0x54000008, 0x54fff1e8, 0x540076c8, 0x54000009,
1260-
0x54fff189, 0x54007669, 0x5400000a, 0x54fff12a,
1261-
0x5400760a, 0x5400000b, 0x54fff0cb, 0x540075ab,
1262-
0x5400000c, 0x54fff06c, 0x5400754c, 0x5400000d,
1263-
0x54fff00d, 0x540074ed, 0x5400000e, 0x54ffefae,
1264-
0x5400748e, 0x5400000f, 0x54ffef4f, 0x5400742f,
1253+
0x54007aa0, 0x54000001, 0x54fff541, 0x54007a41,
1254+
0x54000002, 0x54fff4e2, 0x540079e2, 0x54000002,
1255+
0x54fff482, 0x54007982, 0x54000003, 0x54fff423,
1256+
0x54007923, 0x54000003, 0x54fff3c3, 0x540078c3,
1257+
0x54000004, 0x54fff364, 0x54007864, 0x54000005,
1258+
0x54fff305, 0x54007805, 0x54000006, 0x54fff2a6,
1259+
0x540077a6, 0x54000007, 0x54fff247, 0x54007747,
1260+
0x54000008, 0x54fff1e8, 0x540076e8, 0x54000009,
1261+
0x54fff189, 0x54007689, 0x5400000a, 0x54fff12a,
1262+
0x5400762a, 0x5400000b, 0x54fff0cb, 0x540075cb,
1263+
0x5400000c, 0x54fff06c, 0x5400756c, 0x5400000d,
1264+
0x54fff00d, 0x5400750d, 0x5400000e, 0x54ffefae,
1265+
0x540074ae, 0x5400000f, 0x54ffef4f, 0x5400744f,
12651266
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
12661267
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
12671268
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
@@ -1491,9 +1492,9 @@
14911492
0x65aabf65, 0x65b4daae, 0x65e5f723, 0x65a1523d,
14921493
0x65e07dae, 0x04965e82, 0x04026d1d, 0x043632ee,
14931494
0x04ba3333, 0x047532ac, 0x04f33141, 0x05776af3,
1494-
0x05b36e3e, 0x4554b094, 0x45deb6de, 0x045a3239,
1495-
0x04982f8b, 0x045921a5, 0x04482542, 0x044a2733,
1496-
0x658723a2, 0x65c62694, 0x65982dbc, 0x04413c2d,
1497-
1495+
0x05b36e3e, 0x65888694, 0x45deb2de, 0x4551b639,
1496+
0x049a2f8b, 0x045821a5, 0x04592542, 0x04482733,
1497+
0x040a23a2, 0x65c72694, 0x65862dbc, 0x65983c2d,
1498+
0x04c1207b,
14981499
};
14991500
// END Generated code -- do not edit
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,169 @@
1+
/*
2+
* Copyright (c) 2022, Arm Limited. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package compiler.vectorapi;
25+
26+
import compiler.lib.ir_framework.*;
27+
28+
import java.util.Random;
29+
30+
import jdk.incubator.vector.DoubleVector;
31+
import jdk.incubator.vector.FloatVector;
32+
import jdk.incubator.vector.VectorMask;
33+
import jdk.incubator.vector.VectorOperators;
34+
import jdk.incubator.vector.VectorSpecies;
35+
36+
import jdk.test.lib.Asserts;
37+
import jdk.test.lib.Utils;
38+
39+
/**
40+
* @test
41+
* @bug 8292587
42+
* @key randomness
43+
* @library /test/lib /
44+
* @requires os.arch=="aarch64"
45+
* @summary AArch64: Support SVE fabd instruction
46+
* @modules jdk.incubator.vector
47+
*
48+
* @run driver compiler.vectorapi.VectorAbsDiffTest
49+
*/
50+
51+
public class VectorAbsDiffTest {
52+
private static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_MAX;
53+
private static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_MAX;
54+
55+
private static int LENGTH = 1024;
56+
private static final Random RD = Utils.getRandomInstance();
57+
58+
private static float[] fa;
59+
private static float[] fb;
60+
private static float[] fr;
61+
private static double[] da;
62+
private static double[] db;
63+
private static double[] dr;
64+
private static boolean[] m;
65+
66+
static {
67+
fa = new float[LENGTH];
68+
fb = new float[LENGTH];
69+
fr = new float[LENGTH];
70+
da = new double[LENGTH];
71+
db = new double[LENGTH];
72+
dr = new double[LENGTH];
73+
m = new boolean[LENGTH];
74+
75+
for (int i = 0; i < LENGTH; i++) {
76+
fa[i] = RD.nextFloat((float) 25.0);
77+
fb[i] = RD.nextFloat((float) 25.0);
78+
da[i] = RD.nextDouble(25.0);
79+
db[i] = RD.nextDouble(25.0);
80+
m[i] = RD.nextBoolean();
81+
}
82+
}
83+
84+
@Test
85+
@IR(counts = {"vfabd", "> 0"})
86+
public static void testFloatAbsDiff() {
87+
for (int i = 0; i < LENGTH; i += F_SPECIES.length()) {
88+
FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i);
89+
FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i);
90+
av.sub(bv).lanewise(VectorOperators.ABS).intoArray(fr, i);
91+
}
92+
}
93+
94+
@Run(test = "testFloatAbsDiff")
95+
public static void testFloatAbsDiff_runner() {
96+
testFloatAbsDiff();
97+
for (int i = 0; i < LENGTH; i++) {
98+
Asserts.assertEquals(Math.abs(fa[i] - fb[i]), fr[i]);
99+
}
100+
}
101+
102+
@Test
103+
@IR(counts = {"vfabd_masked", "> 0"}, applyIf = {"UseSVE", "> 0"})
104+
public static void testFloatAbsDiffMasked() {
105+
for (int i = 0; i < LENGTH; i += F_SPECIES.length()) {
106+
FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i);
107+
FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i);
108+
VectorMask<Float> mask = VectorMask.fromArray(F_SPECIES, m, i);
109+
av.lanewise(VectorOperators.SUB, bv, mask).lanewise(VectorOperators.ABS, mask).intoArray(fr, i);
110+
}
111+
}
112+
113+
@Run(test = "testFloatAbsDiffMasked")
114+
public static void testFloatAbsDiffMasked_runner() {
115+
testFloatAbsDiffMasked();
116+
for (int i = 0; i < LENGTH; i++) {
117+
if (m[i]) {
118+
Asserts.assertEquals(Math.abs(fa[i] - fb[i]), fr[i]);
119+
} else {
120+
Asserts.assertEquals(fa[i], fr[i]);
121+
}
122+
}
123+
}
124+
125+
@Test
126+
@IR(counts = {"vfabd", "> 0"})
127+
public static void testDoubleAbsDiff() {
128+
for (int i = 0; i < LENGTH; i += D_SPECIES.length()) {
129+
DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i);
130+
DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i);
131+
av.sub(bv).lanewise(VectorOperators.ABS).intoArray(dr, i);
132+
}
133+
}
134+
135+
@Run(test = "testDoubleAbsDiff")
136+
public static void testDoubleAbsDiff_runner() {
137+
testDoubleAbsDiff();
138+
for (int i = 0; i < LENGTH; i++) {
139+
Asserts.assertEquals(Math.abs(da[i] - db[i]), dr[i]);
140+
}
141+
}
142+
143+
@Test
144+
@IR(counts = {"vfabd_masked", "> 0"}, applyIf = {"UseSVE", "> 0"})
145+
public static void testDoubleAbsDiffMasked() {
146+
for (int i = 0; i < LENGTH; i += D_SPECIES.length()) {
147+
DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i);
148+
DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i);
149+
VectorMask<Double> mask = VectorMask.fromArray(D_SPECIES, m, i);
150+
av.lanewise(VectorOperators.SUB, bv, mask).lanewise(VectorOperators.ABS, mask).intoArray(dr, i);
151+
}
152+
}
153+
154+
@Run(test = "testDoubleAbsDiffMasked")
155+
public static void testDoubleAbsDiffMasked_runner() {
156+
testDoubleAbsDiffMasked();
157+
for (int i = 0; i < LENGTH; i++) {
158+
if (m[i]) {
159+
Asserts.assertEquals(Math.abs(da[i] - db[i]), dr[i]);
160+
} else {
161+
Asserts.assertEquals(da[i], dr[i]);
162+
}
163+
}
164+
}
165+
166+
public static void main(String[] args) {
167+
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
168+
}
169+
}

0 commit comments

Comments
 (0)