Skip to content

Commit 6727490

Browse files
Bhavana Kilambi, nick-arm
Bhavana Kilambi
authored and committed
8303161: [vectorapi] VectorMask.cast narrow operation returns incorrect value with SVE
Reviewed-by: eliu, xgong, ngasson
1 parent f07decb commit 6727490

File tree

5 files changed

+589
-140
lines changed

5 files changed

+589
-140
lines changed

src/hotspot/cpu/aarch64/aarch64_vector.ad

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5313,18 +5313,19 @@ instruct vmaskcast_extend_sve(pReg dst, pReg src) %{
53135313
ins_pipe(pipe_slow);
53145314
%}
53155315

5316-
instruct vmaskcast_narrow_sve(pReg dst, pReg src) %{
5316+
instruct vmaskcast_narrow_sve(pReg dst, pReg src, pReg ptmp) %{
53175317
predicate(UseSVE > 0 &&
53185318
Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)));
53195319
match(Set dst (VectorMaskCast src));
5320-
format %{ "vmaskcast_narrow_sve $dst, $src" %}
5320+
effect(TEMP_DEF dst, TEMP ptmp);
5321+
format %{ "vmaskcast_narrow_sve $dst, $src\t# KILL $ptmp" %}
53215322
ins_encode %{
53225323
uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
53235324
uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
53245325
assert(length_in_bytes_dst * 2 == length_in_bytes_src ||
53255326
length_in_bytes_dst * 4 == length_in_bytes_src ||
53265327
length_in_bytes_dst * 8 == length_in_bytes_src, "invalid vector length");
5327-
__ sve_vmaskcast_narrow($dst$$PRegister, $src$$PRegister,
5328+
__ sve_vmaskcast_narrow($dst$$PRegister, $src$$PRegister, $ptmp$$PRegister,
53285329
length_in_bytes_dst, length_in_bytes_src);
53295330
%}
53305331
ins_pipe(pipe_slow);

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3710,18 +3710,19 @@ instruct vmaskcast_extend_sve(pReg dst, pReg src) %{
37103710
ins_pipe(pipe_slow);
37113711
%}
37123712

3713-
instruct vmaskcast_narrow_sve(pReg dst, pReg src) %{
3713+
instruct vmaskcast_narrow_sve(pReg dst, pReg src, pReg ptmp) %{
37143714
predicate(UseSVE > 0 &&
37153715
Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)));
37163716
match(Set dst (VectorMaskCast src));
3717-
format %{ "vmaskcast_narrow_sve $dst, $src" %}
3717+
effect(TEMP_DEF dst, TEMP ptmp);
3718+
format %{ "vmaskcast_narrow_sve $dst, $src\t# KILL $ptmp" %}
37183719
ins_encode %{
37193720
uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
37203721
uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
37213722
assert(length_in_bytes_dst * 2 == length_in_bytes_src ||
37223723
length_in_bytes_dst * 4 == length_in_bytes_src ||
37233724
length_in_bytes_dst * 8 == length_in_bytes_src, "invalid vector length");
3724-
__ sve_vmaskcast_narrow($dst$$PRegister, $src$$PRegister,
3725+
__ sve_vmaskcast_narrow($dst$$PRegister, $src$$PRegister, $ptmp$$PRegister,
37253726
length_in_bytes_dst, length_in_bytes_src);
37263727
%}
37273728
ins_pipe(pipe_slow);

src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1289,18 +1289,28 @@ void C2_MacroAssembler::sve_vmaskcast_extend(PRegister dst, PRegister src,
12891289

12901290
// Narrow src predicate to dst predicate with the same lane count but
12911291
// smaller element size, e.g. 512Long -> 64Byte
1292-
void C2_MacroAssembler::sve_vmaskcast_narrow(PRegister dst, PRegister src,
1292+
void C2_MacroAssembler::sve_vmaskcast_narrow(PRegister dst, PRegister src, PRegister ptmp,
12931293
uint dst_element_length_in_bytes, uint src_element_length_in_bytes) {
12941294
// The insignificant bits in src predicate are expected to be zero.
1295+
// To ensure the higher order bits of the resultant narrowed vector are 0, an all-zero predicate is
1296+
// passed as the second argument. An example narrowing operation with a given mask would be -
1297+
// 128Long -> 64Int on a 128-bit machine i.e 2L -> 2I
1298+
// Mask (for 2 Longs) : TF
1299+
// Predicate register for the above mask (16 bits) : 00000001 00000000
1300+
// After narrowing (uzp1 dst.b, src.b, ptmp.b) : 0000 0000 0001 0000
1301+
// Which translates to mask for 2 integers as : TF (lower half is considered while upper half is 0)
1302+
assert_different_registers(src, ptmp);
1303+
assert_different_registers(dst, ptmp);
1304+
sve_pfalse(ptmp);
12951305
if (dst_element_length_in_bytes * 2 == src_element_length_in_bytes) {
1296-
sve_uzp1(dst, B, src, src);
1306+
sve_uzp1(dst, B, src, ptmp);
12971307
} else if (dst_element_length_in_bytes * 4 == src_element_length_in_bytes) {
1298-
sve_uzp1(dst, H, src, src);
1299-
sve_uzp1(dst, B, dst, dst);
1308+
sve_uzp1(dst, H, src, ptmp);
1309+
sve_uzp1(dst, B, dst, ptmp);
13001310
} else if (dst_element_length_in_bytes * 8 == src_element_length_in_bytes) {
1301-
sve_uzp1(dst, S, src, src);
1302-
sve_uzp1(dst, H, dst, dst);
1303-
sve_uzp1(dst, B, dst, dst);
1311+
sve_uzp1(dst, S, src, ptmp);
1312+
sve_uzp1(dst, H, dst, ptmp);
1313+
sve_uzp1(dst, B, dst, ptmp);
13041314
} else {
13051315
assert(false, "unsupported");
13061316
ShouldNotReachHere();

src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@
103103
void sve_vmaskcast_extend(PRegister dst, PRegister src,
104104
uint dst_element_length_in_bytes, uint src_element_lenght_in_bytes);
105105

106-
void sve_vmaskcast_narrow(PRegister dst, PRegister src,
106+
void sve_vmaskcast_narrow(PRegister dst, PRegister src, PRegister ptmp,
107107
uint dst_element_length_in_bytes, uint src_element_lenght_in_bytes);
108108

109109
// Vector reduction

0 commit comments

Comments
 (0)