Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8265956: JVM crashes when matching LShiftVB Node #3747

Closed
wants to merge 7 commits into from
Closed
Changes from 3 commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -1174,6 +1174,17 @@ void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift)
}
}

void C2_MacroAssembler::vshiftw_imm(int opcode, XMMRegister dst, int shift) {
if (opcode == Op_RShiftVB) {
psraw(dst, shift);
} else if (opcode == Op_LShiftVB) {
psllw(dst, shift);
} else {
assert((opcode == Op_URShiftVB),"opcode should be Op_URShiftVB");
psrlw(dst, shift);
}
}

void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
switch (opcode) {
case Op_RShiftVB: // fall-through
@@ -1189,6 +1200,17 @@ void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XM
}
}

void C2_MacroAssembler::vshiftw_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
if (opcode == Op_RShiftVB) {
vpsraw(dst, nds, shift, vector_len);
} else if (opcode == Op_LShiftVB) {
vpsllw(dst, nds, shift, vector_len);
} else {
assert((opcode == Op_URShiftVB),"opcode should be Op_URShiftVB");
vpsrlw(dst, nds, shift, vector_len);
}
}

void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) {
switch (opcode) {
case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avs512 systems
@@ -99,7 +99,9 @@
void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftw_imm(int opcode, XMMRegister dst, int shift);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftw_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
@@ -5857,6 +5857,26 @@ instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
ins_pipe( pipe_slow );
%}

instruct vshiftB_imm(vec dst, vec src, immI8 shift, vec tmp, rRegI scratch) %{
predicate(vector_length(n) <= 8);
match(Set dst (LShiftVB src (LShiftCntV shift)));
match(Set dst (RShiftVB src (RShiftCntV shift)));
match(Set dst (URShiftVB src (RShiftCntV shift)));
This conversation was marked as resolved by Wanghuang-Huawei
Comment on lines +5862 to +5864

This comment has been minimized.

@XiaohongGong

XiaohongGong May 8, 2021
Contributor Outdated

Regarding to this issue, it's no need to add these rules here. There is the vector shift rules already.

This comment has been minimized.

@XiaohongGong

XiaohongGong May 8, 2021
Contributor Outdated

The same to all other rules added in this file.

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 8, 2021
Author Outdated

Please update the copyright year of "vectorIntrinsics.cpp" to 2021. Thanks!

Thank you. I will change that.

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 8, 2021
Author Outdated

The same to all other rules added in this file.

OK. I will remove those codes.

effect(TEMP dst, USE src, TEMP tmp, TEMP scratch);
format %{"vshiftB_imm $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
bool sign = (opcode != Op_URShiftVB);
__ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
__ vshiftw_imm(opcode, $tmp$$XMMRegister, $shift$$constant);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($dst$$XMMRegister, $tmp$$XMMRegister);
__ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) &&
UseAVX <= 1);
@@ -5882,6 +5902,30 @@ instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratc
ins_pipe( pipe_slow );
%}

instruct vshift16B_imm(vec dst, vec src, immI8 shift, vec tmp1, vec tmp2, rRegI scratch) %{
predicate(vector_length(n) == 16 && UseAVX <= 1);
match(Set dst (LShiftVB src (LShiftCntV shift)));
match(Set dst (RShiftVB src (RShiftCntV shift)));
match(Set dst (URShiftVB src (RShiftCntV shift)));
effect(TEMP dst, USE src, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vshift16B_imm $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
bool sign = (opcode != Op_URShiftVB);
__ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
__ vshiftw_imm(opcode, $tmp1$$XMMRegister, $shift$$constant);
__ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
__ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ vshiftw_imm(opcode, $tmp2$$XMMRegister, $shift$$constant);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
__ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
__ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) &&
UseAVX > 1);
@@ -5903,6 +5947,26 @@ instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
ins_pipe( pipe_slow );
%}

instruct vshift16B_avx_imm(vec dst, vec src, immI8 shift, vec tmp, rRegI scratch) %{
predicate(vector_length(n) == 16 && UseAVX > 1);
match(Set dst (LShiftVB src (LShiftCntV shift)));
match(Set dst (RShiftVB src (RShiftCntV shift)));
match(Set dst (URShiftVB src (RShiftCntV shift)));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vshift16B_avx_imm $dst,$src,$shift" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool sign = (opcode != Op_URShiftVB);
int vlen_enc = Assembler::AVX_256bit;
__ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vshiftw_imm(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$constant, vlen_enc);
__ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
__ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2)));
match(Set dst ( LShiftVB src shift));
@@ -5928,6 +5992,31 @@ instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
ins_pipe( pipe_slow );
%}

instruct vshift32B_avx_imm(vec dst, vec src, immI8 shift, vec tmp, rRegI scratch) %{
predicate(vector_length(n) == 32);
match(Set dst (LShiftVB src (LShiftCntV shift)));
match(Set dst (RShiftVB src (RShiftCntV shift)));
match(Set dst (URShiftVB src (RShiftCntV shift)));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vshift32B_avx_imm $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 1, "required");
int opcode = this->ideal_Opcode();
bool sign = (opcode != Op_URShiftVB);
int vlen_enc = Assembler::AVX_256bit;
__ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
__ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
__ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vshiftw_imm(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$constant, vlen_enc);
__ vshiftw_imm(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant, vlen_enc);
__ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2)));
match(Set dst ( LShiftVB src shift));
@@ -5956,6 +6045,34 @@ instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI sc
ins_pipe( pipe_slow );
%}

instruct vshift64B_avx_imm(vec dst, vec src, immI8 shift, vec tmp1, vec tmp2, rRegI scratch) %{
predicate(vector_length(n) == 64);
match(Set dst (LShiftVB src (LShiftCntV shift)));
match(Set dst (RShiftVB src (RShiftCntV shift)));
match(Set dst (URShiftVB src (RShiftCntV shift)));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vshift64B_avx_imm $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 2, "required");
int opcode = this->ideal_Opcode();
bool sign = (opcode != Op_URShiftVB);
int vlen_enc = Assembler::AVX_512bit;
__ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
__ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
__ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vshiftw_imm(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$constant, vlen_enc);
__ vshiftw_imm(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$constant, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
__ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
__ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
@@ -369,7 +369,8 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
res = gvn().transform(VectorNode::make(Op_MulI, res, bcast_step, num_elem, elem_bt));
} else if (step_val->get_con() > 1) {
Node* cnt = gvn().makecon(TypeInt::make(log2i_exact(step_val->get_con())));
res = gvn().transform(VectorNode::make(Op_LShiftVB, res, cnt, vt));
Node* shift_cnt = gvn().transform(new LShiftCntVNode(cnt, vt));
This conversation was marked as resolved by Wanghuang-Huawei

This comment has been minimized.

@XiaohongGong

XiaohongGong May 8, 2021
Contributor Outdated

Use "vector_shift_count"? It will mask the shift count before generating the LShiftCntVNode.

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 8, 2021
Author Outdated

vector_shift_count which calls VectorNode::shift_count does not contain Op_LShiftB:

VectorNode* VectorNode::shift_count(int opc, Node* cnt, uint vlen, BasicType bt) {
  // Match shift count type with shift vector type.
  const TypeVect* vt = TypeVect::make(bt, vlen);
  switch (opc) {
  case Op_LShiftI:
  case Op_LShiftL:
    return new LShiftCntVNode(cnt, vt);
  case Op_RShiftI:
  case Op_RShiftL:
  case Op_URShiftB:
  case Op_URShiftS:
  case Op_URShiftI:
  case Op_URShiftL:
    return new RShiftCntVNode(cnt, vt);
  default:
    fatal("Missed vector creation for '%s'", NodeClassNames[opc]);
    return NULL;
  }
}

This comment has been minimized.

@XiaohongGong

XiaohongGong May 8, 2021
Contributor Outdated

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 8, 2021
Author Outdated

Thank you for your comment. I think it‘s a bit strange here to use LShfitI instead of LShiftB. Anyway, reusing is better than new things.

res = gvn().transform(VectorNode::make(Op_LShiftVB, res, shift_cnt, vt));
}

if (!start_val->is_con() || start_val->get_con() != 0) {
@@ -0,0 +1,103 @@
/*
* Copyright (c) 2021, Huawei Technologies Co. Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package compiler.vectorapi;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.VectorShuffle;

import org.testng.Assert;
import org.testng.annotations.Test;


/*
* @test
* @bug 8265956
* @modules jdk.incubator.vector
* @run testng/othervm compiler.vectorapi.TestVectorShuffleIotaByte
*/

@Test
public class TestVectorShuffleIotaByte {
This conversation was marked as resolved by Wanghuang-Huawei

This comment has been minimized.

@DamonFool

DamonFool May 10, 2021
Member

Can this jtreg test reproduce the bug on x86?

I tested it on our x86 platforms, but failed to reproduce it.
Thanks.

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 10, 2021
Author

Thank you for your wonderful review. I will change that.

static final VectorSpecies<Byte> SPECIESb_64 = ByteVector.SPECIES_64;
static final VectorSpecies<Byte> SPECIESb_128 = ByteVector.SPECIES_128;
static final VectorSpecies<Byte> SPECIESb_256 = ByteVector.SPECIES_256;
static final VectorSpecies<Byte> SPECIESb_512 = ByteVector.SPECIES_512;

static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 50000);

static byte[] ab_64 = {41, 45, 59, 46, 115, 101, 103, 97};
static byte[] ab_128 = {112, 32, 116, 117, 111, 104, 116, 105, 119, 32, 107, 111, 111, 98, 32, 97};
static byte[] ab_256 = {32, 101, 107, 105, 108, 32, 115, 105, 32, 117, 111, 121, 32, 116, 117, 111,
104, 116, 105, 119, 32, 121, 97, 100, 32, 121, 114, 101, 118, 69, 32, 46};
static byte[] ab_512 = {103, 110, 97, 117, 72, 32, 71, 78, 65, 87, 32, 45, 45, 33, 117, 111, 121, 32,
103, 110, 105, 115, 115, 105, 77, 32, 46, 117, 111, 121, 32, 111, 116, 32, 114,
101, 116, 116, 101, 108, 32, 100, 114, 105, 104, 116, 32, 121, 109, 32, 115, 105,
32, 115, 105, 104, 116, 44, 121, 116, 101, 101, 119, 83};

static byte[] expected_64 = {1, 3, 5, 7, -7, -5, -3, -1};
static byte[] expected_128 = {1, 3, 5, 7, 9, 11, 13, 15, -15, -13, -11, -9, -7, -5, -3, -1};
static byte[] expected_256 = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-31, -29, -27, -25, -23, -21, -19, -17, -15, -13, -11, -9, -7, -5, -3, -1};
static byte[] expected_512 = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
Comment on lines +59 to +63

This comment has been minimized.

@XiaohongGong

XiaohongGong May 8, 2021
Contributor Outdated

Looks much better to me! Just a small suggestion here: it's better to use static final if these arrays are not expected to be modified.

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 8, 2021
Author Outdated

OK. Thank you for your review.

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 8, 2021
Author Outdated

I have changed this.

33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-63, -61, -59, -57, -55, -53, -51, -49, -47, -45, -43, -41, -39, -37, -35, -33,
-31, -29, -27, -25, -23, -21, -19, -17, -15, -13, -11, -9, -7, -5, -3, -1};

@Test
static void testShuffleIota_64() {
for (int ic = 0; ic < INVOC_COUNT; ic++) {
ByteVector bv1 = (ByteVector) VectorShuffle.iota(SPECIESb_64, 1, 2, false).toVector();
bv1.intoArray(ab_64, 0);
}
Assert.assertEquals(ab_64, expected_64);
This conversation was marked as resolved by Wanghuang-Huawei

This comment has been minimized.

@DamonFool

DamonFool May 10, 2021
Member

How about putting the expected value as the first parameter, which is suggested by [1] ?

[1] https://github.com/openjdk/jdk/blob/master/doc/hotspot-unit-tests.md#first-parameter-is-expected-value

This comment has been minimized.

@Wanghuang-Huawei

Wanghuang-Huawei May 10, 2021
Author

Thank you for your review.

  • I think this document might be the advice of GoogleTest in jdk(test/hotspot/gtest) instead of jtreg (test/hotspot/jtreg)
  • The common way which can pass expect result to method is using @DataProvider. However, in this small test it will inflate the whole test case codes. :-)
}

@Test
static void testShuffleIota_128() {
for (int ic = 0; ic < INVOC_COUNT; ic++) {
ByteVector bv2 = (ByteVector) VectorShuffle.iota(SPECIESb_128, 1, 2, false).toVector();
bv2.intoArray(ab_128, 0);
}
Assert.assertEquals(ab_128, expected_128);
}

@Test
static void testShuffleIota_256() {
for (int ic = 0; ic < INVOC_COUNT; ic++) {
ByteVector bv3 = (ByteVector) VectorShuffle.iota(SPECIESb_256, 1, 2, false).toVector();
bv3.intoArray(ab_256, 0);
}
Assert.assertEquals(ab_256, expected_256);
}

@Test
static void testShuffleIota_512() {
for (int ic = 0; ic < INVOC_COUNT; ic++) {
ByteVector bv4 = (ByteVector) VectorShuffle.iota(SPECIESb_512, 1, 2, false).toVector();
bv4.intoArray(ab_512, 0);
}
Assert.assertEquals(ab_512, expected_512);
}
}