From 12cc2b4274bcea2df4ac1f3e00fd28bdcb545d2a Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Wed, 29 May 2024 11:00:51 +0530 Subject: [PATCH 1/4] 8332119: Incorrect IllegalArgumentException for C2 compiled permute kernel --- src/hotspot/share/opto/library_call.hpp | 1 + src/hotspot/share/opto/vectorIntrinsics.cpp | 46 ++++++-- .../vectorapi/TestTwoVectorPermute.java | 101 ++++++++++++++++++ 3 files changed, 138 insertions(+), 10 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index 1111c795114c0..b6e34a3fde467 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -350,6 +350,7 @@ class LibraryCallKit : public GraphKit { bool inline_vector_frombits_coerced(); bool inline_vector_shuffle_to_vector(); bool inline_vector_shuffle_iota(); + Node* wrap_indexes(Node* index_vec, int num_elem, BasicType type_bt); bool inline_vector_mask_operation(); bool inline_vector_mem_operation(bool is_store); bool inline_vector_mem_masked_operation(bool is_store); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 807912327e6d1..64902f3a1a150 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -511,6 +511,27 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { return true; } +Node* LibraryCallKit::wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { + assert(elem_bt == T_BYTE, ""); + const TypeVect * vt = TypeVect::make(elem_bt, num_elem); + const Type * type_bt = Type::get_const_basic_type(elem_bt); + + Node* mod_val = gvn().makecon(TypeInt::make(num_elem-1)); + Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt)); + + BoolTest::mask pred = BoolTest::ugt; + ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(pred)); + Node* lane_cnt = gvn().makecon(TypeInt::make(num_elem)); + Node* bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt)); + const TypeVect* vmask_type = TypeVect::makemask(type_bt, num_elem); + Node* mask = gvn().transform(new VectorMaskCmpNode(pred, bcast_lane_cnt, index_vec, pred_node, vmask_type)); + + // Make the indices greater than lane count as -ve values to match the java side implementation. + index_vec = gvn().transform(VectorNode::make(Op_AndV, index_vec, bcast_mod, vt)); + Node* biased_val = gvn().transform(VectorNode::make(Op_SubVB, index_vec, bcast_lane_cnt, vt)); + return gvn().transform(new VectorBlendNode(biased_val, index_vec, mask)); +} + // , E> // Sh ShuffleIota(Class E, Class shuffleClass, Vector.Species s, int length, // int start, int step, int wrap, ShuffleIotaOperation defaultImpl) @@ -598,16 +619,7 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { // Wrap the indices greater than lane count. res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); } else { - ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ugt)); - Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem)); - Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt)); - const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem); - Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type)); - - // Make the indices greater than lane count as -ve values to match the java side implementation. - res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); - Node * biased_val = gvn().transform(VectorNode::make(Op_SubVB, res, bcast_lane_cnt, vt)); - res = gvn().transform(new VectorBlendNode(biased_val, res, mask)); + res = wrap_indexes(res, num_elem, elem_bt); } ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); @@ -2286,6 +2298,16 @@ bool LibraryCallKit::inline_vector_convert() { return false; } + + if (is_vector_shuffle(vbox_klass_to) && + (!arch_supports_vector(Op_SubVB, num_elem_to, elem_bt_to, VecMaskNotUsed) || + !arch_supports_vector(Op_VectorBlend, num_elem_to, elem_bt_to, VecMaskNotUsed) || + !arch_supports_vector(Op_VectorMaskCmp, num_elem_to, elem_bt_to, VecMaskNotUsed) || + !arch_supports_vector(Op_AndV, num_elem_to, elem_bt_to, VecMaskNotUsed) || + !arch_supports_vector(Op_Replicate, num_elem_to, elem_bt_to, VecMaskNotUsed))) { + return false; + } + // At this point, we know that both input and output vector registers are supported // by the architecture. Next check if the casted type is simply to same type - which means // that it is actually a resize and not a cast. @@ -2383,6 +2405,10 @@ bool LibraryCallKit::inline_vector_convert() { op = gvn().transform(new VectorReinterpretNode(op, src_type, dst_type)); } + if (is_vector_shuffle(vbox_klass_to)) { + op = wrap_indexes(op, num_elem_to, elem_bt_to); + } + const TypeInstPtr* vbox_type_to = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass_to); Node* vbox = box_vector(op, vbox_type_to, elem_bt_to, num_elem_to); set_result(vbox); diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java b/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java new file mode 100644 index 0000000000000..a5370dcdcafea --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** +* @test +* @bug 8332119 +* @summary Incorrect IllegalArgumentException for C2 compiled permute kernel +* @modules jdk.incubator.vector +* @requires vm.compiler2.enabled +* @library /test/lib / +* @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation -XX:CompileOnly=TestTwoVectorPermute::micro compiler.vectorapi.TestTwoVectorPermute +* @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation compiler.vectorapi.TestTwoVectorPermute +*/ +package compiler.vectorapi; + + +import jdk.incubator.vector.*; +import java.util.Arrays; +import java.util.Random; + +public class TestTwoVectorPermute { + public static final VectorSpecies FSP = FloatVector.SPECIES_256; + + public static void validate(float [] res, float [] shuf, float [] src1, float [] src2) { + for (int i = 0; i < res.length; i++) { + float expected = Float.NaN; + // Exceptional index. + if (shuf[i] < 0 || shuf[i] >= FSP.length()) { + int wrapped_index = ((int)shuf[i] & (FSP.length() - 1)); + if (Integer.compareUnsigned((int)shuf[i], FSP.length()) > 0) { + wrapped_index -= FSP.length(); + } + wrapped_index = wrapped_index < 0 ? wrapped_index + FSP.length() : wrapped_index; + expected = src2[wrapped_index]; + } else { + expected = src1[(int)shuf[i]]; + } + if (res[i] != expected) { + throw new AssertionError("Result mismatch at " + i + " index, (actual = " + res[i] + ") != ( expected " + expected + " )"); + } + } + } + + public static void micro(float [] res, float [] shuf, float [] src1, float [] src2) { + VectorShuffle vshuf = FloatVector.fromArray(FSP, shuf, 0).toShuffle(); + VectorShuffle vshuf_wrapped = vshuf.wrapIndexes(); + FloatVector.fromArray(FSP, src1, 0) + .rearrange(vshuf_wrapped) + .blend(FloatVector.fromArray(FSP, src2, 0) + .rearrange(vshuf_wrapped), + vshuf.laneIsValid().not()) + .intoArray(res, 0); + } + + public static void main(String [] args) { + float [] res = new float[FSP.length()]; + float [] shuf = new float[FSP.length()]; + float [] src1 = new float[FSP.length()]; + float [] src2 = new float[FSP.length()]; + + for (int i = 0; i < FSP.length(); i++) { + shuf[i] = i * 2; + } + for (int i = 0; i < FSP.length(); i++) { + src1[i] = i; + src2[i] = i + FSP.length(); + } + for (int i = 0; i < 10000; i++) { + micro(res, shuf, src1, src2); + } + validate(res, shuf, src1, src2); + for (int i = 0; i < FSP.length(); i++) { + shuf[i] = -i * 2; + } + for (int i = 0; i < 10000; i++) { + micro(res, shuf, src1, src2); + } + validate(res, shuf, src1, src2); + System.out.println("PASSED"); + } +} From 102b78aec6b514ac43d40567e2e2cce00f130490 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Sun, 2 Jun 2024 08:39:08 -0700 Subject: [PATCH 2/4] Review Comments Incorporated. --- src/hotspot/share/opto/library_call.hpp | 2 +- src/hotspot/share/opto/vectorIntrinsics.cpp | 6 +++--- .../jtreg/compiler/vectorapi/TestTwoVectorPermute.java | 9 +++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index b6e34a3fde467..941282ce2e744 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -350,7 +350,7 @@ class LibraryCallKit : public GraphKit { bool inline_vector_frombits_coerced(); bool inline_vector_shuffle_to_vector(); bool inline_vector_shuffle_iota(); - Node* wrap_indexes(Node* index_vec, int num_elem, BasicType type_bt); + Node* partially_wrap_indexes(Node* index_vec, int num_elem, BasicType type_bt); bool inline_vector_mask_operation(); bool inline_vector_mem_operation(bool is_store); bool inline_vector_mem_masked_operation(bool is_store); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 64902f3a1a150..83ea628c381de 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -511,7 +511,7 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { return true; } -Node* LibraryCallKit::wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { +Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { assert(elem_bt == T_BYTE, ""); const TypeVect * vt = TypeVect::make(elem_bt, num_elem); const Type * type_bt = Type::get_const_basic_type(elem_bt); @@ -619,7 +619,7 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { // Wrap the indices greater than lane count. res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); } else { - res = wrap_indexes(res, num_elem, elem_bt); + res = partially_wrap_indexes(res, num_elem, elem_bt); } ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); @@ -2406,7 +2406,7 @@ bool LibraryCallKit::inline_vector_convert() { } if (is_vector_shuffle(vbox_klass_to)) { - op = wrap_indexes(op, num_elem_to, elem_bt_to); + op = partially_wrap_indexes(op, num_elem_to, elem_bt_to); } const TypeInstPtr* vbox_type_to = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass_to); diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java b/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java index a5370dcdcafea..ba43a07b5839f 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java +++ b/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java @@ -44,16 +44,17 @@ public class TestTwoVectorPermute { public static void validate(float [] res, float [] shuf, float [] src1, float [] src2) { for (int i = 0; i < res.length; i++) { float expected = Float.NaN; + int shuf_index = (int)shuf[i]; // Exceptional index. - if (shuf[i] < 0 || shuf[i] >= FSP.length()) { - int wrapped_index = ((int)shuf[i] & (FSP.length() - 1)); - if (Integer.compareUnsigned((int)shuf[i], FSP.length()) > 0) { + if (shuf_index < 0 || shuf_index >= FSP.length()) { + int wrapped_index = (shuf_index & (FSP.length() - 1)); + if (Integer.compareUnsigned(shuf_index, FSP.length()) > 0) { wrapped_index -= FSP.length(); } wrapped_index = wrapped_index < 0 ? wrapped_index + FSP.length() : wrapped_index; expected = src2[wrapped_index]; } else { - expected = src1[(int)shuf[i]]; + expected = src1[shuf_index]; } if (res[i] != expected) { throw new AssertionError("Result mismatch at " + i + " index, (actual = " + res[i] + ") != ( expected " + expected + " )"); From 16996e5760e53d302519598a5b213bb3aa9de037 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Wed, 5 Jun 2024 07:40:46 +0530 Subject: [PATCH 3/4] Review comments resolutions. --- src/hotspot/share/opto/vectorIntrinsics.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 83ea628c381de..120aabed18106 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -511,10 +511,20 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { return true; } +// Following routine generates IR corresponding to AbstractShuffle::partiallyWrapIndex method, +// which partially wraps index by modulo VEC_LENGTH and generates a negative index value if original +// index is out of valid index range [0, VEC_LENGTH) +// +// wrapped_index = (VEC_LENGTH - 1) & index +// if (index u> VEC_LENGTH) { +// wrapped_index -= VEC_LENGTH; +// +// Note: Unsigned greater than comparison treat both <0 and >VEC_LENGTH indices as out-of-bound +// indexes. Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { assert(elem_bt == T_BYTE, ""); - const TypeVect * vt = TypeVect::make(elem_bt, num_elem); - const Type * type_bt = Type::get_const_basic_type(elem_bt); + const TypeVect* vt = TypeVect::make(elem_bt, num_elem); + const Type* type_bt = Type::get_const_basic_type(elem_bt); Node* mod_val = gvn().makecon(TypeInt::make(num_elem-1)); Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt)); @@ -617,9 +627,9 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { if (do_wrap) { // Wrap the indices greater than lane count. - res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); + res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); } else { - res = partially_wrap_indexes(res, num_elem, elem_bt); + res = partially_wrap_indexes(res, num_elem, elem_bt); } ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); @@ -2305,6 +2315,8 @@ bool LibraryCallKit::inline_vector_convert() { !arch_supports_vector(Op_VectorMaskCmp, num_elem_to, elem_bt_to, VecMaskNotUsed) || !arch_supports_vector(Op_AndV, num_elem_to, elem_bt_to, VecMaskNotUsed) || !arch_supports_vector(Op_Replicate, num_elem_to, elem_bt_to, VecMaskNotUsed))) { + log_if_needed(" ** not supported: arity=1 op=shuffle_index_wrap vlen2=%d etype2=%s", + num_elem_to, type2name(elem_bt_to)); return false; } From cb2877bff3a0f6be196de6fde6d047456868c47a Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Wed, 5 Jun 2024 23:06:58 +0530 Subject: [PATCH 4/4] Review comments addressed. --- src/hotspot/share/opto/vectorIntrinsics.cpp | 8 ++++---- .../jtreg/compiler/vectorapi/TestTwoVectorPermute.java | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 120aabed18106..3ef6ae02534b9 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -522,12 +522,12 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { // Note: Unsigned greater than comparison treat both <0 and >VEC_LENGTH indices as out-of-bound // indexes. Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { - assert(elem_bt == T_BYTE, ""); + assert(elem_bt == T_BYTE, "Shuffles use byte array based backing storage."); const TypeVect* vt = TypeVect::make(elem_bt, num_elem); const Type* type_bt = Type::get_const_basic_type(elem_bt); - Node* mod_val = gvn().makecon(TypeInt::make(num_elem-1)); - Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt)); + Node* mod_mask = gvn().makecon(TypeInt::make(num_elem-1)); + Node* bcast_mod_mask = gvn().transform(VectorNode::scalar2vector(mod_mask, num_elem, type_bt)); BoolTest::mask pred = BoolTest::ugt; ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(pred)); @@ -537,7 +537,7 @@ Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, Basi Node* mask = gvn().transform(new VectorMaskCmpNode(pred, bcast_lane_cnt, index_vec, pred_node, vmask_type)); // Make the indices greater than lane count as -ve values to match the java side implementation. - index_vec = gvn().transform(VectorNode::make(Op_AndV, index_vec, bcast_mod, vt)); + index_vec = gvn().transform(VectorNode::make(Op_AndV, index_vec, bcast_mod_mask, vt)); Node* biased_val = gvn().transform(VectorNode::make(Op_SubVB, index_vec, bcast_lane_cnt, vt)); return gvn().transform(new VectorBlendNode(biased_val, index_vec, mask)); } diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java b/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java index ba43a07b5839f..dfd1e0d1d1524 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java +++ b/test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java @@ -26,10 +26,10 @@ * @bug 8332119 * @summary Incorrect IllegalArgumentException for C2 compiled permute kernel * @modules jdk.incubator.vector -* @requires vm.compiler2.enabled * @library /test/lib / * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation -XX:CompileOnly=TestTwoVectorPermute::micro compiler.vectorapi.TestTwoVectorPermute * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation compiler.vectorapi.TestTwoVectorPermute +* @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:TieredStopAtLevel=3 compiler.vectorapi.TestTwoVectorPermute */ package compiler.vectorapi; @@ -41,7 +41,7 @@ public class TestTwoVectorPermute { public static final VectorSpecies FSP = FloatVector.SPECIES_256; - public static void validate(float [] res, float [] shuf, float [] src1, float [] src2) { + public static void validate(float[] res, float[] shuf, float[] src1, float[] src2) { for (int i = 0; i < res.length; i++) { float expected = Float.NaN; int shuf_index = (int)shuf[i]; @@ -62,7 +62,7 @@ public static void validate(float [] res, float [] shuf, float [] src1, float [] } } - public static void micro(float [] res, float [] shuf, float [] src1, float [] src2) { + public static void micro(float[] res, float[] shuf, float[] src1, float[] src2) { VectorShuffle vshuf = FloatVector.fromArray(FSP, shuf, 0).toShuffle(); VectorShuffle vshuf_wrapped = vshuf.wrapIndexes(); FloatVector.fromArray(FSP, src1, 0)