-
Notifications
You must be signed in to change notification settings - Fork 6.2k
8332119: Incorrect IllegalArgumentException for C2 compiled permute kernel #19442
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -511,6 +511,37 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { | |
| return true; | ||
| } | ||
|
|
||
| // Following routine generates IR corresponding to AbstractShuffle::partiallyWrapIndex method, | ||
| // which partially wraps index by modulo VEC_LENGTH and generates a negative index value if original | ||
| // index is out of valid index range [0, VEC_LENGTH) | ||
| // | ||
| // wrapped_index = (VEC_LENGTH - 1) & index | ||
| // if (index u>= VEC_LENGTH) { | ||
| //   wrapped_index -= VEC_LENGTH; | ||
| // } | ||
| // | ||
| // Note: The unsigned comparison treats both negative (< 0) and too-large (>= VEC_LENGTH) indices | ||
| // as out-of-bounds indices. | ||
| Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { | ||
| assert(elem_bt == T_BYTE, "Shuffles use byte array based backing storage."); | ||
| const TypeVect* vt = TypeVect::make(elem_bt, num_elem); | ||
| const Type* type_bt = Type::get_const_basic_type(elem_bt); | ||
|
|
||
| Node* mod_mask = gvn().makecon(TypeInt::make(num_elem-1)); | ||
| Node* bcast_mod_mask = gvn().transform(VectorNode::scalar2vector(mod_mask, num_elem, type_bt)); | ||
|
|
||
| BoolTest::mask pred = BoolTest::ugt; | ||
| ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(pred)); | ||
| Node* lane_cnt = gvn().makecon(TypeInt::make(num_elem)); | ||
| Node* bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt)); | ||
| const TypeVect* vmask_type = TypeVect::makemask(type_bt, num_elem); | ||
| Node* mask = gvn().transform(new VectorMaskCmpNode(pred, bcast_lane_cnt, index_vec, pred_node, vmask_type)); | ||
|
|
||
| // AND the indices with (num_elem - 1), build a copy biased by -num_elem, and blend the two per lane using the unsigned-compare mask (lane count u> original index) so that out-of-range indices end up negative, matching the Java-side implementation. | ||
| index_vec = gvn().transform(VectorNode::make(Op_AndV, index_vec, bcast_mod_mask, vt)); | ||
| Node* biased_val = gvn().transform(VectorNode::make(Op_SubVB, index_vec, bcast_lane_cnt, vt)); | ||
| return gvn().transform(new VectorBlendNode(biased_val, index_vec, mask)); | ||
| } | ||
|
|
||
| // <Sh extends VectorShuffle<E>, E> | ||
| // Sh ShuffleIota(Class<?> E, Class<?> shuffleClass, Vector.Species<E> s, int length, | ||
| // int start, int step, int wrap, ShuffleIotaOperation<Sh, E> defaultImpl) | ||
|
|
@@ -596,18 +627,9 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { | |
|
|
||
| if (do_wrap) { | ||
| // Wrap the indices greater than lane count. | ||
| res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); | ||
| } else { | ||
| ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ugt)); | ||
| Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem)); | ||
| Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt)); | ||
| const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem); | ||
| Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type)); | ||
|
|
||
| // Make the indices greater than lane count as -ve values to match the java side implementation. | ||
| res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); | ||
| Node * biased_val = gvn().transform(VectorNode::make(Op_SubVB, res, bcast_lane_cnt, vt)); | ||
| res = gvn().transform(new VectorBlendNode(biased_val, res, mask)); | ||
| } else { | ||
| res = partially_wrap_indexes(res, num_elem, elem_bt); | ||
| } | ||
|
|
||
| ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); | ||
|
|
@@ -2286,6 +2308,18 @@ bool LibraryCallKit::inline_vector_convert() { | |
| return false; | ||
| } | ||
|
|
||
|
|
||
| if (is_vector_shuffle(vbox_klass_to) && | ||
| (!arch_supports_vector(Op_SubVB, num_elem_to, elem_bt_to, VecMaskNotUsed) || | ||
| !arch_supports_vector(Op_VectorBlend, num_elem_to, elem_bt_to, VecMaskNotUsed) || | ||
| !arch_supports_vector(Op_VectorMaskCmp, num_elem_to, elem_bt_to, VecMaskNotUsed) || | ||
| !arch_supports_vector(Op_AndV, num_elem_to, elem_bt_to, VecMaskNotUsed) || | ||
| !arch_supports_vector(Op_Replicate, num_elem_to, elem_bt_to, VecMaskNotUsed))) { | ||
| log_if_needed(" ** not supported: arity=1 op=shuffle_index_wrap vlen2=%d etype2=%s", | ||
| num_elem_to, type2name(elem_bt_to)); | ||
| return false; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add |
||
| } | ||
|
|
||
| // At this point, we know that both input and output vector registers are supported | ||
| // by the architecture. Next, check whether the cast is simply to the same type, which means | ||
| // that it is actually a resize and not a cast. | ||
|
|
@@ -2383,6 +2417,10 @@ bool LibraryCallKit::inline_vector_convert() { | |
| op = gvn().transform(new VectorReinterpretNode(op, src_type, dst_type)); | ||
| } | ||
|
|
||
| if (is_vector_shuffle(vbox_klass_to)) { | ||
| op = partially_wrap_indexes(op, num_elem_to, elem_bt_to); | ||
| } | ||
|
|
||
| const TypeInstPtr* vbox_type_to = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass_to); | ||
| Node* vbox = box_vector(op, vbox_type_to, elem_bt_to, num_elem_to); | ||
| set_result(vbox); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| /* | ||
| * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. | ||
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
| * | ||
| * This code is free software; you can redistribute it and/or modify it | ||
| * under the terms of the GNU General Public License version 2 only, as | ||
| * published by the Free Software Foundation. | ||
| * | ||
| * This code is distributed in the hope that it will be useful, but WITHOUT | ||
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
| * version 2 for more details (a copy is included in the LICENSE file that | ||
| * accompanied this code). | ||
| * | ||
| * You should have received a copy of the GNU General Public License version | ||
| * 2 along with this work; if not, write to the Free Software Foundation, | ||
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| * | ||
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||
| * or visit www.oracle.com if you need additional information or have any | ||
| * questions. | ||
| */ | ||
|
|
||
| /** | ||
| * @test | ||
| * @bug 8332119 | ||
| * @summary Incorrect IllegalArgumentException for C2 compiled permute kernel | ||
| * @modules jdk.incubator.vector | ||
| * @library /test/lib / | ||
| * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation -XX:CompileOnly=TestTwoVectorPermute::micro compiler.vectorapi.TestTwoVectorPermute | ||
| * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation compiler.vectorapi.TestTwoVectorPermute | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would also add a run without |
||
| * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:TieredStopAtLevel=3 compiler.vectorapi.TestTwoVectorPermute | ||
| */ | ||
| package compiler.vectorapi; | ||
|
|
||
|
|
||
| import jdk.incubator.vector.*; | ||
| import java.util.Arrays; | ||
| import java.util.Random; | ||
|
|
||
| public class TestTwoVectorPermute { | ||
| public static final VectorSpecies<Float> FSP = FloatVector.SPECIES_256; | ||
|
|
||
| public static void validate(float[] res, float[] shuf, float[] src1, float[] src2) { | ||
| for (int i = 0; i < res.length; i++) { | ||
| float expected = Float.NaN; | ||
| int shuf_index = (int)shuf[i]; | ||
| // Exceptional (out-of-range) index: the expected lane comes from src2 at the wrapped position; in-range indices read src1 directly. | ||
| if (shuf_index < 0 || shuf_index >= FSP.length()) { | ||
| int wrapped_index = (shuf_index & (FSP.length() - 1)); | ||
| if (Integer.compareUnsigned(shuf_index, FSP.length()) > 0) { | ||
| wrapped_index -= FSP.length(); | ||
| } | ||
| wrapped_index = wrapped_index < 0 ? wrapped_index + FSP.length() : wrapped_index; | ||
| expected = src2[wrapped_index]; | ||
| } else { | ||
| expected = src1[shuf_index]; | ||
| } | ||
| if (res[i] != expected) { | ||
| throw new AssertionError("Result mismatch at " + i + " index, (actual = " + res[i] + ") != ( expected " + expected + " )"); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| public static void micro(float[] res, float[] shuf, float[] src1, float[] src2) { | ||
| VectorShuffle<Float> vshuf = FloatVector.fromArray(FSP, shuf, 0).toShuffle(); | ||
| VectorShuffle<Float> vshuf_wrapped = vshuf.wrapIndexes(); | ||
| FloatVector.fromArray(FSP, src1, 0) | ||
| .rearrange(vshuf_wrapped) | ||
| .blend(FloatVector.fromArray(FSP, src2, 0) | ||
| .rearrange(vshuf_wrapped), | ||
| vshuf.laneIsValid().not()) | ||
| .intoArray(res, 0); | ||
| } | ||
|
|
||
| public static void main(String [] args) { | ||
| float [] res = new float[FSP.length()]; | ||
| float [] shuf = new float[FSP.length()]; | ||
| float [] src1 = new float[FSP.length()]; | ||
| float [] src2 = new float[FSP.length()]; | ||
|
|
||
| for (int i = 0; i < FSP.length(); i++) { | ||
| shuf[i] = i * 2; | ||
| } | ||
| for (int i = 0; i < FSP.length(); i++) { | ||
| src1[i] = i; | ||
| src2[i] = i + FSP.length(); | ||
| } | ||
| for (int i = 0; i < 10000; i++) { | ||
| micro(res, shuf, src1, src2); | ||
| } | ||
| validate(res, shuf, src1, src2); | ||
| for (int i = 0; i < FSP.length(); i++) { | ||
| shuf[i] = -i * 2; | ||
| } | ||
| for (int i = 0; i < 10000; i++) { | ||
| micro(res, shuf, src1, src2); | ||
| } | ||
| validate(res, shuf, src1, src2); | ||
| System.out.println("PASSED"); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add comment with pseudo code to show what this method do?