Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ class LibraryCallKit : public GraphKit {
bool inline_vector_frombits_coerced();
bool inline_vector_shuffle_to_vector();
bool inline_vector_shuffle_iota();
Node* partially_wrap_indexes(Node* index_vec, int num_elem, BasicType type_bt);
bool inline_vector_mask_operation();
bool inline_vector_mem_operation(bool is_store);
bool inline_vector_mem_masked_operation(bool is_store);
Expand Down
60 changes: 49 additions & 11 deletions src/hotspot/share/opto/vectorIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,37 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
return true;
}

// The following routine generates IR corresponding to the AbstractShuffle::partiallyWrapIndex method,
// which partially wraps an index modulo VEC_LENGTH and generates a negative index value if the original
// index is outside the valid index range [0, VEC_LENGTH).
//
//   wrapped_index = (VEC_LENGTH - 1) & index
//   if (index u>= VEC_LENGTH) {
//     wrapped_index -= VEC_LENGTH;
//   }
//
// Note: The unsigned comparison treats both <0 and >=VEC_LENGTH indices as out-of-bound
// indexes.
Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add comment with pseudo code to show what this method do?

// Builds and returns a vector node derived from index_vec:
//   index_vec - vector of shuffle indexes to process
//   num_elem  - lane count of the vector
//   elem_bt   - element basic type; must be T_BYTE (asserted below)
assert(elem_bt == T_BYTE, "Shuffles use byte array based backing storage.");
// Vector type of the index vector, and the scalar type used when broadcasting constants.
const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
const Type* type_bt = Type::get_const_basic_type(elem_bt);

// Broadcast the constant (num_elem - 1); it is the second operand of the AndV below.
Node* mod_mask = gvn().makecon(TypeInt::make(num_elem-1));
Node* bcast_mod_mask = gvn().transform(VectorNode::scalar2vector(mod_mask, num_elem, type_bt));

// Per-lane unsigned-greater-than compare with the broadcast lane count as the first operand
// and the incoming (not yet masked) index vector as the second.
// NOTE(review): presumably the mask is set for lanes where num_elem u> index, i.e. for
// in-range indexes - confirm against VectorMaskCmpNode operand order.
BoolTest::mask pred = BoolTest::ugt;
ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(pred));
Node* lane_cnt = gvn().makecon(TypeInt::make(num_elem));
Node* bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
const TypeVect* vmask_type = TypeVect::makemask(type_bt, num_elem);
Node* mask = gvn().transform(new VectorMaskCmpNode(pred, bcast_lane_cnt, index_vec, pred_node, vmask_type));

// Make the indices greater than or equal to the lane count -ve values to match the java side
// implementation: index_vec becomes (index & (num_elem - 1)) and biased_val is that value
// minus num_elem.
index_vec = gvn().transform(VectorNode::make(Op_AndV, index_vec, bcast_mod_mask, vt));
Node* biased_val = gvn().transform(VectorNode::make(Op_SubVB, index_vec, bcast_lane_cnt, vt));
// NOTE(review): presumably the blend keeps the masked index where `mask` is set and takes
// biased_val elsewhere - confirm VectorBlendNode operand semantics.
return gvn().transform(new VectorBlendNode(biased_val, index_vec, mask));
}

// <Sh extends VectorShuffle<E>, E>
// Sh ShuffleIota(Class<?> E, Class<?> shuffleClass, Vector.Species<E> s, int length,
// int start, int step, int wrap, ShuffleIotaOperation<Sh, E> defaultImpl)
Expand Down Expand Up @@ -596,18 +627,9 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {

if (do_wrap) {
// Wrap the indices greater than lane count.
res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt));
} else {
ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ugt));
Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem);
Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type));

// Make the indices greater than lane count as -ve values to match the java side implementation.
res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt));
Node * biased_val = gvn().transform(VectorNode::make(Op_SubVB, res, bcast_lane_cnt, vt));
res = gvn().transform(new VectorBlendNode(biased_val, res, mask));
} else {
res = partially_wrap_indexes(res, num_elem, elem_bt);
}

ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass();
Expand Down Expand Up @@ -2286,6 +2308,18 @@ bool LibraryCallKit::inline_vector_convert() {
return false;
}


if (is_vector_shuffle(vbox_klass_to) &&
(!arch_supports_vector(Op_SubVB, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
!arch_supports_vector(Op_VectorBlend, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
!arch_supports_vector(Op_VectorMaskCmp, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
!arch_supports_vector(Op_AndV, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
!arch_supports_vector(Op_Replicate, num_elem_to, elem_bt_to, VecMaskNotUsed))) {
log_if_needed(" ** not supported: arity=1 op=shuffle_index_wrap vlen2=%d etype2=%s",
num_elem_to, type2name(elem_bt_to));
return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add log_if_needed(" here too.

}

// At this point, we know that both input and output vector registers are supported
// by the architecture. Next check if the casted type is simply to same type - which means
// that it is actually a resize and not a cast.
Expand Down Expand Up @@ -2383,6 +2417,10 @@ bool LibraryCallKit::inline_vector_convert() {
op = gvn().transform(new VectorReinterpretNode(op, src_type, dst_type));
}

if (is_vector_shuffle(vbox_klass_to)) {
op = partially_wrap_indexes(op, num_elem_to, elem_bt_to);
}

const TypeInstPtr* vbox_type_to = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass_to);
Node* vbox = box_vector(op, vbox_type_to, elem_bt_to, num_elem_to);
set_result(vbox);
Expand Down
102 changes: 102 additions & 0 deletions test/hotspot/jtreg/compiler/vectorapi/TestTwoVectorPermute.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/**
* @test
* @bug 8332119
* @summary Incorrect IllegalArgumentException for C2 compiled permute kernel
* @modules jdk.incubator.vector
* @library /test/lib /
* @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation -XX:CompileOnly=TestTwoVectorPermute::micro compiler.vectorapi.TestTwoVectorPermute
* @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:-TieredCompilation compiler.vectorapi.TestTwoVectorPermute
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would also add a run without -XX:-TieredCompilation, that could lead to different compilation patterns, and increase our test coverage.

* @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xbatch -XX:TieredStopAtLevel=3 compiler.vectorapi.TestTwoVectorPermute
*/
package compiler.vectorapi;


import jdk.incubator.vector.*;
import java.util.Arrays;
import java.util.Random;

/**
 * Runs a two-source permute kernel ({@link #micro}) many times and checks its
 * output against a scalar reference ({@link #validate}).
 */
public class TestTwoVectorPermute {
// Species used by every vector operation in this test; FSP.length() is the lane count.
public static final VectorSpecies<Float> FSP = FloatVector.SPECIES_256;

/**
 * Scalar reference check for {@link #micro}.
 *
 * For each lane i, shuf[i] is converted to an int index. An index inside
 * [0, FSP.length()) must have selected src1[index]; any other index must have
 * selected an element of src2 at the wrapped position.
 *
 * @param res  output produced by micro
 * @param shuf shuffle indexes stored as floats
 * @param src1 source used for in-range indexes
 * @param src2 source used for out-of-range indexes
 * @throws AssertionError on the first lane whose value differs from the expectation
 */
public static void validate(float[] res, float[] shuf, float[] src1, float[] src2) {
for (int i = 0; i < res.length; i++) {
float expected = Float.NaN;
int shuf_index = (int)shuf[i];
// Exceptional index.
if (shuf_index < 0 || shuf_index >= FSP.length()) {
// Reduce the index with the (lane count - 1) bit mask.
int wrapped_index = (shuf_index & (FSP.length() - 1));
// Unsigned compare: true for negative indexes and for indexes above the lane count.
if (Integer.compareUnsigned(shuf_index, FSP.length()) > 0) {
wrapped_index -= FSP.length();
}
// A negative wrapped index cannot address the array; add the lane count back.
wrapped_index = wrapped_index < 0 ? wrapped_index + FSP.length() : wrapped_index;
expected = src2[wrapped_index];
} else {
expected = src1[shuf_index];
}
if (res[i] != expected) {
throw new AssertionError("Result mismatch at " + i + " index, (actual = " + res[i] + ") != ( expected " + expected + " )");
}
}
}

/**
 * Kernel under test. Loads shuf as a FloatVector and converts it to a shuffle,
 * rearranges both src1 and src2 with the index-wrapped form of that shuffle,
 * blends the src2 result into the lanes where the unwrapped shuffle is not valid,
 * and stores the outcome into res starting at offset 0.
 *
 * @param res  destination array
 * @param shuf shuffle indexes stored as floats
 * @param src1 first source array
 * @param src2 second source array
 */
public static void micro(float[] res, float[] shuf, float[] src1, float[] src2) {
VectorShuffle<Float> vshuf = FloatVector.fromArray(FSP, shuf, 0).toShuffle();
VectorShuffle<Float> vshuf_wrapped = vshuf.wrapIndexes();
FloatVector.fromArray(FSP, src1, 0)
.rearrange(vshuf_wrapped)
.blend(FloatVector.fromArray(FSP, src2, 0)
.rearrange(vshuf_wrapped),
vshuf.laneIsValid().not())
.intoArray(res, 0);
}

/**
 * Test driver: runs the kernel 10000 times with indexes i * 2, validates, then
 * repeats with indexes -i * 2 and prints "PASSED" if no validation failed.
 */
public static void main(String [] args) {
float [] res = new float[FSP.length()];
float [] shuf = new float[FSP.length()];
float [] src1 = new float[FSP.length()];
float [] src2 = new float[FSP.length()];

// First pass: indexes 0, 2, 4, ... so the upper half reaches or exceeds the lane count.
for (int i = 0; i < FSP.length(); i++) {
shuf[i] = i * 2;
}
// src1 holds 0..length-1 and src2 holds length..2*length-1, so every element
// value identifies the source array and position it came from.
for (int i = 0; i < FSP.length(); i++) {
src1[i] = i;
src2[i] = i + FSP.length();
}
for (int i = 0; i < 10000; i++) {
micro(res, shuf, src1, src2);
}
validate(res, shuf, src1, src2);
// Second pass: indexes 0, -2, -4, ... to exercise negative indexes.
for (int i = 0; i < FSP.length(); i++) {
shuf[i] = -i * 2;
}
for (int i = 0; i < 10000; i++) {
micro(res, shuf, src1, src2);
}
validate(res, shuf, src1, src2);
System.out.println("PASSED");
}
}