8268154: Add masking support for vector load intrinsics
Reviewed-by: jbhateja, psandoz
Xiaohong Gong committed Jun 24, 2021
1 parent 14ed85b commit 1c91304
Showing 44 changed files with 862 additions and 84 deletions.
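
At the Java level, this change lets C2 compile masked vector loads down to predicated load instructions (or a load-plus-blend fallback) instead of the scalar default implementation. Below is a minimal sketch of the kind of code that benefits, using the incubating Vector API (run with --add-modules jdk.incubator.vector); the loop-tail pattern with indexInRange is the canonical use of a masked load:

```java
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class MaskedLoadSum {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128;

    // Sums an array whose length need not be a multiple of the lane count.
    // Every iteration, including the tail, goes through the masked load
    // entry point (_VectorLoadMaskedOp) added by this commit.
    static int sum(int[] a) {
        int s = 0;
        for (int i = 0; i < a.length; i += SPECIES.length()) {
            VectorMask<Integer> m = SPECIES.indexInRange(i, a.length);
            IntVector v = IntVector.fromArray(SPECIES, a, i, m); // masked load
            s += v.reduceLanes(VectorOperators.ADD, m);
        }
        return s;
    }
}
```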
6 changes: 6 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -847,6 +847,12 @@ class methodHandle;
"ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljava/lang/Object;") \
do_name(vector_load_op_name, "load") \
\
do_intrinsic(_VectorLoadMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_load_masked_op_name, vector_load_masked_op_sig, F_S) \
do_signature(vector_load_masked_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)Ljava/lang/Object;") \
do_name(vector_load_masked_op_name, "loadMasked") \
\
do_intrinsic(_VectorStoreOp, jdk_internal_vm_vector_VectorSupport, vector_store_op_name, vector_store_op_sig, F_S) \
do_signature(vector_store_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)V") \
1 change: 1 addition & 0 deletions src/hotspot/share/opto/c2compiler.cpp
@@ -664,6 +664,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_VectorShuffleIota:
case vmIntrinsics::_VectorShuffleToVector:
case vmIntrinsics::_VectorLoadOp:
case vmIntrinsics::_VectorLoadMaskedOp:
case vmIntrinsics::_VectorStoreOp:
case vmIntrinsics::_VectorStoreMaskedOp:
case vmIntrinsics::_VectorGatherOp:
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/library_call.cpp
@@ -647,6 +647,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorLoadOp:
return inline_vector_mem_operation(/*is_store=*/false);
case vmIntrinsics::_VectorLoadMaskedOp:
return inline_vector_mem_masked_operation(/*is_store*/false);
case vmIntrinsics::_VectorStoreOp:
return inline_vector_mem_operation(/*is_store=*/true);
case vmIntrinsics::_VectorStoreMaskedOp:
9 changes: 5 additions & 4 deletions src/hotspot/share/opto/library_call.hpp
@@ -330,10 +330,11 @@ class LibraryCallKit : public GraphKit {
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);

enum VectorMaskUseType {
VecMaskUseLoad,
VecMaskUseStore,
VecMaskUseAll,
VecMaskNotUsed
VecMaskUseLoad = 1 << 0,
VecMaskUseStore = 1 << 1,
VecMaskUseAll = VecMaskUseLoad | VecMaskUseStore,
VecMaskUsePred = 1 << 2,
VecMaskNotUsed = 1 << 3
};

bool arch_supports_vector(int op, int num_elem, BasicType type, VectorMaskUseType mask_use_type, bool has_scalar_args = false);
134 changes: 89 additions & 45 deletions src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -159,7 +159,7 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
}

// Check whether mask unboxing is supported.
if (mask_use_type == VecMaskUseAll || mask_use_type == VecMaskUseLoad) {
if ((mask_use_type & VecMaskUseLoad) != 0) {
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
@@ -172,7 +172,7 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
}

// Check whether mask boxing is supported.
if (mask_use_type == VecMaskUseAll || mask_use_type == VecMaskUseStore) {
if ((mask_use_type & VecMaskUseStore) != 0) {
if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
@@ -184,6 +184,19 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
}
}

if ((mask_use_type & VecMaskUsePred) != 0) {
if (!Matcher::has_predicated_vectors() ||
!Matcher::match_rule_supported_vector_masked(sopc, num_elem, type)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
tty->print_cr("Rejected vector mask predicate using (%s,%s,%d) because architecture does not support it",
NodeClassNames[sopc], type2name(type), num_elem);
}
#endif
return false;
}
}

return true;
}

@@ -335,13 +348,23 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
: is_masked_op ? VecMaskUseLoad : VecMaskNotUsed;
if ((sopc != 0) && !arch_supports_vector(sopc, num_elem, elem_bt, mask_use_type)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=%d",
tty->print_cr(" ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=%d is_masked_op=%d",
n, sopc, num_elem, type2name(elem_bt),
is_vector_mask(vbox_klass) ? 1 : 0);
is_vector_mask(vbox_klass) ? 1 : 0, is_masked_op ? 1 : 0);
}
return false; // not supported
}

// Return true if current platform has implemented the masked operation with predicate feature.
bool use_predicate = is_masked_op && sopc != 0 && arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUsePred);
if (is_masked_op && !use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=0 is_masked_op=1",
n, sopc, num_elem, type2name(elem_bt));
}
return false;
}

Node* opd1 = NULL; Node* opd2 = NULL; Node* opd3 = NULL;
switch (n) {
case 3: {
@@ -381,7 +404,6 @@
}

Node* mask = NULL;
bool use_predicate = false;
if (is_masked_op) {
ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
assert(is_vector_mask(mbox_klass), "argument(2) should be a mask class");
@@ -394,13 +416,6 @@
}
return false;
}

// Return true if current platform has implemented the masked operation with predicate feature.
use_predicate = sopc != 0 && Matcher::has_predicated_vectors() &&
Matcher::match_rule_supported_vector_masked(sopc, num_elem, elem_bt);
if (!use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
return false;
}
}

Node* operation = NULL;
@@ -952,24 +967,23 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
return true;
}

// <C, V, E, S extends VectorSpecies<E>,
// M extends VectorMask<E>>
// V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
// int length, Object base, long offset, M m,
// C container, int index, S s, // Arguments for default implementation
// LoadVectorMaskedOperation<C, V, E, S, M> defaultImpl) {
//
// <C, V extends Vector<?>,
// M extends VectorMask<?>>
// void storeMasked(Class<?> vectorClass, Class<M> maskClass, Class<?> elementType,
// int length, Object base, long offset, // Unsafe addressing
// int length, Object base, long offset,
// V v, M m,
// C container, int index, // Arguments for default implementation
// C container, int index, // Arguments for default implementation
// StoreVectorMaskedOperation<C, V, M> defaultImpl) {
//
// TODO: Handle special case where load/store happens from/to byte array but element type
// is not byte. And also add the mask support for vector load. The intrinsic looks like:
//
// <C, V extends Vector<?>,
// M extends VectorMask<?>>
// V loadMasked(Class<?> vectorClass, Class<M> maskClass, Class<?> elementType,
// int vlen, Object base, long offset,
// M m, // mask argument
// Object container, int index,
// LoadMaskedOperation<C, V, M> defaultImpl) {
// TODO: Handle special cases where load/store happens from/to byte array but element type
// is not byte.
//
bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
@@ -1010,18 +1024,30 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
}
return false; // should be primitive type
}

BasicType elem_bt = elem_type->basic_type();
int num_elem = vlen->get_con();
int sopc = is_store ? Op_StoreVectorMasked : Op_LoadVectorMasked;
if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUseLoad) ||
!Matcher::match_rule_supported_vector_masked(sopc, num_elem, elem_bt)) {

bool use_predicate = arch_supports_vector(is_store ? Op_StoreVectorMasked : Op_LoadVectorMasked,
num_elem, elem_bt, (VectorMaskUseType) (VecMaskUseLoad | VecMaskUsePred));
// Masked vector store operation needs the architecture predicate feature. We need to check
// whether the predicated vector operation is supported by backend.
if (is_store && !use_predicate) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=%d op=%s vlen=%d etype=%s ismask=yes",
is_store, is_store ? "storeMask" : "loadMask",
tty->print_cr(" ** not supported: op=storeMasked vlen=%d etype=%s",
num_elem, type2name(elem_bt));
}
return false; // not supported
return false;
}

// This only happens for masked vector load. If predicate is not supported, then check whether
// the normal vector load and blend operations are supported by backend.
if (!use_predicate && (!arch_supports_vector(Op_LoadVector, num_elem, elem_bt, VecMaskNotUsed) ||
!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad))) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: op=loadMasked vlen=%d etype=%s",
num_elem, type2name(elem_bt));
}
return false;
}

Node* base = argument(4);
@@ -1034,7 +1060,12 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
Node* addr = make_unsafe_address(base, offset, elem_bt, true);
const TypePtr *addr_type = gvn().type(addr)->isa_ptr();
const TypeAryPtr* arr_type = addr_type->isa_aryptr();
if (arr_type != NULL && elem_bt != arr_type->elem()->array_element_basic_type()) {
if (arr_type != NULL && !elem_consistent_with_arr(elem_bt, arr_type)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=%d op=%s vlen=%d etype=%s atype=%s",
is_store, is_store ? "storeMasked" : "loadMasked",
num_elem, type2name(elem_bt), type2name(arr_type->elem()->array_element_basic_type()));
}
set_map(old_map);
set_sp(old_sp);
return false;
@@ -1052,6 +1083,18 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);

Node* mask = unbox_vector(is_store ? argument(8) : argument(7), mbox_type, elem_bt, num_elem);
if (mask == NULL) {
if (C->print_intrinsics()) {
tty->print_cr(" ** unbox failed mask=%s",
is_store ? NodeClassNames[argument(8)->Opcode()]
: NodeClassNames[argument(7)->Opcode()]);
}
set_map(old_map);
set_sp(old_sp);
return false;
}

if (is_store) {
Node* val = unbox_vector(argument(7), vbox_type, elem_bt, num_elem);
if (val == NULL) {
@@ -1063,23 +1106,24 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
set_sp(old_sp);
return false; // operand unboxing failed
}
Node* mask = unbox_vector(argument(8), mbox_type, elem_bt, num_elem);
if (mask == NULL) {
if (C->print_intrinsics()) {
tty->print_cr(" ** unbox failed mask=%s",
NodeClassNames[argument(8)->Opcode()]);
}
set_map(old_map);
set_sp(old_sp);
return false;
}

set_all_memory(reset_memory());
Node* vstore = gvn().transform(new StoreVectorMaskedNode(control(), memory(addr), addr, val, addr_type, mask));
set_memory(vstore, addr_type);
} else {
// TODO: mask support for load op.
assert(false, "unimplemented masked memory operation");
Node* vload = NULL;
if (use_predicate) {
// Generate masked load vector node if predicate feature is supported.
const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
vload = gvn().transform(new LoadVectorMaskedNode(control(), memory(addr), addr, addr_type, vt, mask));
} else {
// Use the vector blend to implement the masked load vector. The biased elements are zeros.
Node* zero = gvn().transform(gvn().zerocon(elem_bt));
zero = gvn().transform(VectorNode::scalar2vector(zero, num_elem, Type::get_const_basic_type(elem_bt)));
vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, elem_bt));
vload = gvn().transform(new VectorBlendNode(zero, vload, mask));
}
Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
set_result(box);
}

old_map->destruct(&_gvn);
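The two lowering strategies in inline_vector_mem_masked_operation above have a simple semantic model: with predicate support the load itself is masked (LoadVectorMasked); without it, C2 emits an unmasked LoadVector blended with a zero vector, so masked-off lanes read as zero either way. Here is a rough Java-level model of the blend fallback, a sketch only; it ignores the out-of-bounds handling the real intrinsic's callers perform before reaching this point:

```java
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

final class MaskedLoadFallbackModel {
    // Models: vload = VectorBlend(zero, LoadVector(addr), mask)
    // i.e. lane i = m.laneIsSet(i) ? a[offset + i] : 0
    static IntVector loadMaskedModel(VectorSpecies<Integer> s,
                                     int[] a, int offset,
                                     VectorMask<Integer> m) {
        IntVector full = IntVector.fromArray(s, a, offset); // unmasked LoadVector
        return IntVector.zero(s).blend(full, m);            // mask set -> loaded lane
    }
}
```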
src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
@@ -321,6 +321,24 @@ VM load(Class<? extends VM> vmClass, Class<E> E, int length,

/* ============================================================================ */

public interface LoadVectorMaskedOperation<C, V, E, S extends VectorSpecies<E>, M extends VectorMask<E>> {
V load(C container, int index, S s, M m);
}

@IntrinsicCandidate
public static
<C, V, E, S extends VectorSpecies<E>,
M extends VectorMask<E>>
V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
int length, Object base, long offset, M m,
C container, int index, S s, // Arguments for default implementation
LoadVectorMaskedOperation<C, V, E, S, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s, m);
}

/* ============================================================================ */

public interface LoadVectorOperationWithMap<C, V extends Vector<?>, E, S extends VectorSpecies<E>> {
V loadWithMap(C container, int index, int[] indexMap, int indexM, S s);
}
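A caller of the new loadMasked entry point above passes the concrete vector and mask classes (so C2 sees them as constants), the Unsafe base/offset pair, and the container/index/species triple consumed only by the lambda fallback. A hypothetical sketch of such a call site; SPECIES, arrayAddress, and scalarLoad are illustrative stand-ins rather than actual JDK helpers, and this shape only compiles inside the JDK's own classes:

```java
// Hypothetical sketch of a fromArray0Template-style caller (illustrative names).
ByteVector fromArrayMasked(byte[] a, int offset, Byte128Mask m) {
    return VectorSupport.loadMasked(
        Byte128Vector.class, Byte128Mask.class, byte.class,
        SPECIES.length(),                // lane count
        a, arrayAddress(a, offset), m,   // Unsafe addressing plus the mask
        a, offset, SPECIES,              // arguments for the default implementation
        (arr, off, s, vm) -> scalarLoad(arr, off, s, vm)); // non-capturing fallback
}
```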
src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java
@@ -848,6 +848,13 @@ ByteVector fromArray0(byte[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}

@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}


@ForceInline
@Override
@@ -856,6 +863,13 @@ ByteVector fromBooleanArray0(boolean[] a, int offset) {
return super.fromBooleanArray0Template(a, offset); // specialize
}

@ForceInline
@Override
final
ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
return super.fromBooleanArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}

@ForceInline
@Override
final
@@ -884,13 +898,21 @@ void intoArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m);
}

@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
super.intoBooleanArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m);
}

@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}


// End of specialized low-level memory operations.

// ================================================
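The fromBooleanArray0 override above routes boolean-array loads through the same intrinsic. At the public API level it backs calls like this minimal sketch (again run with --add-modules jdk.incubator.vector); per the masked-load contract, lanes where the mask is unset are filled with zero:

```java
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

class BooleanTailLoad {
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

    // Loads the tail of a boolean[] as a byte vector (lanes are 0 or 1);
    // lanes past flags.length are masked off and read as zero.
    static ByteVector loadTail(boolean[] flags, int offset) {
        VectorMask<Byte> m = SPECIES.indexInRange(offset, flags.length);
        return ByteVector.fromBooleanArray(SPECIES, flags, offset, m);
    }
}
```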
