Skip to content

Commit

Permalink
8306136: [vectorapi] Intrinsics of VectorMask.laneIsSet()
Browse files Browse the repository at this point in the history
Reviewed-by: psandoz, xgong
  • Loading branch information
e1iu committed Jul 21, 2023
1 parent 783de32 commit d4aacdb
Show file tree
Hide file tree
Showing 40 changed files with 1,070 additions and 78 deletions.
49 changes: 49 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
Expand Up @@ -4587,6 +4587,55 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
%}

// ------------------------------ Extract --------------------------------------

// BOOLEAN

// Extract one boolean lane from vector "src" into the integer register "dst"
// when the lane index is held in a general-purpose register (iRegI idx)
// rather than being an immediate. Matches the ExtractUB ideal node.
// "tmp" is a vector scratch register (declared TEMP) and is overwritten.
instruct extractUB_ireg(iRegINoSp dst, vReg src, iRegI idx, vReg tmp) %{
match(Set dst (ExtractUB src idx));
effect(TEMP tmp);
format %{ "extractUB_ireg $dst, $src, $idx\t# variable index. KILL $tmp" %}
ins_encode %{
// Input "src" is a vector of boolean represented as
// bytes with 0x00/0x01 as element values.
// "idx" is expected to be in range.

uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
// Put the index into byte lane 0 of tmp so that tmp can serve as the
// index vector of the table lookup below.
__ mov($tmp$$FloatRegister, __ B, 0, $idx$$Register);
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// NEON path: byte table lookup with src as the table and tmp as the
// index vector; the arrangement is T16B for 16-byte vectors, T8B otherwise.
// NOTE(review): the literal 1 is presumably the number of table registers - confirm.
__ tbl($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister, 1, $tmp$$FloatRegister);
} else {
// Non-NEON path: same lookup using the SVE table instruction.
assert(UseSVE > 0, "must be sve");
__ sve_tbl($tmp$$FloatRegister, __ B, $src$$FloatRegister, $tmp$$FloatRegister);
}
// Byte lane 0 of tmp now holds the looked-up element; move it to dst.
__ smov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
%}
ins_pipe(pipe_slow);
%}

// Extract one boolean lane from vector "src" into "dst" when the lane index
// is an immediate constant smaller than 16 (enforced by the predicate).
// No scratch register is needed: a single smov reads the byte lane at the
// constant index straight out of src.
// NOTE(review): the bound of 16 presumably reflects the number of byte lanes
// directly addressable by smov - confirm.
instruct extractUB_index_lt16(iRegINoSp dst, vReg src, immI idx) %{
predicate(n->in(2)->get_int() < 16);
match(Set dst (ExtractUB src idx));
format %{ "extractUB_index_lt16 $dst, $src, $idx\t# index < 16" %}
ins_encode %{
__ smov($dst$$Register, $src$$FloatRegister, __ B, (int)($idx$$constant));
%}
ins_pipe(pipe_slow);
%}

// Extract one boolean lane from vector "src" into "dst" when the lane index
// is an immediate constant of 16 or more (enforced by the predicate).
// This rule asserts that SVE is enabled and delegates to the
// sve_extract_integral helper with element type T_BYTE.
// "tmp" is a vector scratch register (declared TEMP) passed to that helper.
instruct extractUB_index_ge16(iRegINoSp dst, vReg src, immI idx, vReg tmp) %{
predicate(n->in(2)->get_int() >= 16);
match(Set dst (ExtractUB src idx));
effect(TEMP tmp);
format %{ "extractUB_index_ge16 $dst, $src, $idx\t# index >=16. KILL $tmp" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
__ sve_extract_integral($dst$$Register, T_BYTE, $src$$FloatRegister,
(int)($idx$$constant), $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

// BYTE

instruct extractB_index_lt16(iRegINoSp dst, vReg src, immI idx) %{
Expand Down
28 changes: 28 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
Expand Up @@ -3118,6 +3118,34 @@ instruct extract$1_index_ge$2($3 dst, vReg src, immI idx, vReg tmp) %{
ins_pipe(pipe_slow);
%}')dnl
dnl

// BOOLEAN

// Extract one boolean lane from vector "src" into the integer register "dst"
// when the lane index is held in a general-purpose register (iRegI idx)
// rather than being an immediate. Matches the ExtractUB ideal node.
// "tmp" is a vector scratch register (declared TEMP) and is overwritten.
instruct extractUB_ireg(iRegINoSp dst, vReg src, iRegI idx, vReg tmp) %{
match(Set dst (ExtractUB src idx));
effect(TEMP tmp);
format %{ "extractUB_ireg $dst, $src, $idx\t# variable index. KILL $tmp" %}
ins_encode %{
// Input "src" is a vector of boolean represented as
// bytes with 0x00/0x01 as element values.
// "idx" is expected to be in range.

uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
// Put the index into byte lane 0 of tmp so that tmp can serve as the
// index vector of the table lookup below.
__ mov($tmp$$FloatRegister, __ B, 0, $idx$$Register);
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// NEON path: byte table lookup with src as the table and tmp as the
// index vector; the arrangement is T16B for 16-byte vectors, T8B otherwise.
// NOTE(review): the literal 1 is presumably the number of table registers - confirm.
__ tbl($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister, 1, $tmp$$FloatRegister);
} else {
// Non-NEON path: same lookup using the SVE table instruction.
assert(UseSVE > 0, "must be sve");
__ sve_tbl($tmp$$FloatRegister, __ B, $src$$FloatRegister, $tmp$$FloatRegister);
}
// Byte lane 0 of tmp now holds the looked-up element; move it to dst.
__ smov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
%}
ins_pipe(pipe_slow);
%}
EXTRACT_INT_SMALL(UB, 16, iRegINoSp, smov, B)
EXTRACT_INT_LARGE(UB, 16, iRegINoSp, T_BYTE)

// BYTE
EXTRACT_INT_SMALL(B, 16, iRegINoSp, smov, B)
EXTRACT_INT_LARGE(B, 16, iRegINoSp, T_BYTE)
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/classfile/vmIntrinsics.hpp
Expand Up @@ -1104,7 +1104,7 @@ class methodHandle;
do_signature(vector_extract_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)" \
"J") \
Expand Down
126 changes: 84 additions & 42 deletions src/hotspot/share/opto/vectorIntrinsics.cpp
Expand Up @@ -2687,11 +2687,12 @@ bool LibraryCallKit::inline_vector_insert() {
}

// public static
// <V extends Vector<E>,
// <VM extends VectorPayload,
// E>
// long extract(Class<? extends V> vectorClass, Class<E> elementType, int vlen,
// V vec, int ix,
// VecExtractOp<V> defaultImpl)
// long extract(Class<? extends VM> vClass, Class<E> eClass,
// int length,
// VM vm, int i,
// VecExtractOp<VM> defaultImpl)
bool LibraryCallKit::inline_vector_extract() {
const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
Expand All @@ -2701,13 +2702,12 @@ bool LibraryCallKit::inline_vector_extract() {
if (vector_klass == nullptr || elem_klass == nullptr || vlen == nullptr || idx == nullptr) {
return false; // dead code
}
if (vector_klass->const_oop() == nullptr || elem_klass->const_oop() == nullptr || !vlen->is_con() || !idx->is_con()) {
if (vector_klass->const_oop() == nullptr || elem_klass->const_oop() == nullptr || !vlen->is_con()) {
if (C->print_intrinsics()) {
tty->print_cr(" ** missing constant: vclass=%s etype=%s vlen=%s idx=%s",
tty->print_cr(" ** missing constant: vclass=%s etype=%s vlen=%s",
NodeClassNames[argument(0)->Opcode()],
NodeClassNames[argument(1)->Opcode()],
NodeClassNames[argument(2)->Opcode()],
NodeClassNames[argument(4)->Opcode()]);
NodeClassNames[argument(2)->Opcode()]);
}
return false; // not enough info for intrinsification
}
Expand All @@ -2726,51 +2726,93 @@ bool LibraryCallKit::inline_vector_extract() {
}
BasicType elem_bt = elem_type->basic_type();
int num_elem = vlen->get_con();
int vopc = ExtractNode::opcode(elem_bt);
if (!arch_supports_vector(vopc, num_elem, elem_bt, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=1 op=extract vlen=%d etype=%s ismask=no",
num_elem, type2name(elem_bt));
}
return false; // not supported
}

ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);

Node* opd = unbox_vector(argument(3), vbox_type, elem_bt, num_elem);
if (opd == nullptr) {
return false;
}
Node* opd = nullptr;

ConINode* idx_con = gvn().intcon(idx->get_con())->as_ConI();
Node* operation = gvn().transform(ExtractNode::make(opd, idx_con, elem_bt));
if (is_vector_mask(vbox_klass)) {
// vbox_klass is mask. This is used for VectorMask.laneIsSet(int).

Node* bits = nullptr;
switch (elem_bt) {
case T_BYTE:
case T_SHORT:
case T_INT: {
bits = gvn().transform(new ConvI2LNode(operation));
break;
Node* pos = argument(4); // can be variable
if (arch_supports_vector(Op_ExtractUB, num_elem, elem_bt, VecMaskUseAll)) {
// Transform mask to vector with type of boolean and utilize ExtractUB node.
opd = unbox_vector(argument(3), vbox_type, elem_bt, num_elem);
if (opd == nullptr) {
return false;
}
opd = gvn().transform(VectorStoreMaskNode::make(gvn(), opd, elem_bt, num_elem));
opd = gvn().transform(new ExtractUBNode(opd, pos));
opd = gvn().transform(new ConvI2LNode(opd));
} else if (arch_supports_vector(Op_VectorMaskToLong, num_elem, elem_bt, VecMaskUseLoad)) {
opd = unbox_vector(argument(3), vbox_type, elem_bt, num_elem);
if (opd == nullptr) {
return false;
}
// VectorMaskToLongNode requires the input to be either a mask or a vector of BOOLEAN type.
if (opd->bottom_type()->isa_vectmask() == nullptr) {
opd = gvn().transform(VectorStoreMaskNode::make(gvn(), opd, elem_bt, num_elem));
}
// (toLong() >>> pos) & 1L
opd = gvn().transform(new VectorMaskToLongNode(opd, TypeLong::LONG));
opd = gvn().transform(new URShiftLNode(opd, pos));
opd = gvn().transform(new AndLNode(opd, gvn().makecon(TypeLong::ONE)));
} else {
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected mask extraction because architecture does not support it");
}
return false; // not supported
}
case T_FLOAT: {
bits = gvn().transform(new MoveF2INode(operation));
bits = gvn().transform(new ConvI2LNode(bits));
break;
} else {
// vbox_klass is vector. This is used for Vector.lane(int).
if (!idx->is_con()) {
if (C->print_intrinsics()) {
tty->print_cr(" ** missing constant: idx=%s", NodeClassNames[argument(4)->Opcode()]);
}
return false; // not enough info for intrinsification
}
case T_DOUBLE: {
bits = gvn().transform(new MoveD2LNode(operation));
break;

int vopc = ExtractNode::opcode(elem_bt);
if (!arch_supports_vector(vopc, num_elem, elem_bt, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=1 op=extract vlen=%d etype=%s ismask=no",
num_elem, type2name(elem_bt));
}
return false; // not supported
}
case T_LONG: {
bits = operation; // no conversion needed
break;

opd = unbox_vector(argument(3), vbox_type, elem_bt, num_elem);
if (opd == nullptr) {
return false;
}
default: fatal("%s", type2name(elem_bt));
}
ConINode* idx_con = gvn().intcon(idx->get_con())->as_ConI();

set_result(bits);
opd = gvn().transform(ExtractNode::make(opd, idx_con, elem_bt));
switch (elem_bt) {
case T_BYTE:
case T_SHORT:
case T_INT: {
opd = gvn().transform(new ConvI2LNode(opd));
break;
}
case T_FLOAT: {
opd = gvn().transform(new MoveF2INode(opd));
opd = gvn().transform(new ConvI2LNode(opd));
break;
}
case T_DOUBLE: {
opd = gvn().transform(new MoveD2LNode(opd));
break;
}
case T_LONG: {
// no conversion needed
break;
}
default: fatal("%s", type2name(elem_bt));
}
}
set_result(opd);
return true;
}

Expand Down
5 changes: 3 additions & 2 deletions src/hotspot/share/opto/vectornode.cpp
Expand Up @@ -1202,9 +1202,10 @@ int ExtractNode::opcode(BasicType bt) {
}
}

// Extract a scalar element of vector.
// Extract a scalar element of vector by constant position.
Node* ExtractNode::make(Node* v, ConINode* pos, BasicType bt) {
assert(pos->get_int() < Matcher::max_vector_size(bt), "pos in range");
assert(pos->get_int() >= 0 &&
pos->get_int() < Matcher::max_vector_size(bt), "pos in range");
switch (bt) {
case T_BOOLEAN: return new ExtractUBNode(v, pos);
case T_BYTE: return new ExtractBNode(v, pos);
Expand Down
24 changes: 10 additions & 14 deletions src/hotspot/share/opto/vectornode.hpp
Expand Up @@ -1280,12 +1280,8 @@ class VectorLoadConstNode : public VectorNode {
// Extract a scalar from a vector at position "pos"
class ExtractNode : public Node {
public:
ExtractNode(Node* src, ConINode* pos) : Node(nullptr, src, (Node*)pos) {
assert(in(2)->get_int() >= 0, "positive constants");
}
ExtractNode(Node* src, Node* pos) : Node(nullptr, src, pos) {}
virtual int Opcode() const;
uint pos() const { return in(2)->get_int(); }

static Node* make(Node* v, ConINode* pos, BasicType bt);
static int opcode(BasicType bt);
};
Expand All @@ -1294,7 +1290,7 @@ class ExtractNode : public Node {
// Extract a byte from a vector at position "pos"
class ExtractBNode : public ExtractNode {
public:
ExtractBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractBNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return TypeInt::BYTE; }
virtual uint ideal_reg() const { return Op_RegI; }
Expand All @@ -1304,17 +1300,17 @@ class ExtractBNode : public ExtractNode {
// Extract a boolean from a vector at position "pos"
class ExtractUBNode : public ExtractNode {
public:
ExtractUBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractUBNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return TypeInt::UBYTE; }
virtual const Type* bottom_type() const { return TypeInt::BOOL; }
virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------ExtractCNode-----------------------------------
// Extract a char from a vector at position "pos"
class ExtractCNode : public ExtractNode {
public:
ExtractCNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractCNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return TypeInt::CHAR; }
virtual uint ideal_reg() const { return Op_RegI; }
Expand All @@ -1324,7 +1320,7 @@ class ExtractCNode : public ExtractNode {
// Extract a short from a vector at position "pos"
class ExtractSNode : public ExtractNode {
public:
ExtractSNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractSNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return TypeInt::SHORT; }
virtual uint ideal_reg() const { return Op_RegI; }
Expand All @@ -1334,7 +1330,7 @@ class ExtractSNode : public ExtractNode {
// Extract an int from a vector at position "pos"
class ExtractINode : public ExtractNode {
public:
ExtractINode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractINode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
Expand All @@ -1344,7 +1340,7 @@ class ExtractINode : public ExtractNode {
// Extract a long from a vector at position "pos"
class ExtractLNode : public ExtractNode {
public:
ExtractLNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractLNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
Expand All @@ -1354,7 +1350,7 @@ class ExtractLNode : public ExtractNode {
// Extract a float from a vector at position "pos"
class ExtractFNode : public ExtractNode {
public:
ExtractFNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractFNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return Type::FLOAT; }
virtual uint ideal_reg() const { return Op_RegF; }
Expand All @@ -1364,7 +1360,7 @@ class ExtractFNode : public ExtractNode {
// Extract a double from a vector at position "pos"
class ExtractDNode : public ExtractNode {
public:
ExtractDNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
ExtractDNode(Node* src, Node* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
Expand Down
Expand Up @@ -306,20 +306,20 @@ long reductionCoerced(int oprId,

/* ============================================================================ */

public interface VecExtractOp<V extends Vector<?>> {
long apply(V v, int i);
public interface VecExtractOp<VM extends VectorPayload> {
long apply(VM vm, int i);
}

@IntrinsicCandidate
public static
<V extends Vector<E>,
<VM extends VectorPayload,
E>
long extract(Class<? extends V> vClass, Class<E> eClass,
long extract(Class<? extends VM> vClass, Class<E> eClass,
int length,
V v, int i,
VecExtractOp<V> defaultImpl) {
VM vm, int i,
VecExtractOp<VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, i);
return defaultImpl.apply(vm, i);
}

/* ============================================================================ */
Expand Down

1 comment on commit d4aacdb

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.