Skip to content
Permalink
Browse files
8273949: Intrinsic creation for VectorMask.toLong operation.
Reviewed-by: psandoz, sviswanathan, eliu
  • Loading branch information
Jatin Bhateja committed Sep 23, 2021
1 parent b44662a commit 0e7348dd9b81a7310357c600131fb99bd9f74ccd
Showing 45 changed files with 645 additions and 251 deletions.
@@ -4058,13 +4058,14 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
}

#ifdef _LP64
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
Register tmp, KRegister ktmp, int masklen, int vec_enc) {
assert(VM_Version::supports_avx512vlbw(), "");
vpxor(xtmp, xtmp, xtmp, vec_enc);
vpsubb(xtmp, xtmp, mask, vec_enc);
evpmovb2m(ktmp, xtmp, vec_enc);
kmovql(tmp, ktmp);
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask,
Register tmp, int masklen, int vec_enc) {
if(VM_Version::supports_avx512bw()) {
kmovql(tmp, mask);
} else {
assert(masklen <= 16, "");
kmovwl(tmp, mask);
}
switch(opc) {
case Op_VectorMaskTrueCount:
popcntq(dst, tmp);
@@ -224,8 +224,7 @@

public:
#ifdef _LP64
void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, Register tmp,
KRegister ktmp, int masklen, int vec_enc);
void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int vec_enc);

void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1,
Register tmp, int masklen, int vec_enc);
@@ -1558,6 +1558,7 @@ const bool Matcher::match_rule_supported(int opcode) {
case Op_VectorMaskFirstTrue:
case Op_VectorMaskLastTrue:
case Op_VectorMaskTrueCount:
case Op_VectorMaskToLong:
if (!is_LP64 || UseAVX < 1) {
return false;
}
@@ -8659,58 +8660,87 @@ instruct vmasked_store64(memory mem, vec src, kReg mask) %{
ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp) %{
predicate(VM_Version::supports_avx512vlbw());
// VectorMaskToLong for masks held in an opmask (k) register: copies the mask
// bits from $mask into the 64-bit general purpose register $dst.
instruct vmask_tolong_evex(rRegL dst, kReg mask) %{
// Selected only when the input node's type is a predicate-register mask type.
predicate(n->in(1)->bottom_type()->isa_vectmask());
match(Set dst (VectorMaskToLong mask));
format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
ins_encode %{
// NOTE(review): the kReg overload of C2_MacroAssembler::vector_mask_operation
// guards its kmovql with supports_avx512bw() rather than supports_avx512vlbw();
// confirm which feature check is intended here.
if (VM_Version::supports_avx512vlbw()) {
// 64-bit move from the opmask register.
__ kmovql($dst$$Register, $mask$$KRegister);
} else {
// Fallback uses the 16-bit opmask move, so the mask must have at most 16 lanes.
int mask_len = Matcher::vector_length(this, $mask);
assert(mask_len <= 16, "");
__ kmovwl($dst$$Register, $mask$$KRegister);
}
%}
ins_pipe( pipe_slow );
%}

// VectorMaskToLong for masks held in a vector register as T_BOOLEAN elements:
// produces the result in $dst using $xtmp as a scratch vector.
instruct vmask_tolong_avx(rRegL dst, vec mask, vec xtmp) %{
// Selected only when the input is NOT a predicate-register mask and its
// vector element type is T_BOOLEAN.
predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL &&
n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
match(Set dst (VectorMaskToLong mask));
format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
effect(TEMP xtmp);
ins_encode %{
int vlen_enc = vector_length_encoding(this, $mask);
// xtmp = 0
__ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
// xtmp = 0 - mask, byte-wise. Presumably each boolean lane holds 0 or 1, so a
// set lane becomes 0xFF (sign bit set) -- TODO confirm the lane encoding.
__ vpsubb($xtmp$$XMMRegister, $xtmp$$XMMRegister, $mask$$XMMRegister, vlen_enc);
// Gather the most significant bit of every byte of xtmp into dst.
__ vpmovmskb($dst$$Register, $xtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
predicate(n->in(1)->bottom_type()->isa_vectmask());
match(Set dst (VectorMaskTrueCount mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp);
format %{ "vector_truecount_evex $mask \t! vector mask true count" %}
effect(TEMP_DEF dst, TEMP tmp, KILL cr);
format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = Matcher::vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp, rFlagsReg cr) %{
predicate(VM_Version::supports_avx512vlbw());
match(Set dst (VectorMaskFirstTrue mask));
match(Set dst (VectorMaskLastTrue mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp, KILL cr);
format %{ "vector_mask_first_or_last_true_evex $mask \t! vector first/last true location" %}
instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
match(Set dst (VectorMaskTrueCount mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr);
format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp and $xtmp1 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = Matcher::vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc);
$xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1) %{
predicate(!VM_Version::supports_avx512vlbw());
match(Set dst (VectorMaskTrueCount mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1);
format %{ "vector_truecount_avx $mask \t! vector mask true count" %}
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
predicate(n->in(1)->bottom_type()->isa_vectmask());
match(Set dst (VectorMaskFirstTrue mask));
match(Set dst (VectorMaskLastTrue mask));
effect(TEMP_DEF dst, TEMP tmp, KILL cr);
format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = Matcher::vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512vlbw());
predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
match(Set dst (VectorMaskFirstTrue mask));
match(Set dst (VectorMaskLastTrue mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr);
format %{ "vector_mask_first_or_last_true_avx $mask \t! vector first/last true location" %}
format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp and $xtmp1 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
@@ -9297,7 +9327,7 @@ instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP
#ifdef _LP64
instruct mask_all_evexI_imm(kReg dst, immI cnt, rRegL tmp) %{
match(Set dst (MaskAll cnt));
effect(TEMP tmp);
effect(TEMP_DEF dst, TEMP tmp);
format %{ "mask_all_evexI $dst, $cnt \t! using $tmp as TEMP" %}
ins_encode %{
int vec_len = Matcher::vector_length(this);
@@ -9317,7 +9347,7 @@ instruct mask_all_evexI_imm(kReg dst, immI cnt, rRegL tmp) %{

instruct mask_all_evexI(kReg dst, rRegI src, rRegL tmp) %{
match(Set dst (MaskAll src));
effect(TEMP tmp);
effect(TEMP_DEF dst, TEMP tmp);
format %{ "mask_all_evexI $dst, $src \t! using $tmp as TEMP" %}
ins_encode %{
int vec_len = Matcher::vector_length(this);
@@ -1080,7 +1080,7 @@ class methodHandle;
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)" \
"I") \
"J") \
do_name(vector_mask_oper_name, "maskReductionCoerced") \
\
/* (2) Bytecode intrinsics */ \
@@ -425,6 +425,7 @@ macro(VectorMaskOp)
macro(VectorMaskTrueCount)
macro(VectorMaskFirstTrue)
macro(VectorMaskLastTrue)
macro(VectorMaskToLong)
macro(Pack)
macro(PackB)
macro(PackS)
@@ -653,7 +653,7 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
}

// <E, M>
// int maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass,
// long maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass,
// int length, M m, VectorMaskOp<M> defaultImpl)
bool LibraryCallKit::inline_vector_mask_operation() {
const TypeInt* oper = gvn().type(argument(0))->isa_int();
@@ -698,8 +698,14 @@ bool LibraryCallKit::inline_vector_mask_operation() {
ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
const TypeInstPtr* mask_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem, true);
Node* store_mask = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem));
Node* maskoper = gvn().transform(VectorMaskOpNode::make(store_mask, TypeInt::INT, mopc));
if (mask_vec->bottom_type()->isa_vectmask() == NULL) {
mask_vec = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem));
}
const Type* maskoper_ty = mopc == Op_VectorMaskToLong ? (const Type*)TypeLong::LONG : (const Type*)TypeInt::INT;
Node* maskoper = gvn().transform(VectorMaskOpNode::make(mask_vec, maskoper_ty, mopc));
if (mopc != Op_VectorMaskToLong) {
maskoper = ConvI2L(maskoper);
}
set_result(maskoper);

C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
@@ -1425,13 +1425,14 @@ Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) {
return new VectorMaskLastTrueNode(mask, ty);
case Op_VectorMaskFirstTrue:
return new VectorMaskFirstTrueNode(mask, ty);
case Op_VectorMaskToLong:
return new VectorMaskToLongNode(mask, ty);
default:
assert(false, "Unhandled operation");
}
return NULL;
}


#ifndef PRODUCT
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
CallStaticJavaNode::dump_spec(st);
@@ -922,7 +922,7 @@ class VectorMaskOpNode : public TypeNode {
public:
VectorMaskOpNode(Node* mask, const Type* ty, int mopc):
TypeNode(ty, 2), _mopc(mopc) {
assert(mask->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN, "");
assert(Matcher::has_predicated_vectors() || mask->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN, "");
init_req(1, mask);
}

@@ -957,6 +957,14 @@ class VectorMaskLastTrueNode : public VectorMaskOpNode {
virtual int Opcode() const;
};

// Mask operation node for Op_VectorMaskToLong. It forwards the mask input and
// result type to VectorMaskOpNode and reports Op_RegL as its ideal register.
class VectorMaskToLongNode : public VectorMaskOpNode {
 public:
  VectorMaskToLongNode(Node* mask, const Type* ty)
    : VectorMaskOpNode(mask, ty, Op_VectorMaskToLong) {
  }

  virtual int Opcode() const;

  // The result is produced in a long register.
  virtual uint ideal_reg() const {
    return Op_RegL;
  }
};

//-------------------------- Vector mask broadcast -----------------------------------
class MaskAllNode : public VectorNode {
public:
@@ -430,6 +430,18 @@ int VectorSupport::vop2ideal(jint id, BasicType bt) {
}
break;
}
case VECTOR_OP_MASK_TOLONG: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: // fall-through
case T_LONG: // fall-through
case T_FLOAT: // fall-through
case T_DOUBLE: return Op_VectorMaskToLong;
default: fatal("MASK_TOLONG: %s", type2name(bt));
}
break;
}
case VECTOR_OP_TAN:
case VECTOR_OP_TANH:
case VECTOR_OP_SIN:
@@ -82,10 +82,11 @@ class VectorSupport : AllStatic {
VECTOR_OP_MASK_TRUECOUNT = 19,
VECTOR_OP_MASK_FIRSTTRUE = 20,
VECTOR_OP_MASK_LASTTRUE = 21,
VECTOR_OP_MASK_TOLONG = 22,

// Rotate operations
VECTOR_OP_LROTATE = 22,
VECTOR_OP_RROTATE = 23,
VECTOR_OP_LROTATE = 23,
VECTOR_OP_RROTATE = 24,

// Vector Math Library
VECTOR_OP_TAN = 101,
@@ -69,10 +69,11 @@ public class VectorSupport {
public static final int VECTOR_OP_MASK_TRUECOUNT = 19;
public static final int VECTOR_OP_MASK_FIRSTTRUE = 20;
public static final int VECTOR_OP_MASK_LASTTRUE = 21;
public static final int VECTOR_OP_MASK_TOLONG = 22;

// Rotate operations
public static final int VECTOR_OP_LROTATE = 22;
public static final int VECTOR_OP_RROTATE = 23;
public static final int VECTOR_OP_LROTATE = 23;
public static final int VECTOR_OP_RROTATE = 24;

// Math routines
public static final int VECTOR_OP_TAN = 101;
@@ -632,18 +633,18 @@ VP maybeRebox(VP v) {

/* ============================================================================ */
public interface VectorMaskOp<M extends VectorMask<?>> {
int apply(M m);
long apply(M m);
}

@IntrinsicCandidate
public static
<M extends VectorMask<E>,
E>
int maskReductionCoerced(int oper,
Class<? extends M> mClass, Class<?> eClass,
int length,
M m,
VectorMaskOp<M> defaultImpl) {
long maskReductionCoerced(int oper,
Class<? extends M> mClass, Class<?> eClass,
int length,
M m,
VectorMaskOp<M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(m);
}
@@ -24,6 +24,8 @@
*/
package jdk.incubator.vector;

import java.util.Objects;

import jdk.internal.vm.annotation.ForceInline;

import static jdk.incubator.vector.VectorOperators.*;
@@ -63,23 +65,13 @@ public final VectorSpecies<E> vectorSpecies() {

@Override
public boolean laneIsSet(int i) {
return getBits()[i];
}

@Override
public long toLong() {
// FIXME: This should be an intrinsic.
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
long res = 0;
long set = 1;
boolean[] bits = getBits();
for (int i = 0; i < bits.length; i++) {
res = bits[i] ? res | set : res;
set = set << 1;
int length = length();
Objects.checkIndex(i, length);
if (length <= Long.SIZE) {
return ((toLong() >>> i) & 1L) == 1;
} else {
return getBits()[i];
}
return res;
}

@Override
@@ -180,6 +172,17 @@ static int lastTrueHelper(boolean[] bits) {
return -1;
}

/*package-private*/
/**
 * Packs a boolean lane array into a {@code long}, with lane {@code i}
 * mapped to bit {@code i}.
 *
 * <p>The bit selector is shifted left once per lane, so it becomes zero
 * after 64 lanes; any lanes at index 64 or above therefore contribute
 * nothing to the result. No length check is performed here.
 *
 * @param bits the lane values, lowest lane first
 * @return the packed lane bits
 */
static long toLongHelper(boolean[] bits) {
    long packed = 0L;
    long laneBit = 1L;
    for (boolean lane : bits) {
        if (lane) {
            packed |= laneBit;
        }
        laneBit <<= 1;
    }
    return packed;
}

@Override
@ForceInline
public VectorMask<E> indexInRange(int offset, int limit) {

0 comments on commit 0e7348d

Please sign in to comment.