Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8264104: Eliminate unnecessary vector mask conversion during VectorUnbox for floating point VectorMask #3238

Closed
wants to merge 5 commits into from
@@ -3334,6 +3334,36 @@ instruct storemask2L(vecD dst, vecX src, immI_8 size)
ins_pipe(pipe_slow);
%}

// vector mask cast

instruct vmaskcastD(vecD dst)
%{
predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8 &&
n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
match(Set dst (VectorMaskCast dst));
ins_cost(0);
format %{ "vmaskcast $dst\t# empty" %}
ins_encode %{
// empty
%}
ins_pipe(pipe_class_empty);
%}

instruct vmaskcastX(vecX dst)
%{
predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16 &&
n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
match(Set dst (VectorMaskCast dst));
ins_cost(0);
format %{ "vmaskcast $dst\t# empty" %}
ins_encode %{
// empty
%}
ins_pipe(pipe_class_empty);
%}

//-------------------------------- LOAD_IOTA_INDICES----------------------------------

instruct loadcon8B(vecD dst, immI0 src)
@@ -1258,6 +1258,27 @@ instruct storemask2L(vecD dst, vecX src, immI_8 size)
ins_pipe(pipe_slow);
%}

// vector mask cast
dnl
define(`VECTOR_MASK_CAST', `
instruct vmaskcast$1`'(vec$1 dst)
%{
predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2 &&
n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
match(Set dst (VectorMaskCast dst));
ins_cost(0);
format %{ "vmaskcast $dst\t# empty" %}
ins_encode %{
// empty
%}
ins_pipe(pipe_class_empty);
%}')dnl
dnl $1 $2
VECTOR_MASK_CAST(D, 8)
VECTOR_MASK_CAST(X, 16)
dnl

//-------------------------------- LOAD_IOTA_INDICES----------------------------------
dnl
define(`PREDICATE', `ifelse($1, 8,
@@ -1760,3 +1760,18 @@ instruct vsubD(vReg dst, vReg src1, vReg src2) %{
%}
ins_pipe(pipe_slow);
%}

// vector mask cast

instruct vmaskcast(vReg dst) %{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
match(Set dst (VectorMaskCast dst));
ins_cost(0);
format %{ "vmaskcast $dst\t# empty (sve)" %}
ins_encode %{
// empty
%}
ins_pipe(pipe_class_empty);
%}

@@ -904,3 +904,18 @@ BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)

// vector mask cast

instruct vmaskcast(vReg dst) %{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
match(Set dst (VectorMaskCast dst));
This conversation was marked as resolved by XiaohongGong

This comment has been minimized.

@jatin-bhateja

jatin-bhateja Apr 13, 2021
Member

A strict check based on in-type and out-type in predicate could strengthen the pattern.

This comment has been minimized.

This comment has been minimized.

@XiaohongGong

XiaohongGong Apr 14, 2021
Author Contributor

I'v added the type assertion in the constructor of the "VectorMaskCastNode". See :

assert(in_vt->length() == vt->length(), "vector length must match");
assert(type2aelembytes(in_vt->element_basic_type()) == type2aelembytes(vt->element_basic_type()), "element size must match");

That's why I didn't add the check in predicate. So do you think it's enough?

This comment has been minimized.

@iwanowww

iwanowww Apr 14, 2021

IMO AD instructions should also be accompanied either by predicates or asserts which stress the conditions for the no-op implementation.

This comment has been minimized.

@XiaohongGong

XiaohongGong Apr 15, 2021
Author Contributor

OK, thanks! I will add the type restrict in the predicate.

ins_cost(0);
format %{ "vmaskcast $dst\t# empty (sve)" %}
ins_encode %{
// empty
%}
ins_pipe(pipe_class_empty);
%}

@@ -4200,7 +4200,8 @@ bool MatchRule::is_vector() const {
"FmaVD", "FmaVF","PopCountVI",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD"
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD",
"VectorMaskCast"
};
int cnt = sizeof(vector_list)/sizeof(char*);
if (_rChild) {
@@ -446,6 +446,7 @@ macro(VectorBoxAllocate)
macro(VectorUnbox)
macro(VectorMaskWrapper)
macro(VectorMaskCmp)
macro(VectorMaskCast)
macro(VectorTest)
macro(VectorBlend)
macro(VectorRearrange)
@@ -1232,6 +1232,13 @@ Node* VectorUnboxNode::Ideal(PhaseGVN* phase, bool can_reshape) {
bool is_vector_mask = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
bool is_vector_shuffle = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
if (is_vector_mask) {
if (in_vt->length_in_bytes() == out_vt->length_in_bytes() &&
Matcher::match_rule_supported_vector(Op_VectorMaskCast, out_vt->length(), out_vt->element_basic_type())) {
// Apply "VectorUnbox (VectorBox vmask) ==> VectorMaskCast (vmask)"
// directly. This could avoid the transformation ordering issue from
// "VectorStoreMask (VectorLoadMask vmask) => vmask".
return new VectorMaskCastNode(value, out_vt);
}
// VectorUnbox (VectorBox vmask) ==> VectorLoadMask (VectorStoreMask vmask)
value = phase->transform(VectorStoreMaskNode::make(*phase, value, in_vt->element_basic_type(), in_vt->length()));
return new VectorLoadMaskNode(value, out_vt);
@@ -1240,6 +1240,17 @@ class VectorStoreMaskNode : public VectorNode {
static VectorStoreMaskNode* make(PhaseGVN& gvn, Node* in, BasicType in_type, uint num_elem);
};

class VectorMaskCastNode : public VectorNode {

This comment has been minimized.

@jatin-bhateja

jatin-bhateja Apr 13, 2021
Member

VectorMaskReinterpret seems better choice, since its a re-interpretation and not a casting (up/down).

This comment has been minimized.

@iwanowww

iwanowww Apr 13, 2021

Considering masks have platform-specific representation, full-blown casts between different element types look more appropriate here.

In this particular case, the focus is on the cheapest possible case when representations share the same bit pattern and the cast degenerates into a no-op. But in the longer term, it makes perfect sense to support the full matrix of conversions and don't rely on VectorLoadMask <=> VectorStoreMask and intermediate canonical vector representation.

This comment has been minimized.

public:
VectorMaskCastNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {
const TypeVect* in_vt = in->bottom_type()->is_vect();
assert(in_vt->length() == vt->length(), "vector length must match");
assert(type2aelembytes(in_vt->element_basic_type()) == type2aelembytes(vt->element_basic_type()), "element size must match");
}

This conversation was marked as resolved by XiaohongGong

This comment has been minimized.

@iwanowww

iwanowww Apr 9, 2021

FTR there's a way to avoid platform-specific changes (in AD files) if we don't envision any non-trivial conversions to be supported: just disable matching of the node by overriding Node::ideal_reg():

virtual uint ideal_reg() const { return 0; } // not matched in the AD file 

This comment has been minimized.

@XiaohongGong

XiaohongGong Apr 12, 2021
Author Contributor

Good idea, I will try this way. Thanks!

This comment has been minimized.

@XiaohongGong

XiaohongGong Apr 12, 2021
Author Contributor

I met the bad AD file issue when I remove the changes in AD files and overriding Node::ideal_reg(). I guess if a node is not used as an input of other node, this can work well? For the VectorMaskCastNode, it must be an input for some other nodes. If just disable the matching of this node, how does its usage find the right input code?

This comment has been minimized.

@iwanowww

iwanowww Apr 12, 2021

Yeah, ideal_reg() { return 0; } won't work here. Sorry for the misleading suggestion.

This comment has been minimized.

@XiaohongGong

XiaohongGong Apr 13, 2021
Author Contributor

That's ok. Thanks!

virtual int Opcode() const;
};

// This is intended for use as a simple reinterpret node that has no cast.
class VectorReinterpretNode : public VectorNode {
private:
@@ -1878,6 +1878,7 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(VectorInsertNode, VectorNode) \
declare_c2_type(VectorUnboxNode, VectorNode) \
declare_c2_type(VectorReinterpretNode, VectorNode) \
declare_c2_type(VectorMaskCastNode, VectorNode) \
declare_c2_type(VectorBoxNode, Node) \
declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
declare_c2_type(VectorTestNode, Node) \
ProTip! Use n and p to navigate between commits in a pull request.