Skip to content

Commit

Permalink
8277168: AArch64: Enable arraycopy partial inlining with SVE
Browse files Browse the repository at this point in the history
Reviewed-by: jbhateja, roland, aph
  • Loading branch information
Pengfei Li committed Dec 8, 2021
1 parent fb6d611 commit e7db581
Show file tree
Hide file tree
Showing 16 changed files with 80 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -2481,7 +2481,7 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return &_PR_REG_mask;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return new TypeVectMask(elemTy, length);
}

Expand Down
17 changes: 16 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64_sve.ad
Expand Up @@ -5744,4 +5744,19 @@ instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr
__ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
%}
ins_pipe(pipe_slow);
%}
%}

// ---------------------------- Vector mask generation ---------------------------
// Matches the ideal VectorMaskGen node: takes a long length operand (len) in a
// general register and defines the predicate register operand pg.
// The rule is only available when UseSVE > 0, and the flags register operand
// cr is declared as killed by this instruction.
// NOTE(review): per the Arm SVE ISA, WHILELO with a zero start value sets the
// leading lanes whose index is unsigned-lower than len - confirm against the
// architecture reference.
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
predicate(UseSVE > 0);
match(Set pg (VectorMaskGen len));
effect(KILL cr);
ins_cost(SVE_COST);
format %{ "sve_whilelo $pg, zr, $len\t # sve" %}
ins_encode %{
// Element type reported by the matcher for this node; it is converted to the
// assembler register variant that is passed as the element size below.
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// Emit whilelo with zr as the first scalar operand and len as the second.
__ sve_whilelo(as_PRegister($pg$$reg), size, zr, as_Register($len$$reg));
%}
ins_pipe(pipe_slow);
%}
17 changes: 16 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Expand Up @@ -3174,4 +3174,19 @@ instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr
__ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
%}
ins_pipe(pipe_slow);
%}dnl
%}

// ---------------------------- Vector mask generation ---------------------------
// Matches the ideal VectorMaskGen node: takes a long length operand in a
// general register and defines the predicate register operand pg.
// The rule is only available when UseSVE > 0, and the flags register operand
// cr is declared as killed by this instruction.
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
predicate(UseSVE > 0);
match(Set pg (VectorMaskGen len));
effect(KILL cr);
ins_cost(SVE_COST);
format %{ "sve_whilelo $pg, zr, $len\t # sve" %}
ins_encode %{
// Element type reported by the matcher for this node; it is converted to the
// assembler register variant that is passed as the element size below.
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// Emit whilelo with zr as the first scalar operand and the length as the second.
__ sve_whilelo(as_PRegister($pg$$reg), size, zr, as_Register($len$$reg));
%}
ins_pipe(pipe_slow);
%}
8 changes: 8 additions & 0 deletions src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
Expand Up @@ -469,6 +469,14 @@ void VM_Version::initialize() {
}
}

// Pick the value for ArrayOperationPartialInlineSize: MaxVectorSize when
// UseSVE > 0 and MaxVectorSize >= 16, otherwise 0.
// NOTE(review): 0 presumably means partial inlining is disabled - confirm
// against the users of this flag.
int inline_size = (UseSVE > 0 && MaxVectorSize >= 16) ? MaxVectorSize : 0;
if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
// The flag still has its default value: install the computed size.
FLAG_SET_DEFAULT(ArrayOperationPartialInlineSize, inline_size);
} else if (ArrayOperationPartialInlineSize != 0 && ArrayOperationPartialInlineSize != inline_size) {
// A non-default, non-zero value that differs from the computed size is
// overridden with a warning; a non-default value of 0 is left untouched.
warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
ArrayOperationPartialInlineSize = inline_size;
}

if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
}
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/arm/arm.ad
Expand Up @@ -991,7 +991,7 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/ppc/ppc.ad
Expand Up @@ -2185,7 +2185,7 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/s390/s390.ad
Expand Up @@ -1544,7 +1544,7 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/x86/x86.ad
Expand Up @@ -2051,7 +2051,7 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return &_VECTMASK_REG_mask;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return new TypeVectMask(elemTy, length);
}

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/c2_globals.hpp
Expand Up @@ -85,7 +85,7 @@
product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \
"Partial inline size used for small array operations" \
"(e.g. copy,cmp) acceleration.") \
range(0, 64) \
range(0, 256) \
\
product(bool, AlignVector, true, \
"Perform vector store/load alignment in loop") \
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/library_call.cpp
Expand Up @@ -5427,7 +5427,7 @@ bool LibraryCallKit::inline_vectorizedMismatch() {
Node* obja_adr_mem = memory(C->get_alias_index(obja_adr_t));
Node* objb_adr_mem = memory(C->get_alias_index(objb_adr_t));

Node* vmask = _gvn.transform(new VectorMaskGenNode(ConvI2X(casted_length), TypeVect::VECTMASK, elem_bt));
Node* vmask = _gvn.transform(VectorMaskGenNode::make(ConvI2X(casted_length), elem_bt));
Node* vload_obja = _gvn.transform(new LoadVectorMaskedNode(control(), obja_adr_mem, obja_adr, obja_adr_t, vt, vmask));
Node* vload_objb = _gvn.transform(new LoadVectorMaskedNode(control(), objb_adr_mem, objb_adr, objb_adr_t, vt, vmask));
Node* result = _gvn.transform(new VectorCmpMaskedNode(vload_obja, vload_objb, vmask, TypeInt::INT));
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/macroArrayCopy.cpp
Expand Up @@ -237,7 +237,7 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
inline_block = generate_guard(ctrl, bol_le, NULL, PROB_FAIR);
stub_block = *ctrl;

Node* mask_gen = new VectorMaskGenNode(casted_length, TypeVect::VECTMASK, type);
Node* mask_gen = VectorMaskGenNode::make(casted_length, type);
transform_later(mask_gen);

unsigned vec_size = lane_count * type2aelembytes(type);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/matcher.hpp
Expand Up @@ -332,7 +332,7 @@ class Matcher : public PhaseTransform {
static const bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt);

static const RegMask* predicate_reg_mask(void);
static const TypeVect* predicate_reg_type(const Type* elemTy, int length);
static const TypeVectMask* predicate_reg_type(const Type* elemTy, int length);

// Vector width in bytes
static const int vector_width_in_bytes(BasicType bt);
Expand Down
12 changes: 10 additions & 2 deletions src/hotspot/share/opto/type.cpp
Expand Up @@ -2390,8 +2390,7 @@ const TypeVect *TypeVect::makemask(const Type* elem, uint length) {
BasicType elem_bt = elem->array_element_basic_type();
if (Matcher::has_predicated_vectors() &&
Matcher::match_rule_supported_vector_masked(Op_VectorLoadMask, length, elem_bt)) {
const TypeVect* mtype = Matcher::predicate_reg_type(elem, length);
return (TypeVect*)(const_cast<TypeVect*>(mtype))->hashcons();
return TypeVectMask::make(elem, length);
} else {
return make(elem, length);
}
Expand Down Expand Up @@ -2505,6 +2504,15 @@ const Type *TypeVectMask::xdual() const {
return new TypeVectMask(element_type()->dual(), length());
}

// Convenience overload taking a BasicType: the element type is first mapped to
// a Type via get_const_basic_type() and the work is then delegated to the
// make(const Type*, uint) overload.
const TypeVectMask *TypeVectMask::make(const BasicType elem_bt, uint length) {
  const Type* elem_type = get_const_basic_type(elem_bt);
  return make(elem_type, length);
}

// Builds the mask type for the given element type and lane count: asks the
// matcher for the platform predicate register type and returns the result of
// running it through hashcons().
// The matcher result is dereferenced unconditionally, so this must only be
// called on platforms whose Matcher::predicate_reg_type() does not return NULL.
const TypeVectMask *TypeVectMask::make(const Type* elem, uint length) {
  TypeVectMask* fresh = const_cast<TypeVectMask*>(Matcher::predicate_reg_type(elem, length));
  return (TypeVectMask*) fresh->hashcons();
}

//=============================================================================
// Convenience common pre-built types.
const TypePtr *TypePtr::NULL_PTR;
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/type.hpp
Expand Up @@ -870,6 +870,8 @@ class TypeVectMask : public TypeVect {
TypeVectMask(const Type* elem, uint length) : TypeVect(VectorMask, elem, length) {}
virtual bool eq(const Type *t) const;
virtual const Type *xdual() const;
static const TypeVectMask* make(const BasicType elem_bt, uint length);
static const TypeVectMask* make(const Type* elem, uint length);
};

//------------------------------TypePtr----------------------------------------
Expand Down
22 changes: 14 additions & 8 deletions src/hotspot/share/opto/vectornode.cpp
Expand Up @@ -801,10 +801,10 @@ Node* LoadVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* mask_len = in(3)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size");
BasicType mask_bt = Matcher::vector_element_basic_type(in(3));
int load_sz = type2aelembytes(mask_bt) * ty->get_con();
assert(load_sz <= MaxVectorSize, "Unexpected load size");
if (load_sz == MaxVectorSize) {
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
Expand All @@ -820,10 +820,10 @@ Node* StoreVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* mask_len = in(4)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size");
BasicType mask_bt = Matcher::vector_element_basic_type(in(4));
int load_sz = type2aelembytes(mask_bt) * ty->get_con();
assert(load_sz <= MaxVectorSize, "Unexpected store size");
if (load_sz == MaxVectorSize) {
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
Expand Down Expand Up @@ -1425,6 +1425,12 @@ Node* ShiftVNode::Identity(PhaseGVN* phase) {
return this;
}

// Factory for VectorMaskGenNode: the node type is the TypeVectMask for mask_bt
// whose length is Matcher::max_vector_size(mask_bt); the given length node is
// handed to the constructor together with that type.
Node* VectorMaskGenNode::make(Node* length, BasicType mask_bt) {
  const TypeVectMask* mask_type =
      TypeVectMask::make(mask_bt, Matcher::max_vector_size(mask_bt));
  return new VectorMaskGenNode(length, mask_type);
}

Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) {
switch(mopc) {
case Op_VectorMaskTrueCount:
Expand Down
14 changes: 5 additions & 9 deletions src/hotspot/share/opto/vectornode.hpp
Expand Up @@ -919,23 +919,19 @@ class VectorCmpMaskedNode : public TypeNode {
virtual int Opcode() const;
};

//------------------------------VectorMaskGenNode----------------------------------
class VectorMaskGenNode : public TypeNode {
public:
VectorMaskGenNode(Node* length, const Type* ty, BasicType ety): TypeNode(ty, 2), _elemType(ety) {
VectorMaskGenNode(Node* length, const Type* ty): TypeNode(ty, 2) {
init_req(1, length);
}

virtual int Opcode() const;
BasicType get_elem_type() { return _elemType;}
virtual uint size_of() const { return sizeof(VectorMaskGenNode); }
virtual uint ideal_reg() const {
return Op_RegVectMask;
}

private:
BasicType _elemType;
virtual uint ideal_reg() const { return Op_RegVectMask; }
static Node* make(Node* length, BasicType vmask_bt);
};

//------------------------------VectorMaskOpNode-----------------------------------
class VectorMaskOpNode : public TypeNode {
public:
VectorMaskOpNode(Node* mask, const Type* ty, int mopc):
Expand Down

0 comments on commit e7db581

Please sign in to comment.