Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
8266286: Add LoadVectorGather and StoreVectorScatter's partial version
Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Reviewed-by: xgong, njian
  • Loading branch information
Wang Huang and Ai Jiaming committed Jun 1, 2021
1 parent 5f57f54 commit 73e78a3
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 24 deletions.
109 changes: 97 additions & 12 deletions src/hotspot/cpu/aarch64/aarch64_sve.ad
Expand Up @@ -228,10 +228,6 @@ source %{
case Op_ExtractUB:
return false;
// Vector API specific
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
// Partial size of gather/scatter are not supported for now.
return length_in_bytes == MaxVectorSize;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
if (vlen < 4) {
Expand Down Expand Up @@ -4111,8 +4107,9 @@ instruct rearrangeL(vReg dst, vReg src, vReg shuffle)

instruct gatherI(vReg dst, indirect mem, vReg idx) %{
predicate(UseSVE > 0 &&
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
match(Set dst (LoadVectorGather mem idx));
ins_cost(SVE_COST);
format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %}
Expand All @@ -4124,8 +4121,9 @@ instruct gatherI(vReg dst, indirect mem, vReg idx) %{

instruct gatherL(vReg dst, indirect mem, vReg idx) %{
predicate(UseSVE > 0 &&
(n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
(n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set dst (LoadVectorGather mem idx));
ins_cost(2 * SVE_COST);
format %{ "sve_uunpklo $idx, $idx\n\t"
Expand All @@ -4137,12 +4135,55 @@ instruct gatherL(vReg dst, indirect mem, vReg idx) %{
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Load Gather Partial -------------------------------

// Gather load of int/float elements for vectors smaller than MaxVectorSize.
// Selected when the LoadVectorGather node reports memory_size() < MaxVectorSize
// and the element type is T_INT or T_FLOAT. A governing predicate is built in
// pTmp by sve_whilelo from zr up to vector_length(this), and the gather is
// issued under that predicate. rscratch1 is used as scratch; cr is declared
// killed (presumably because sve_whilelo updates the condition flags).
instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP pTmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %}
  ins_encode %{
    // Resolve the allocated registers once, then emit the sequence.
    PRegister     pg       = as_PRegister($pTmp$$reg);
    FloatRegister zdst     = as_FloatRegister($dst$$reg);
    FloatRegister zidx     = as_FloatRegister($idx$$reg);
    Register      base_reg = as_Register($mem$$base);

    // Element count of this vector bounds the governing predicate.
    __ mov(rscratch1, vector_length(this));
    __ sve_whilelo(pg, __ S, zr, rscratch1);
    __ sve_ld1w_gather(zdst, pg, base_reg, zidx);
  %}
  ins_pipe(pipe_slow);
%}

// Gather load of long/double elements for vectors smaller than MaxVectorSize.
// Selected when the LoadVectorGather node reports memory_size() < MaxVectorSize
// and the element type is T_LONG or T_DOUBLE. A governing predicate is built in
// pTmp by sve_whilelo from zr up to vector_length(this). The idx values are
// unpacked to D-sized lanes with sve_uunpklo before the gather is issued.
// rscratch1 is used as scratch; cr is declared killed (presumably because
// sve_whilelo updates the condition flags).
instruct gatherL_partial(vReg dst, indirect mem, vReg idx, vReg tmp, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP tmp, TEMP pTmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "sve_uunpklo $tmp, $idx\n\t"
            "load_vector_gather $dst, $pTmp, $mem, $tmp\t# vector load gather partial (L/D)" %}
  ins_encode %{
    __ mov(rscratch1, vector_length(this));
    __ sve_whilelo(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
    // Unpack into tmp rather than in place: idx is a plain input operand with
    // no kill effect declared, so its register must keep its value for any
    // other user of the same idx vector.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter -------------------------------

instruct scatterI(indirect mem, vReg src, vReg idx) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
ins_cost(SVE_COST);
format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
Expand All @@ -4154,8 +4195,9 @@ instruct scatterI(indirect mem, vReg src, vReg idx) %{

instruct scatterL(indirect mem, vReg src, vReg idx) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
ins_cost(2 * SVE_COST);
format %{ "sve_uunpklo $idx, $idx\n\t"
Expand All @@ -4167,6 +4209,49 @@ instruct scatterL(indirect mem, vReg src, vReg idx) %{
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter Partial -------------------------------

// Scatter store of int/float elements for vectors smaller than MaxVectorSize.
// Selected when the StoreVectorScatter node reports memory_size() < MaxVectorSize
// and the stored value (n->in(3)->in(1)) has element type T_INT or T_FLOAT.
// A governing predicate is built in pTmp by sve_whilelo from zr up to
// vector_length(this, $src), and the scatter is issued under that predicate.
// rscratch1 is used as scratch; cr is declared killed (presumably because
// sve_whilelo updates the condition flags).
instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP pTmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %}
  ins_encode %{
    // Resolve the allocated registers once, then emit the sequence.
    PRegister     pg       = as_PRegister($pTmp$$reg);
    FloatRegister zsrc     = as_FloatRegister($src$$reg);
    FloatRegister zidx     = as_FloatRegister($idx$$reg);
    Register      base_reg = as_Register($mem$$base);

    // The element count is taken from the src operand, not from the store node.
    __ mov(rscratch1, vector_length(this, $src));
    __ sve_whilelo(pg, __ S, zr, rscratch1);
    __ sve_st1w_scatter(zsrc, pg, base_reg, zidx);
  %}
  ins_pipe(pipe_slow);
%}

// Scatter store of long/double elements for vectors smaller than MaxVectorSize.
// Selected when the StoreVectorScatter node reports memory_size() < MaxVectorSize
// and the stored value (n->in(3)->in(1)) has element type T_LONG or T_DOUBLE.
// A governing predicate is built in pTmp by sve_whilelo from zr up to
// vector_length(this, $src). The idx values are unpacked to D-sized lanes with
// sve_uunpklo before the scatter is issued. rscratch1 is used as scratch; cr is
// declared killed (presumably because sve_whilelo updates the condition flags).
instruct scatterL_partial(indirect mem, vReg src, vReg idx, vReg tmp, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP pTmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "sve_uunpklo $tmp, $idx\n\t"
            "store_vector_scatter $mem, $pTmp, $tmp, $src\t# vector store scatter partial (L/D)" %}
  ins_encode %{
    __ mov(rscratch1, vector_length(this, $src));
    __ sve_whilelo(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
    // Unpack into tmp rather than in place: idx is a plain input operand with
    // no kill effect declared, so its register must keep its value for any
    // other user of the same idx vector.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}


// ------------------------------ Vector Load Const -------------------------------

instruct loadconB(vReg dst, immI0 src) %{
Expand Down
109 changes: 97 additions & 12 deletions src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Expand Up @@ -223,10 +223,6 @@ source %{
case Op_ExtractUB:
return false;
// Vector API specific
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
// Partial size of gather/scatter are not supported for now.
return length_in_bytes == MaxVectorSize;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
if (vlen < 4) {
Expand Down Expand Up @@ -2427,8 +2423,9 @@ VECTOR_REARRANGE(L, 8, D)

instruct gatherI(vReg dst, indirect mem, vReg idx) %{
predicate(UseSVE > 0 &&
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
match(Set dst (LoadVectorGather mem idx));
ins_cost(SVE_COST);
format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %}
Expand All @@ -2440,8 +2437,9 @@ instruct gatherI(vReg dst, indirect mem, vReg idx) %{

instruct gatherL(vReg dst, indirect mem, vReg idx) %{
predicate(UseSVE > 0 &&
(n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
(n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set dst (LoadVectorGather mem idx));
ins_cost(2 * SVE_COST);
format %{ "sve_uunpklo $idx, $idx\n\t"
Expand All @@ -2453,12 +2451,55 @@ instruct gatherL(vReg dst, indirect mem, vReg idx) %{
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Load Gather Partial -------------------------------

// Gather load of int/float elements for vectors smaller than MaxVectorSize.
// Selected when the LoadVectorGather node reports memory_size() < MaxVectorSize
// and the element type is T_INT or T_FLOAT. A governing predicate is built in
// pTmp by sve_whilelo from zr up to vector_length(this), and the gather is
// issued under that predicate. rscratch1 is used as scratch; cr is declared
// killed (presumably because sve_whilelo updates the condition flags).
instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP pTmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %}
  ins_encode %{
    // Resolve the allocated registers once, then emit the sequence.
    PRegister     pg       = as_PRegister($pTmp$$reg);
    FloatRegister zdst     = as_FloatRegister($dst$$reg);
    FloatRegister zidx     = as_FloatRegister($idx$$reg);
    Register      base_reg = as_Register($mem$$base);

    // Element count of this vector bounds the governing predicate.
    __ mov(rscratch1, vector_length(this));
    __ sve_whilelo(pg, __ S, zr, rscratch1);
    __ sve_ld1w_gather(zdst, pg, base_reg, zidx);
  %}
  ins_pipe(pipe_slow);
%}

// Gather load of long/double elements for vectors smaller than MaxVectorSize.
// Selected when the LoadVectorGather node reports memory_size() < MaxVectorSize
// and the element type is T_LONG or T_DOUBLE. A governing predicate is built in
// pTmp by sve_whilelo from zr up to vector_length(this). The idx values are
// unpacked to D-sized lanes with sve_uunpklo before the gather is issued.
// rscratch1 is used as scratch; cr is declared killed (presumably because
// sve_whilelo updates the condition flags).
instruct gatherL_partial(vReg dst, indirect mem, vReg idx, vReg tmp, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP tmp, TEMP pTmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "sve_uunpklo $tmp, $idx\n\t"
            "load_vector_gather $dst, $pTmp, $mem, $tmp\t# vector load gather partial (L/D)" %}
  ins_encode %{
    __ mov(rscratch1, vector_length(this));
    __ sve_whilelo(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
    // Unpack into tmp rather than in place: idx is a plain input operand with
    // no kill effect declared, so its register must keep its value for any
    // other user of the same idx vector.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter -------------------------------

instruct scatterI(indirect mem, vReg src, vReg idx) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
ins_cost(SVE_COST);
format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
Expand All @@ -2470,8 +2511,9 @@ instruct scatterI(indirect mem, vReg src, vReg idx) %{

instruct scatterL(indirect mem, vReg src, vReg idx) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
ins_cost(2 * SVE_COST);
format %{ "sve_uunpklo $idx, $idx\n\t"
Expand All @@ -2483,6 +2525,49 @@ instruct scatterL(indirect mem, vReg src, vReg idx) %{
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter Partial -------------------------------

// Scatter store of int/float elements for vectors smaller than MaxVectorSize.
// Selected when the StoreVectorScatter node reports memory_size() < MaxVectorSize
// and the stored value (n->in(3)->in(1)) has element type T_INT or T_FLOAT.
// A governing predicate is built in pTmp by sve_whilelo from zr up to
// vector_length(this, $src), and the scatter is issued under that predicate.
// rscratch1 is used as scratch; cr is declared killed (presumably because
// sve_whilelo updates the condition flags).
instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP pTmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %}
  ins_encode %{
    // Resolve the allocated registers once, then emit the sequence.
    PRegister     pg       = as_PRegister($pTmp$$reg);
    FloatRegister zsrc     = as_FloatRegister($src$$reg);
    FloatRegister zidx     = as_FloatRegister($idx$$reg);
    Register      base_reg = as_Register($mem$$base);

    // The element count is taken from the src operand, not from the store node.
    __ mov(rscratch1, vector_length(this, $src));
    __ sve_whilelo(pg, __ S, zr, rscratch1);
    __ sve_st1w_scatter(zsrc, pg, base_reg, zidx);
  %}
  ins_pipe(pipe_slow);
%}

// Scatter store of long/double elements for vectors smaller than MaxVectorSize.
// Selected when the StoreVectorScatter node reports memory_size() < MaxVectorSize
// and the stored value (n->in(3)->in(1)) has element type T_LONG or T_DOUBLE.
// A governing predicate is built in pTmp by sve_whilelo from zr up to
// vector_length(this, $src). The idx values are unpacked to D-sized lanes with
// sve_uunpklo before the scatter is issued. rscratch1 is used as scratch; cr is
// declared killed (presumably because sve_whilelo updates the condition flags).
instruct scatterL_partial(indirect mem, vReg src, vReg idx, vReg tmp, pRegGov pTmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP pTmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "mov rscratch1, vector_length\n\t"
            "sve_whilelo $pTmp, zr, rscratch1\n\t"
            "sve_uunpklo $tmp, $idx\n\t"
            "store_vector_scatter $mem, $pTmp, $tmp, $src\t# vector store scatter partial (L/D)" %}
  ins_encode %{
    __ mov(rscratch1, vector_length(this, $src));
    __ sve_whilelo(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
    // Unpack into tmp rather than in place: idx is a plain input operand with
    // no kill effect declared, so its register must keep its value for any
    // other user of the same idx vector.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}


// ------------------------------ Vector Load Const -------------------------------

instruct loadconB(vReg dst, immI0 src) %{
Expand Down

0 comments on commit 73e78a3

Please sign in to comment.