Skip to content
Permalink
Browse files
8264973: AArch64: Optimize vector max/min/add reduction of two integers with NEON pairwise instructions

Reviewed-by: njian, aph
  • Loading branch information
Dong Bo authored and Fei Yang committed May 25, 2021
1 parent b4d4884 commit 123cdd1fbd4fa02177c06afb67a09aee21d0a482
@@ -874,43 +874,39 @@ instruct reduce_min4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlag
ins_pipe(pipe_slow);
%}

// Max reduction over a vecD int vector combined with a scalar: matches
// MaxReductionV of isrc and vsrc when the vector element type is T_INT.
// dst and tmp are temporaries and the flags register cr is killed (see effect).
//
// NOTE(review): this span looks like a diff rendering with the +/- markers
// stripped. The signature, format and ins_encode sections each list the old
// line(s) first (vecX tmp; dup to T2D followed by smaxv over T4S) and then the
// new line (vecD tmp; a single smaxp over T2S with vsrc as both operands).
// Confirm which lines are live before editing.
instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MaxReductionV isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP_DEF dst, TEMP tmp, KILL cr);
format %{ "dup $tmp, T2D, $vsrc\n\t"
"smaxv $tmp, T4S, $tmp\n\t"
format %{ "smaxp $tmp, T2S, $vsrc, $vsrc\n\t"
"umov $dst, $tmp, S, 0\n\t"
"cmpw $dst, $isrc\n\t"
"cselw $dst, $dst, $isrc GT\t# max reduction2I"
%}
ins_encode %{
// Presumably the pre-change sequence: duplicate vsrc across tmp, then reduce over T4S.
__ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
__ smaxv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
// Presumably the post-change sequence: one pairwise op on T2S with vsrc as both sources.
__ smaxp(as_FloatRegister($tmp$$reg), __ T2S, as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));
// Move S lane 0 of tmp into the general-purpose dst register.
__ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
// Compare the vector result with isrc; keep dst when GT holds, otherwise select isrc.
__ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
__ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
%}
ins_pipe(pipe_slow);
%}

instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MinReductionV isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP_DEF dst, TEMP tmp, KILL cr);
format %{ "dup $tmp, T2D, $vsrc\n\t"
"sminv $tmp, T4S, $tmp\n\t"
format %{ "sminp $tmp, T2S, $vsrc, $vsrc\n\t"
"umov $dst, $tmp, S, 0\n\t"
"cmpw $dst, $isrc\n\t"
"cselw $dst, $dst, $isrc LT\t# min reduction2I"
%}
ins_encode %{
__ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
__ sminv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
__ sminp(as_FloatRegister($tmp$$reg), __ T2S, as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));
__ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
__ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
__ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
@@ -4053,22 +4049,21 @@ instruct replicate2D(vecX dst, vRegD src)

// ====================REDUCTION ARITHMETIC====================================

// Add reduction over a vecD int vector combined with a scalar: matches
// AddReductionVI of isrc and vsrc when the vector element type is T_INT.
//
// NOTE(review): this span looks like a diff rendering with the +/- markers
// stripped. The old form (two integer temporaries tmp/tmp2; two umov lane
// extracts followed by two addw) is listed before the new form (vector
// temporary vtmp plus integer temporary itmp; addpv over T2S, one umov, one
// addw) in the signature, effect, format and ins_encode sections.
// Confirm which lines are live before editing.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP tmp2);
format %{ "umov $tmp, $vsrc, S, 0\n\t"
"umov $tmp2, $vsrc, S, 1\n\t"
"addw $tmp, $isrc, $tmp\n\t"
"addw $dst, $tmp, $tmp2\t# add reduction2I"
effect(TEMP vtmp, TEMP itmp);
format %{ "addpv $vtmp, T2S, $vsrc, $vsrc\n\t"
"umov $itmp, $vtmp, S, 0\n\t"
"addw $dst, $itmp, $isrc\t# add reduction2I"
%}
ins_encode %{
// Presumably the pre-change sequence: extract S lanes 0 and 1 separately,
// then add isrc and both lanes with two scalar adds.
__ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
__ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
__ addw($tmp$$Register, $isrc$$Register, $tmp$$Register);
__ addw($dst$$Register, $tmp$$Register, $tmp2$$Register);
// Presumably the post-change sequence: pairwise add on T2S with vsrc as both
// sources, move S lane 0 of vtmp to itmp, then add isrc into dst.
__ addpv(as_FloatRegister($vtmp$$reg), __ T2S,
as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));
__ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
__ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
%}
ins_pipe(pipe_class_default);
%}
@@ -496,21 +496,19 @@ REDUCE_MAX_MIN_INT(min, 8, S, X, Min, s, LT)
REDUCE_MAX_MIN_INT(min, 4, I, X, Min, u, LT)
dnl
define(`REDUCE_MAX_MIN_2I', `
instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst ($2ReductionV isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP_DEF dst, TEMP tmp, KILL cr);
format %{ "dup $tmp, T2D, $vsrc\n\t"
"s$1v $tmp, T4S, $tmp\n\t"
format %{ "s$1p $tmp, T2S, $vsrc, $vsrc\n\t"
"umov $dst, $tmp, S, 0\n\t"
"cmpw $dst, $isrc\n\t"
"cselw $dst, $dst, $isrc $3\t# $1 reduction2I"
%}
ins_encode %{
__ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
__ s$1v(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
__ s$1p(as_FloatRegister($tmp$$reg), __ T2S, as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));
__ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
__ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
__ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3);
@@ -1603,27 +1601,22 @@ dnl
// ====================REDUCTION ARITHMETIC====================================
dnl
dnl REDUCE_ADD_INT(lane count, element suffix, vector register suffix)
dnl Emits an instruct named reduce_add<count><suffix> that matches AddReductionVI
dnl of isrc and vsrc for T_INT elements. When the lane count is 2 the new form
dnl uses addpv over T2S with vsrc as both sources; otherwise it uses addv over
dnl T4S. Both cases then move S lane 0 of vtmp to itmp and add isrc into dst.
dnl NOTE(review): this span looks like a diff rendering with the +/- markers
dnl stripped, so old and new lines of the macro body are interleaved (two
dnl instruct signatures, two effect lines, duplicated closing quotes). Confirm
dnl which lines are live before editing; as shown the quoting does not balance.
define(`REDUCE_ADD_INT', `
instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, ifelse($1, 2, iRegINoSp tmp, vecX vtmp), iRegINoSp ifelse($1, 2, tmp2, itmp))
instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 vtmp, iRegINoSp itmp)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP ifelse($1, 2, tmp, vtmp), TEMP ifelse($1, 2, tmp2, itmp));
format %{ ifelse($1, 2, `"umov $tmp, $vsrc, S, 0\n\t"
"umov $tmp2, $vsrc, S, 1\n\t"
"addw $tmp, $isrc, $tmp\n\t"
"addw $dst, $tmp, $tmp2\t# add reduction2I"',`"addv $vtmp, T4S, $vsrc\n\t"
effect(TEMP vtmp, TEMP itmp);
format %{ ifelse($1, 2, `"addpv $vtmp, T2S, $vsrc, $vsrc\n\t"',`"addv $vtmp, T4S, $vsrc\n\t"')
"umov $itmp, $vtmp, S, 0\n\t"
"addw $dst, $itmp, $isrc\t# add reduction4I"')
"addw $dst, $itmp, $isrc\t# add reduction$1I"
%}
ins_encode %{
ifelse($1, 2, `__ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
__ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
__ addw($tmp$$Register, $isrc$$Register, $tmp$$Register);
__ addw($dst$$Register, $tmp$$Register, $tmp2$$Register);', `__ addv(as_FloatRegister($vtmp$$reg), __ T4S,
as_FloatRegister($vsrc$$reg));
ifelse($1, 2, `__ addpv(as_FloatRegister($vtmp$$reg), __ T2S,
as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));', `__ addv(as_FloatRegister($vtmp$$reg), __ T4S,
as_FloatRegister($vsrc$$reg));')
__ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
__ addw($dst$$Register, $itmp$$Register, $isrc$$Register);')
__ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
%}
ins_pipe(pipe_class_default);
%}')dnl
@@ -2404,6 +2404,8 @@ void mvnw(Register Rd, Register Rm,
INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(smaxp, 0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(sminp, 0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
@@ -1461,11 +1461,17 @@ def generate(kind, names):
["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
["smaxp", "smaxp", "8B"], ["smaxp", "smaxp", "16B"],
["smaxp", "smaxp", "4H"], ["smaxp", "smaxp", "8H"],
["smaxp", "smaxp", "2S"], ["smaxp", "smaxp", "4S"],
["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"],
["fmax", "fmax", "2D"],
["minv", "smin", "8B"], ["minv", "smin", "16B"],
["minv", "smin", "4H"], ["minv", "smin", "8H"],
["minv", "smin", "2S"], ["minv", "smin", "4S"],
["sminp", "sminp", "8B"], ["sminp", "sminp", "16B"],
["sminp", "sminp", "4H"], ["sminp", "sminp", "8H"],
["sminp", "sminp", "2S"], ["sminp", "sminp", "4S"],
["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
["fmin", "fmin", "2D"],
["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
Loading

1 comment on commit 123cdd1

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on 123cdd1 May 25, 2021

Please sign in to comment.