Skip to content

Commit ea83b44

Browse files
author
Pengfei Li
committed
8280510: AArch64: Vectorize operations with loop induction variable
Reviewed-by: adinn, thartmann
1 parent 36bf6fb commit ea83b44

File tree

12 files changed

+177
-6
lines changed

12 files changed

+177
-6
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,6 +2465,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
24652465
}
24662466
break;
24672467
case Op_MulVL:
2468+
case Op_PopulateIndex:
24682469
return false;
24692470
case Op_VectorLoadShuffle:
24702471
case Op_VectorRearrange:

src/hotspot/cpu/aarch64/aarch64_sve.ad

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5380,6 +5380,21 @@ instruct loadconB(vReg dst, immI0 src) %{
53805380
ins_pipe(pipe_slow);
53815381
%}
53825382

5383+
// -------------------------- Populate Index to a Vector --------------------------
5384+
5385+
// Match rule for PopulateIndex, selectable only when UseSVE > 0.
// Emits one sve_index whose sources are src1 (a general-purpose register) and
// src2 (an integer immediate).
// NOTE(review): src2 is declared as immI, while the register form of sve_index
// in assembler_aarch64.hpp encodes its immediate in a signed 5-bit field
// (bits 20..16). Presumably only small constants reach this rule -- confirm,
// or narrow the operand type.
instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{
5386+
predicate(UseSVE > 0);
5387+
match(Set dst (PopulateIndex src1 src2));
5388+
ins_cost(SVE_COST);
5389+
format %{ "sve_index $dst, $src1, $src2\t # populate index (sve)" %}
5390+
ins_encode %{
5391+
// The element type reported for this node selects the register variant
// (element size) handed to sve_index.
BasicType bt = Matcher::vector_element_basic_type(this);
5392+
__ sve_index(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
5393+
as_Register($src1$$reg), $src2$$constant);
5394+
%}
5395+
ins_pipe(pipe_slow);
5396+
%}
5397+
53835398
// Intrinsics for String.indexOf(char)
53845399

53855400

src/hotspot/cpu/aarch64/aarch64_sve_ad.m4

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2962,6 +2962,21 @@ instruct loadconB(vReg dst, immI0 src) %{
29622962
ins_pipe(pipe_slow);
29632963
%}
29642964

2965+
// -------------------------- Populate Index to a Vector --------------------------
2966+
2967+
// Match rule for PopulateIndex, selectable only when UseSVE > 0.
// Emits one sve_index whose sources are src1 (a general-purpose register) and
// src2 (an integer immediate).
// NOTE(review): src2 is declared as immI, while the register form of sve_index
// in assembler_aarch64.hpp encodes its immediate in a signed 5-bit field
// (bits 20..16). Presumably only small constants reach this rule -- confirm,
// or narrow the operand type.
instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{
2968+
predicate(UseSVE > 0);
2969+
match(Set dst (PopulateIndex src1 src2));
2970+
ins_cost(SVE_COST);
2971+
format %{ "sve_index $dst, $src1, $src2\t # populate index (sve)" %}
2972+
ins_encode %{
2973+
// The element type reported for this node selects the register variant
// (element size) handed to sve_index.
BasicType bt = Matcher::vector_element_basic_type(this);
2974+
__ sve_index(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
2975+
as_Register($src1$$reg), $src2$$constant);
2976+
%}
2977+
ins_pipe(pipe_slow);
2978+
%}
2979+
29652980
// Intrinsics for String.indexOf(char)
29662981

29672982
dnl

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3793,9 +3793,19 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
37933793
INSN(sve_lastb, 0b1);
37943794
#undef INSN
37953795

3796+
// SVE Create index starting from general-purpose register and incremented by immediate
//   Zd  - destination vector register, encoded at bit 0
//   T   - element size variant; Q is rejected by the assert below
//   Rn  - general-purpose register supplying the start value, encoded at bit 5
//   imm - increment, encoded as a signed value in the 5-bit field [20:16]
3797+
void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
3798+
starti;
3799+
assert(T != Q, "invalid size");
3800+
// Fixed prefix in [31:24], element size in [23:22], constant 1 in bit 21.
f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
3801+
// Bits [15:10] = 0b010001 select this register/immediate form; the
// immediate/immediate overload of sve_index uses 0b010000 instead.
sf(imm, 20, 16), f(0b010001, 15, 10);
3802+
rf(Rn, 5), rf(Zd, 0);
3803+
}
3804+
37963805
// SVE create index starting from and incremented by immediate
37973806
void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
37983807
starti;
3808+
assert(T != Q, "invalid size");
37993809
f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
38003810
sf(imm2, 20, 16), f(0b010000, 15, 10);
38013811
sf(imm1, 9, 5), rf(Zd, 0);

src/hotspot/share/adlc/formssel.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4103,6 +4103,7 @@ int MatchRule::is_expensive() const {
41034103
strcmp(opType,"ReplicateL")==0 ||
41044104
strcmp(opType,"ReplicateF")==0 ||
41054105
strcmp(opType,"ReplicateD")==0 ||
4106+
strcmp(opType,"PopulateIndex")==0 ||
41064107
strcmp(opType,"AddReductionVI")==0 ||
41074108
strcmp(opType,"AddReductionVL")==0 ||
41084109
strcmp(opType,"AddReductionVF")==0 ||
@@ -4227,7 +4228,7 @@ bool MatchRule::is_vector() const {
42274228
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
42284229
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
42294230
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
4230-
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
4231+
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","PopulateIndex",
42314232
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
42324233
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
42334234
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",

src/hotspot/share/opto/classes.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ macro(PopCountI)
284284
macro(PopCountL)
285285
macro(PopCountVI)
286286
macro(PopCountVL)
287+
macro(PopulateIndex)
287288
macro(PrefetchAllocation)
288289
macro(Proj)
289290
macro(RShiftI)

src/hotspot/share/opto/node.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ class PhaseTransform;
149149
class PhaseValues;
150150
class PhiNode;
151151
class Pipeline;
152+
class PopulateIndexNode;
152153
class ProjNode;
153154
class RangeCheckNode;
154155
class RegMask;

src/hotspot/share/opto/superword.cpp

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,7 +1312,16 @@ bool SuperWord::have_similar_inputs(Node* s1, Node* s2) {
13121312
// assert(independent(s1, s2) == true, "check independent");
13131313
if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) {
13141314
for (uint i = 1; i < s1->req(); i++) {
1315-
if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false;
1315+
Node* s1_in = s1->in(i);
1316+
Node* s2_in = s2->in(i);
1317+
if (s1_in->is_Phi() && s2_in->is_Add() && s2_in->in(1) == s1_in) {
1318+
// Special handling for expressions with loop iv, like "b[i] = a[i] * i".
1319+
// In this case, one node has an input from the tripcount iv and another
1320+
// node has an input from iv plus an offset.
1321+
if (!s1_in->as_Phi()->is_tripcount(T_INT)) return false;
1322+
} else {
1323+
if (s1_in->Opcode() != s2_in->Opcode()) return false;
1324+
}
13161325
}
13171326
}
13181327
return true;
@@ -2837,6 +2846,23 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
28372846
vlen = cl->slp_max_unroll();
28382847
}
28392848

2849+
// Insert index population operation
2850+
if (opd == iv()) {
2851+
BasicType p0_bt = velt_basic_type(p0);
2852+
BasicType iv_bt = is_subword_type(p0_bt) ? p0_bt : T_INT;
2853+
const TypeVect* vt = TypeVect::make(iv_bt, vlen);
2854+
Node* vn = new PopulateIndexNode(iv(), _igvn.intcon(1), vt);
2855+
#ifdef ASSERT
2856+
if (TraceNewVectors) {
2857+
tty->print("new Vector node: ");
2858+
vn->dump();
2859+
}
2860+
#endif
2861+
_igvn.register_new_node_with_optimizer(vn);
2862+
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
2863+
return vn;
2864+
}
2865+
28402866
if (same_inputs(p, opd_idx)) {
28412867
if (opd->is_Vector() || opd->is_LoadVector()) {
28422868
assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
@@ -2847,7 +2873,6 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
28472873
return opd; // input is matching vector
28482874
}
28492875
if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
2850-
Compile* C = _phase->C;
28512876
Node* cnt = opd;
28522877
// Vector instructions do not mask shift count, do it here.
28532878
juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
@@ -3008,10 +3033,25 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
30083033
Node* def = use->in(u_idx);
30093034
Node_List* d_pk = my_pack(def);
30103035
if (d_pk == NULL) {
3011-
// check for scalar promotion
30123036
Node* n = u_pk->at(0)->in(u_idx);
3013-
for (uint i = 1; i < u_pk->size(); i++) {
3014-
if (u_pk->at(i)->in(u_idx) != n) return false;
3037+
if (n == iv()) {
3038+
// check for index population
3039+
BasicType bt = velt_basic_type(use);
3040+
if (!VectorNode::is_populate_index_supported(bt)) return false;
3041+
for (uint i = 1; i < u_pk->size(); i++) {
3042+
// We can create a vector filled with iv indices if all other nodes
3043+
// in use pack have inputs of iv plus node index.
3044+
Node* use_in = u_pk->at(i)->in(u_idx);
3045+
if (!use_in->is_Add() || use_in->in(1) != n) return false;
3046+
const TypeInt* offset_t = use_in->in(2)->bottom_type()->is_int();
3047+
if (offset_t == NULL || !offset_t->is_con() ||
3048+
offset_t->get_con() != (jint) i) return false;
3049+
}
3050+
} else {
3051+
// check for scalar promotion
3052+
for (uint i = 1; i < u_pk->size(); i++) {
3053+
if (u_pk->at(i)->in(u_idx) != n) return false;
3054+
}
30153055
}
30163056
return true;
30173057
}

src/hotspot/share/opto/vectornode.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,11 @@ bool VectorNode::is_vector_integral_negate_supported(int opc, uint vlen, BasicTy
395395
return false;
396396
}
397397

398+
// Returns whether the matcher reports a supported rule for Op_PopulateIndex
// with element type bt. The query uses the value returned by
// Matcher::max_vector_size(bt) as the vector length argument.
bool VectorNode::is_populate_index_supported(BasicType bt) {
399+
int vlen = Matcher::max_vector_size(bt);
400+
return Matcher::match_rule_supported_vector(Op_PopulateIndex, vlen, bt);
401+
}
402+
398403
bool VectorNode::is_shift_opcode(int opc) {
399404
switch (opc) {
400405
case Op_LShiftI:

src/hotspot/share/opto/vectornode.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ class VectorNode : public TypeNode {
9898
static bool is_scalar_rotate(Node* n);
9999
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
100100
static bool is_vector_integral_negate_supported(int opc, uint vlen, BasicType bt, bool use_predicate);
101+
static bool is_populate_index_supported(BasicType bt);
101102
static bool is_invariant_vector(Node* n);
102103
static bool is_all_ones_vector(Node* n);
103104
static bool is_vector_bitwise_not_pattern(Node* n);
@@ -1104,6 +1105,13 @@ class ReplicateDNode : public VectorNode {
11041105
virtual int Opcode() const;
11051106
};
11061107

1108+
//======================Populate_Indices_into_a_Vector=========================
1109+
// Vector node with two inputs (in1, in2) and result vector type vt; all three
// are forwarded unchanged to the VectorNode constructor.
// NOTE(review): presumably in1 is the start index and in2 the step between
// consecutive lanes -- confirm against the matcher rules that consume this node.
class PopulateIndexNode : public VectorNode {
1110+
public:
1111+
PopulateIndexNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
1112+
// Only declared here; the definition is not part of this class body.
virtual int Opcode() const;
1113+
};
1114+
11071115
//========================Pack_Scalars_into_a_Vector===========================
11081116

11091117
//------------------------------PackNode---------------------------------------

0 commit comments

Comments
 (0)