Skip to content

Commit

Permalink
8287087: C2: perform SLP reduction analysis on-demand
Browse files Browse the repository at this point in the history
Reviewed-by: epeter, jbhateja, thartmann
  • Loading branch information
robcasloz committed Apr 27, 2023
1 parent ba43649 commit 1be80a4
Show file tree
Hide file tree
Showing 17 changed files with 715 additions and 167 deletions.
16 changes: 8 additions & 8 deletions src/hotspot/cpu/x86/x86_64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -5374,7 +5374,7 @@ instruct loadD(regD dst, memory mem)

// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
predicate(UseAVX > 0 && !n->is_reduction());
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
match(Set dst (MaxF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
Expand All @@ -5396,7 +5396,7 @@ instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
%}

instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
predicate(UseAVX > 0 && n->is_reduction());
predicate(UseAVX > 0 && SuperWord::is_reduction(n));
match(Set dst (MaxF a b));
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

Expand All @@ -5410,7 +5410,7 @@ instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRe

// max = java.lang.Math.max(double a, double b)
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
predicate(UseAVX > 0 && !n->is_reduction());
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
match(Set dst (MaxD a b));
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
format %{
Expand All @@ -5432,7 +5432,7 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
%}

instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
predicate(UseAVX > 0 && n->is_reduction());
predicate(UseAVX > 0 && SuperWord::is_reduction(n));
match(Set dst (MaxD a b));
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

Expand All @@ -5446,7 +5446,7 @@ instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRe

// min = java.lang.Math.min(float a, float b)
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
predicate(UseAVX > 0 && !n->is_reduction());
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
Expand All @@ -5468,7 +5468,7 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
%}

instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
predicate(UseAVX > 0 && n->is_reduction());
predicate(UseAVX > 0 && SuperWord::is_reduction(n));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

Expand All @@ -5482,7 +5482,7 @@ instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRe

// min = java.lang.Math.min(double a, double b)
instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
predicate(UseAVX > 0 && !n->is_reduction());
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
Expand All @@ -5504,7 +5504,7 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
%}

instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
predicate(UseAVX > 0 && n->is_reduction());
predicate(UseAVX > 0 && SuperWord::is_reduction(n));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

Expand Down
3 changes: 2 additions & 1 deletion src/hotspot/share/adlc/main.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -271,6 +271,7 @@ int main(int argc, char *argv[])
AD.addInclude(AD._DFA_file, "opto/narrowptrnode.hpp");
AD.addInclude(AD._DFA_file, "opto/opcodes.hpp");
AD.addInclude(AD._DFA_file, "opto/convertnode.hpp");
AD.addInclude(AD._DFA_file, "opto/superword.hpp");
AD.addInclude(AD._DFA_file, "utilities/powerOfTwo.hpp");

// Make sure each .cpp file starts with include lines:
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/share/opto/idealGraphPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ void IdealGraphPrinter::visit_node(Node *n, bool edges, VectorSet* temp_set) {
if (flags & Node::Flag_has_call) {
print_prop("has_call", "true");
}
if (flags & Node::Flag_is_reduction) {
print_prop("is_reduction", "true");
if (flags & Node::Flag_has_swapped_edges) {
print_prop("has_swapped_edges", "true");
}

if (C->matcher() != nullptr) {
Expand Down
76 changes: 0 additions & 76 deletions src/hotspot/share/opto/loopTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1037,10 +1037,6 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
}

if (UseSuperWord) {
if (!cl->is_reduction_loop()) {
phase->mark_reductions(this);
}

// Only attempt slp analysis when user controls do not prohibit it
if (!range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) {
// Once policy_slp_analysis succeeds, mark the loop with the
Expand Down Expand Up @@ -1694,15 +1690,6 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
set_idom(new_pre_exit, pre_end, dd_main_head);
set_loop(new_pre_exit, outer_loop->_parent);

if (peel_only) {
// Nodes in the peeled iteration that were marked as reductions within the
// original loop might not be reductions within their new outer loop.
for (uint i = 0; i < loop->_body.size(); i++) {
Node* n = old_new[loop->_body[i]->_idx];
n->remove_flag(Node::Flag_is_reduction);
}
}

// Step B2: Build a zero-trip guard for the main-loop. After leaving the
// pre-loop, the main-loop may not execute at all. Later in life this
// zero-trip guard will become the minimum-trip guard when we unroll
Expand Down Expand Up @@ -2456,69 +2443,6 @@ void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new
}
}

// Mark vector reduction candidates in a counted loop that has not been
// unrolled yet. For every phi hanging off the loop head (other than the trip
// counter), the node feeding the phi's backedge is flagged with
// Node::Flag_is_reduction when:
//   - its control is a member of 'loop',
//   - its opcode has a ReductionNode counterpart, or is Min/Max on float/double,
//   - it takes the phi as one of its required inputs, and
//   - the phi is its only use inside the loop.
// The loop head is marked as having reductions whenever a node gets flagged.
// Does nothing when SuperWordReductions is disabled.
void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
  if (!SuperWordReductions) {
    return;
  }

  CountedLoopNode* cl = loop->_head->as_CountedLoop();
  if (cl->unrolled_count() > 1) {
    return;
  }

  Node* iv_phi = cl->phi();
  for (DUIterator_Fast omax, o = cl->fast_outs(omax); o < omax; o++) {
    Node* phi = cl->fast_out(o);
    // Only consider live phis that are not the trip counter.
    if (!phi->is_Phi() || phi->outcnt() == 0 || phi == iv_phi) {
      continue;
    }

    // The candidate is the value flowing into the phi along the backedge.
    Node* candidate = phi->in(LoopNode::LoopBackControl);
    if (candidate == nullptr) {
      continue;
    }

    // The candidate must be controlled from within this loop.
    Node* candidate_ctrl = get_ctrl(candidate);
    if (candidate_ctrl == nullptr || !loop->is_member(get_loop(candidate_ctrl))) {
      continue;
    }

    // Test whether the opcode fits the standard pattern for a reduction operator.
    int opc = candidate->Opcode();
    bool has_reduction_opcode = (opc != ReductionNode::opcode(opc, candidate->bottom_type()->basic_type()));
    bool is_fp_min_max = (opc == Op_MinD || opc == Op_MinF || opc == Op_MaxD || opc == Op_MaxF);
    if (!has_reduction_opcode && !is_fp_min_max) {
      continue;
    }

    // Nothing to do if the node has been marked already.
    if (candidate->is_reduction()) {
      continue;
    }

    // The arithmetic node must consume the phi it defines.
    bool consumes_phi = false;
    for (unsigned idx = 1; idx < candidate->req() && !consumes_phi; idx++) {
      consumes_phi = (candidate->in(idx) == phi);
    }
    if (!consumes_phi) {
      continue;
    }

    // Inside the loop, the phi must be the sole user of the result.
    bool only_feeds_phi = true;
    for (DUIterator_Fast umax, u = candidate->fast_outs(umax); u < umax && only_feeds_phi; u++) {
      Node* use = candidate->fast_out(u);
      if (!loop->is_member(get_loop(ctrl_or_self(use)))) {
        continue; // Uses outside of the loop are allowed.
      }
      if (use == phi) {
        continue;
      }
      only_feeds_phi = false;
    }

    if (only_feeds_phi) {
      candidate->add_flag(Node::Flag_is_reduction);
      cl->mark_has_reductions();
    }
  }
}

//------------------------------adjust_limit-----------------------------------
// Helper function that computes new loop limit as (rc_limit-offset)/scale
Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
Expand Down
2 changes: 0 additions & 2 deletions src/hotspot/share/opto/loopnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2249,7 +2249,6 @@ void CountedLoopNode::dump_spec(outputStream *st) const {
if (is_pre_loop ()) st->print("pre of N%d" , _main_idx);
if (is_main_loop()) st->print("main of N%d", _idx);
if (is_post_loop()) st->print("post of N%d", _main_idx);
if (is_reduction_loop()) st->print(" reduction");
if (is_strip_mined()) st->print(" strip mined");
}
#endif
Expand Down Expand Up @@ -3991,7 +3990,6 @@ void IdealLoopTree::dump_head() {
if (cl->is_pre_loop ()) tty->print(" pre" );
if (cl->is_main_loop()) tty->print(" main");
if (cl->is_post_loop()) tty->print(" post");
if (cl->is_reduction_loop()) tty->print(" reduction");
if (cl->is_vectorized_loop()) tty->print(" vector");
if (range_checks_present()) tty->print(" rc ");
if (cl->is_multiversioned()) tty->print(" multi ");
Expand Down
38 changes: 16 additions & 22 deletions src/hotspot/share/opto/loopnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,22 @@ class LoopNode : public RegionNode {
uint _loop_flags;
// Names for flag bitfields
enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
MainHasNoPreLoop = 1<<2,
HasExactTripCount = 1<<3,
InnerLoop = 1<<4,
PartialPeelLoop = 1<<5,
PartialPeelFailed = 1<<6,
HasReductions = 1<<7,
WasSlpAnalyzed = 1<<8,
PassedSlpAnalysis = 1<<9,
DoUnrollOnly = 1<<10,
VectorizedLoop = 1<<11,
HasAtomicPostLoop = 1<<12,
IsMultiversioned = 1<<13,
StripMined = 1<<14,
SubwordLoop = 1<<15,
ProfileTripFailed = 1<<16,
LoopNestInnerLoop = 1 << 17,
LoopNestLongOuterLoop = 1 << 18};
MainHasNoPreLoop = 1<<2,
HasExactTripCount = 1<<3,
InnerLoop = 1<<4,
PartialPeelLoop = 1<<5,
PartialPeelFailed = 1<<6,
WasSlpAnalyzed = 1<<7,
PassedSlpAnalysis = 1<<8,
DoUnrollOnly = 1<<9,
VectorizedLoop = 1<<10,
HasAtomicPostLoop = 1<<11,
IsMultiversioned = 1<<12,
StripMined = 1<<13,
SubwordLoop = 1<<14,
ProfileTripFailed = 1<<15,
LoopNestInnerLoop = 1<<16,
LoopNestLongOuterLoop = 1<<17};
char _unswitch_count;
enum { _unswitch_max=3 };
char _postloop_flags;
Expand Down Expand Up @@ -105,7 +104,6 @@ class LoopNode : public RegionNode {
bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }

void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
void mark_has_reductions() { _loop_flags |= HasReductions; }
void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
Expand Down Expand Up @@ -286,7 +284,6 @@ class CountedLoopNode : public BaseCountedLoopNode {
bool is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
bool is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
bool is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
bool is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
bool was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
bool has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
bool is_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
Expand Down Expand Up @@ -1313,9 +1310,6 @@ class PhaseIdealLoop : public PhaseTransform {
// Unroll the loop body one step - make each trip do 2 iterations.
void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );

// Mark vector reduction candidates before loop unrolling
void mark_reductions( IdealLoopTree *loop );

// Return true if exp is a constant times an induction var
bool is_scaled_iv(Node* exp, Node* iv, BasicType bt, jlong* p_scale, bool* p_short_scale, int depth = 0);

Expand Down
4 changes: 0 additions & 4 deletions src/hotspot/share/opto/loopopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2622,10 +2622,6 @@ void PhaseIdealLoop::clone_loop_body(const Node_List& body, Node_List &old_new,
Node* old = body.at(i);
Node* nnn = old->clone();
old_new.map(old->_idx, nnn);
if (old->is_reduction()) {
// Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
nnn->add_flag(Node::Flag_is_reduction);
}
if (C->do_vector_loop() && cm != nullptr) {
cm->verify_insert_and_clone(old, nnn, cm->clone_idx());
}
Expand Down
4 changes: 0 additions & 4 deletions src/hotspot/share/opto/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,10 +521,6 @@ Node *Node::clone() const {
// If it is applicable, it will happen anyway when the cloned node is registered with IGVN.
n->remove_flag(Node::NodeFlags::Flag_for_post_loop_opts_igvn);
}
if (n->is_reduction()) {
// Do not copy reduction information. This must be explicitly set by the calling code.
n->remove_flag(Node::Flag_is_reduction);
}
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->register_potential_barrier_node(n);

Expand Down
14 changes: 9 additions & 5 deletions src/hotspot/share/opto/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,12 @@ class Node {
_in[i2] = n1;
// If this node is in the hash table, make sure it doesn't need a rehash.
assert(check_hash == NO_HASH || check_hash == hash(), "edge swap must preserve hash code");
// Flip swapped edges flag.
if (has_swapped_edges()) {
remove_flag(Node::Flag_has_swapped_edges);
} else {
add_flag(Node::Flag_has_swapped_edges);
}
}

// Iterators over input Nodes for a Node X are written as:
Expand Down Expand Up @@ -784,7 +790,7 @@ class Node {
Flag_avoid_back_to_back_before = 1 << 8,
Flag_avoid_back_to_back_after = 1 << 9,
Flag_has_call = 1 << 10,
Flag_is_reduction = 1 << 11,
Flag_has_swapped_edges = 1 << 11,
Flag_is_scheduled = 1 << 12,
Flag_is_expensive = 1 << 13,
Flag_is_predicated_vector = 1 << 14,
Expand Down Expand Up @@ -1001,10 +1007,8 @@ class Node {
bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
// The node is expensive: the best control is set during loop opts
bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != nullptr; }

// An arithmetic node which accumulates data in a loop.
// It must have the loop's phi as input and provide a def to the phi.
bool is_reduction() const { return (_flags & Flag_is_reduction) != 0; }
// The node's original edge position is swapped.
bool has_swapped_edges() const { return (_flags & Flag_has_swapped_edges) != 0; }

bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; }

Expand Down
Loading

1 comment on commit 1be80a4

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.