Skip to content

Commit

Permalink
8324890: C2 SuperWord: refactor out VLoop, make unrolling_analysis static, remove init/reset mechanism
Browse files Browse the repository at this point in the history

Reviewed-by: kvn, roland
  • Loading branch information
eme64 committed Feb 10, 2024
1 parent 71d2dbd commit 232d136
Show file tree
Hide file tree
Showing 9 changed files with 483 additions and 365 deletions.
9 changes: 3 additions & 6 deletions src/hotspot/share/opto/loopTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1104,12 +1104,9 @@ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLo
if (!cl->was_slp_analyzed()) {
Compile::TracePhase tp("autoVectorize", &Phase::timers[Phase::_t_autoVectorize]);

SuperWord sw(phase);
sw.transform_loop(this, false);

// If the loop is slp canonical analyze it
if (sw.early_return() == false) {
sw.unrolling_analysis(_local_loop_unroll_factor);
VLoop vloop(this, true);
if (vloop.check_preconditions()) {
SuperWord::unrolling_analysis(vloop, _local_loop_unroll_factor);
}
}

Expand Down
56 changes: 16 additions & 40 deletions src/hotspot/share/opto/loopnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
#include "opto/predicates.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/superword.hpp"
#include "opto/vectorization.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
Expand Down Expand Up @@ -4863,30 +4863,30 @@ void PhaseIdealLoop::build_and_optimize() {
C->set_major_progress();
}

// Convert scalar to superword operations at the end of all loop opts.
// Auto-vectorize main-loop
if (C->do_superword() && C->has_loops() && !C->major_progress()) {
Compile::TracePhase tp("autoVectorize", &timers[_t_autoVectorize]);
// SuperWord transform
SuperWord sw(this);

// Shared data structures for all AutoVectorizations, to reduce allocations
// of large arrays.
VSharedData vshared;
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* lpt = iter.current();
if (lpt->is_counted()) {
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
if (cl->is_main_loop()) {
if (!sw.transform_loop(lpt, true)) {
// Instigate more unrolling for optimization when vectorization fails.
if (cl->has_passed_slp()) {
C->set_major_progress();
cl->set_notpassed_slp();
cl->mark_do_unroll_only();
}
}
AutoVectorizeStatus status = auto_vectorize(lpt, vshared);

if (status == AutoVectorizeStatus::TriedAndFailed) {
// We tried vectorization, but failed. From now on only unroll the loop.
CountedLoopNode* cl = lpt->_head->as_CountedLoop();
if (cl->has_passed_slp()) {
C->set_major_progress();
cl->set_notpassed_slp();
cl->mark_do_unroll_only();
}
}
}
}

// Move UnorderedReduction out of counted loop. Can be introduced by SuperWord.
// Move UnorderedReduction out of counted loop. Can be introduced by AutoVectorization.
if (C->has_loops() && !C->major_progress()) {
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* lpt = iter.current();
Expand Down Expand Up @@ -5963,30 +5963,6 @@ CountedLoopEndNode* CountedLoopNode::find_pre_loop_end() {
return pre_end;
}

// Head of the pre loop associated with this main loop. The head is reached
// through the cached pre-loop end node, so that cache must already be set.
CountedLoopNode* CountedLoopNode::pre_loop_head() const {
  assert(is_main_loop(), "Only main loop has pre loop");
  // Check the cached end node first, then the head it leads to.
  assert(_pre_loop_end != nullptr,
         "should find head from pre loop end");
  assert(_pre_loop_end->loopnode() != nullptr,
         "should find head from pre loop end");
  return _pre_loop_end->loopnode();
}

// Cached CountedLoopEndNode of the pre loop belonging to this main loop.
// When ASSERT is defined, the cached value is cross-checked against a fresh
// find_pre_loop_end() lookup and against the loop exit of pre_loop_head().
CountedLoopEndNode* CountedLoopNode::pre_loop_end() {
#ifdef ASSERT
  assert(is_main_loop(), "Only main loop has pre loop");
  assert(_pre_loop_end != nullptr, "should be set when fetched");
  Node* recomputed_pre_end = find_pre_loop_end();
  // The cache must agree with the fresh lookup ...
  assert(_pre_loop_end == recomputed_pre_end,
         "should find the pre loop end and must be the same result");
  // ... and with the exit of the pre loop head.
  assert(_pre_loop_end == pre_loop_head()->loopexit(),
         "should find the pre loop end and must be the same result");
#endif
  return _pre_loop_end;
}

// Cache the CountedLoopEndNode of the pre loop for this main loop.
//
// pre_loop_end: end node of the pre loop; must not be null.
//
// Only valid on a main loop, since only main loops have a pre loop.
void CountedLoopNode::set_pre_loop_end(CountedLoopEndNode* pre_loop_end) {
  assert(is_main_loop(), "Only main loop has pre loop");
  // Compare against nullptr explicitly instead of using the pointer as an
  // implicit boolean, matching the other null checks on _pre_loop_end.
  assert(pre_loop_end != nullptr, "must be valid");
  _pre_loop_end = pre_loop_end;
}

//------------------------------get_late_ctrl----------------------------------
// Compute latest legal control.
Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
Expand Down
17 changes: 10 additions & 7 deletions src/hotspot/share/opto/loopnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class PredicateBlock;
class PathFrequency;
class PhaseIdealLoop;
class VectorSet;
class VSharedData;
class Invariance;
struct small_cache;

Expand Down Expand Up @@ -231,14 +232,11 @@ class CountedLoopNode : public BaseCountedLoopNode {
// vector mapped unroll factor here
int _slp_maximum_unroll_factor;

// Cached CountedLoopEndNode of pre loop for main loops
CountedLoopEndNode* _pre_loop_end;

public:
CountedLoopNode(Node *entry, Node *backedge)
: BaseCountedLoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
_unrolled_count_log2(0), _node_count_before_unroll(0),
_slp_maximum_unroll_factor(0), _pre_loop_end(nullptr) {
_slp_maximum_unroll_factor(0) {
init_class_id(Class_CountedLoop);
// Initialize _trip_count to the largest possible value.
// Will be reset (lower) if the loop's trip count is known.
Expand Down Expand Up @@ -330,9 +328,6 @@ class CountedLoopNode : public BaseCountedLoopNode {

Node* is_canonical_loop_entry();
CountedLoopEndNode* find_pre_loop_end();
CountedLoopNode* pre_loop_head() const;
CountedLoopEndNode* pre_loop_end();
void set_pre_loop_end(CountedLoopEndNode* pre_loop_end);

#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
Expand Down Expand Up @@ -1437,6 +1432,14 @@ class PhaseIdealLoop : public PhaseTransform {
bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);

// AutoVectorize the loop: replace scalar ops with vector ops.
//
// Result of a single auto_vectorize() attempt on one loop:
// - Impossible:     returned when the loop is not counted, or is counted but
//                   not a main-loop; no vectorization is attempted.
// - Success:        returned when SuperWord::transform_loop() reports success.
// - TriedAndFailed: returned when the VLoop preconditions do not hold, or
//                   when SuperWord::transform_loop() reports failure.
enum AutoVectorizeStatus {
Impossible, // This loop has the wrong shape to even try vectorization.
Success, // We just successfully vectorized the loop.
TriedAndFailed, // We tried to vectorize, but failed.
};
// Try to auto-vectorize the loop lpt. vshared is cleared by auto_vectorize()
// before it is passed on to SuperWord.
AutoVectorizeStatus auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared);

// Move UnorderedReduction out of loop if possible
void move_unordered_reduction_out_of_loop(IdealLoopTree* loop);

Expand Down
31 changes: 31 additions & 0 deletions src/hotspot/share/opto/loopopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "opto/subtypenode.hpp"
#include "opto/superword.hpp"
#include "opto/vectornode.hpp"
#include "utilities/macros.hpp"

Expand Down Expand Up @@ -4209,6 +4210,36 @@ bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old
return true;
}

// Attempt to auto-vectorize a single loop, i.e. replace its scalar ops with
// vector ops.
//
// Returns:
//   Impossible     - lpt is not a counted main-loop, so nothing was attempted.
//   TriedAndFailed - the VLoop preconditions did not hold, or
//                    SuperWord::transform_loop() reported failure.
//   Success        - SuperWord::transform_loop() reported success.
//
// vshared is cleared here right before it is handed to SuperWord.
PhaseIdealLoop::AutoVectorizeStatus
PhaseIdealLoop::auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared) {
  // Only counted loops that are main-loops are candidates. The short-circuit
  // keeps as_CountedLoop() from being reached for non-counted loops.
  const bool is_counted_main_loop =
      lpt->is_counted() && lpt->_head->as_CountedLoop()->is_main_loop();
  if (!is_counted_main_loop) {
    return AutoVectorizeStatus::Impossible;
  }

  VLoop vloop(lpt, false);
  if (vloop.check_preconditions()) {
    // Ensure the shared data is cleared before each use
    vshared.clear();

    SuperWord sw(vloop, vshared);
    if (sw.transform_loop()) {
      return AutoVectorizeStatus::Success;
    }
  }

  // Either the preconditions failed or the transformation itself did.
  return AutoVectorizeStatus::TriedAndFailed;
}

// Having ReductionNodes in the loop is expensive. They need to recursively
// fold together the vector values, for every vectorized loop iteration. If
// we encounter the following pattern, we can vector accumulate the values
Expand Down
Loading

1 comment on commit 232d136

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.