Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions src/hotspot/share/compiler/compilerDefinitions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,15 +512,6 @@ bool CompilerConfig::check_args_consistency(bool status) {
FLAG_SET_CMDLINE(BackgroundCompilation, false);
}

#ifdef COMPILER2
if (PostLoopMultiversioning && !RangeCheckElimination) {
if (!FLAG_IS_DEFAULT(PostLoopMultiversioning)) {
warning("PostLoopMultiversioning disabled because RangeCheckElimination is disabled.");
}
FLAG_SET_CMDLINE(PostLoopMultiversioning, false);
}
#endif // COMPILER2

if (CompilerConfig::is_interpreter_only()) {
if (UseCompiler) {
if (!FLAG_IS_DEFAULT(UseCompiler)) {
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/share/opto/c2_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,6 @@
"Map number of unrolls for main loop via " \
"Superword Level Parallelism analysis") \
\
product(bool, PostLoopMultiversioning, false, EXPERIMENTAL, \
"Multi versioned post loops to eliminate range checks") \
\
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
"Trace what Superword Level Parallelism analysis applies") \
\
Expand Down
194 changes: 0 additions & 194 deletions src/hotspot/share/opto/loopTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1888,55 +1888,6 @@ void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old
loop->record_for_igvn();
}


//-------------------------insert_scalar_rced_post_loop------------------------
// Add a copy of a range-check-eliminated (RCE'd) main loop as a post loop.
// The main loop has not been unrolled yet, which makes this the right moment
// to add the copy. The partner post loop, which still contains range checks,
// is examined later in order to inject code that tests at runtime whether
// those range checks are applicable.
//
// Does nothing unless 'loop' is a counted main loop without remaining range
// checks.
void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
  if (!loop->_head->is_CountedLoop()) {
    return;
  }
  CountedLoopNode* main_cl = loop->_head->as_CountedLoop();

  // Only RCE'd main loops qualify.
  if (!main_cl->is_main_loop()) {
    return;
  }
  if (loop->range_checks_present()) {
    return;
  }

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PostScalarRce ");
    loop->dump_head();
  }
#endif
  C->set_major_progress();

  // Collect the pieces of the main loop that the new post loop is built from.
  CountedLoopEndNode* main_exit = main_cl->loopexit();
  // Diagnostic: a well-formed loop end has exactly two projections.
  assert(main_exit->outcnt() == 2, "1 true, 1 false path only");

  Node* iv_incr = main_exit->incr();
  Node* main_limit = main_exit->limit();

  // The value returned by insert_post_loop() is deliberately dropped; nothing
  // else needs to be connected to it here. The new loop head comes back
  // through 'post_cl'.
  CountedLoopNode* post_cl = nullptr;
  insert_post_loop(loop, old_new, main_cl, main_exit, iv_incr, main_limit, post_cl);
  copy_assertion_predicates_to_post_loop(main_cl->skip_strip_mined(), post_cl, iv_incr, main_cl->stride());

  // Trip counts of post loops are hard to predict precisely. They tend to be
  // very short, so a unit vector's worth of trips is used as the estimate.
  post_cl->set_profile_trip_cnt(4.0);
  post_cl->set_is_rce_post_loop();

  // Force out all loop-invariant dominating tests. The optimizer would find
  // some of them, but here all of them are known to be useless.
  peeled_dom_test_elim(loop, old_new);
  loop->record_for_igvn();
}



//------------------------------insert_post_loop-------------------------------
// Insert post loops. Add a post loop to the given loop passed.
Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree* loop, Node_List& old_new,
Expand Down Expand Up @@ -3198,143 +3149,6 @@ bool IdealLoopTree::compute_has_range_checks() const {
return false;
}

//-------------------------multi_version_post_loops----------------------------
// Inspect the range checks left in 'legacy_loop' (the post loop that still
// has them). If they are simple, their bounds are used to guard which post
// loop version executes: the one with range checks or the RCE'd 'rce_loop'.
//
// On success the limit input of the RCE'd loop's exit compare is replaced by
// the minimum of the collected LoadRange values and the current limit, and
// both loops are marked as multiversioned.
//
// Returns false when the expected graph shape around the two post loops is
// not found, or when a candidate test compares against something other than
// a LoadRange. Returns true otherwise, including the case where no LoadRange
// bound was collected and the graph is therefore left untouched.
bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
  assert(RangeCheckElimination, "");
  CountedLoopNode* legacy_head = legacy_loop->_head->as_CountedLoop();
  assert(legacy_head->is_post_loop(), "");

  // Gather every If / RangeCheck in the legacy loop body; these are the
  // candidates a guard is built from.
  Unique_Node_List candidates;
  for (uint idx = 0; idx < legacy_loop->_body.size(); idx++) {
    Node* n = legacy_loop->_body[idx];
    switch (n->Opcode()) {
      case Op_If:
      case Op_RangeCheck:
        candidates.push(n);
        break;
      default:
        break;
    }
  }

  // Locate the RCE'd post loop so that its guard can be staged.
  if (legacy_head->is_canonical_loop_entry() == nullptr) {
    return false;
  }
  Node* legacy_entry = legacy_head->in(LoopNode::EntryControl);
  Node* legacy_guard = legacy_entry->in(0);

  // Verify that the two post loops are connected to each other.
  Node* post_region = legacy_guard->in(0);
  if (post_region == nullptr || !post_region->is_Region()) {
    return false;
  }
  Node* cover_region = post_region->in(RegionNode::Control + 1);
  if (cover_region == nullptr || !cover_region->is_Region()) {
    return false;
  }
  Node* exit_proj = cover_region->in(RegionNode::Control);
  if (exit_proj == nullptr || !exit_proj->is_IfFalse()) {
    return false;
  }
  Node* exit_test = exit_proj->in(0);
  if (!exit_test->is_CountedLoopEnd()) {
    return false;
  }
  CountedLoopEndNode* rce_loop_end = exit_test->as_CountedLoopEnd();
  if (rce_loop_end == nullptr) {
    return false;
  }
  CountedLoopNode* rce_head = rce_loop_end->loopnode();
  if (rce_head == nullptr || !rce_head->is_post_loop()) {
    return false;
  }
  // The loop reached through the graph must be the one the caller handed in.
  if (rce_head != rce_loop->_head->as_CountedLoop()) {
    return false;
  }

  // Fetch the cover entry test of the RCE'd loop.
  Node* rce_entry = rce_head->in(LoopNode::EntryControl);
  if (!(rce_entry->is_IfTrue() || rce_entry->is_IfFalse())) {
    return false;
  }

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PostMultiVersion\n");
    rce_loop->dump_head();
    legacy_loop->dump_head();
  }
#endif

  // The limit the collected bounds are compared against.
  Node* rce_limit = rce_head->limit();

  // At this point the RCE'd post loop is identified and the candidate list is
  // filled. Fold the bound of every Bool/Cmp-shaped candidate into a running
  // minimum; the resulting limit lets the loop pair be multi-version executed,
  // with the decision left to runtime, or to the optimizer when the arrays are
  // fully known at compile time.
  Node* min_range = nullptr;
  while (candidates.size() > 0) {
    Node* test = candidates.pop();
    if (!test->is_If()) {
      continue;
    }
    Node* bol = test->in(1);
    if (!bol->is_Bool()) {
      continue;
    }
    Node* cmp = bol->in(1);
    if (!cmp->is_Cmp()) {
      continue;
    }
    Node* bound = cmp->in(2);
    if (bound->Opcode() != Op_LoadRange) {
      // Not a simple array-length check: give up.
      return false;
    }
    if (min_range == nullptr) {
      // First bound seen, nothing to combine with yet.
      min_range = bound;
    } else {
      min_range = new MinINode(min_range, bound);
      _igvn.register_new_node_with_optimizer(min_range);
    }
  }

  // No bound was collected: there is nothing to rewire, and this is still
  // reported as success.
  if (min_range == nullptr) {
    return true;
  }

  // All that is left is to replace the current limit of the rce loop with the
  // min of the collected expression and that limit.
  Node* new_limit = new MinINode(min_range, rce_limit);
  _igvn.register_new_node_with_optimizer(new_limit);
  Node* exit_cmp = rce_loop_end->cmp_node();
  _igvn.replace_input_of(exit_cmp, 2, new_limit);
  set_ctrl(new_limit, rce_entry);
  set_loop(new_limit, rce_loop->_parent);

  legacy_head->mark_is_multiversioned();
  rce_head->mark_is_multiversioned();

  C->set_major_progress();
  return true;
}


//-------------------------poison_rce_post_loop--------------------------------
// Make the rce'd post loop go away when multiversioning fails.
// The entry test of the loop is rewritten in place: its Bool is replaced by a
// 'gt' test on the same Cmp, and the Cmp's second input is replaced by its
// first input, so the guard ends up comparing a value against itself.
// Nothing is changed unless the entry control is an IfTrue/IfFalse projection
// of an If whose condition has the Bool -> Cmp shape.
void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
  CountedLoopNode* rce_head = rce_loop->_head->as_CountedLoop();
  Node* entry_proj = rce_head->in(LoopNode::EntryControl);
  if (!(entry_proj->is_IfTrue() || entry_proj->is_IfFalse())) {
    return;
  }
  Node* guard_if = entry_proj->in(0);
  if (!guard_if->is_If()) {
    return;
  }
  Node* old_bool = guard_if->in(1);
  if (!old_bool->is_Bool()) {
    return;
  }
  Node* guard_cmp = old_bool->in(1);
  if (!guard_cmp->is_Cmp()) {
    return;
  }

  // Swap in a 'gt' test on the existing compare ...
  BoolNode* poisoned_bool = new BoolNode(guard_cmp, BoolTest::gt);
  _igvn.replace_node(old_bool, poisoned_bool);
  _igvn._worklist.push(poisoned_bool);
  // ... and make both operands of that compare the same node.
  Node* first_operand = guard_cmp->in(1);
  _igvn.replace_input_of(guard_cmp, 2, first_operand);
  C->set_major_progress();
}


//------------------------------DCE_loop_body----------------------------------
// Remove simplistic dead code from loop body
void IdealLoopTree::DCE_loop_body() {
Expand Down Expand Up @@ -3864,14 +3678,6 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
phase->do_range_check(this, old_new);
}

if (should_unroll && !should_peel && PostLoopMultiversioning &&
Matcher::has_predicated_vectors()) {
// Try to setup multiversioning on main loops before they are unrolled
if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
phase->insert_scalar_rced_post_loop(this, old_new);
}
}

// Double loop body for unrolling. Adjust the minimum-trip test (will do
// twice as many iterations as before) and the main body limit (only do
// an even number of trips). If we are peeling, we might enable some RCE
Expand Down
25 changes: 1 addition & 24 deletions src/hotspot/share/opto/loopnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4013,7 +4013,6 @@ void IdealLoopTree::dump_head() {
if (cl->is_post_loop()) tty->print(" post");
if (cl->is_vectorized_loop()) tty->print(" vector");
if (range_checks_present()) tty->print(" rc ");
if (cl->is_multiversioned()) tty->print(" multi ");
}
if (_has_call) tty->print(" has_call");
if (_has_sfpt) tty->print(" has_sfpt");
Expand Down Expand Up @@ -4653,29 +4652,7 @@ void PhaseIdealLoop::build_and_optimize() {
IdealLoopTree* lpt = iter.current();
if (lpt->is_counted()) {
CountedLoopNode *cl = lpt->_head->as_CountedLoop();

if (cl->is_rce_post_loop() && !cl->is_vectorized_loop()) {
assert(PostLoopMultiversioning, "multiversioning must be enabled");
// Check that the rce'd post loop is encountered first, multiversion after all
// major main loop optimization are concluded
if (!C->major_progress()) {
IdealLoopTree *lpt_next = lpt->_next;
if (lpt_next && lpt_next->is_counted()) {
CountedLoopNode *cl = lpt_next->_head->as_CountedLoop();
if (cl->is_post_loop() && lpt_next->range_checks_present()) {
if (!cl->is_multiversioned()) {
if (multi_version_post_loops(lpt, lpt_next) == false) {
// Cause the rce loop to be optimized away if we fail
cl->mark_is_multiversioned();
cl->set_slp_max_unroll(0);
poison_rce_post_loop(lpt);
}
}
}
}
sw.transform_loop(lpt, true);
}
} else if (cl->is_main_loop()) {
if (cl->is_main_loop()) {
if (!sw.transform_loop(lpt, true)) {
// Instigate more unrolling for optimization when vectorization fails.
if (cl->has_passed_slp()) {
Expand Down
32 changes: 6 additions & 26 deletions src/hotspot/share/opto/loopnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,13 @@ class LoopNode : public RegionNode {
DoUnrollOnly = 1<<9,
VectorizedLoop = 1<<10,
HasAtomicPostLoop = 1<<11,
IsMultiversioned = 1<<12,
StripMined = 1<<13,
SubwordLoop = 1<<14,
ProfileTripFailed = 1<<15,
LoopNestInnerLoop = 1<<16,
LoopNestLongOuterLoop = 1<<17};
StripMined = 1<<12,
SubwordLoop = 1<<13,
ProfileTripFailed = 1<<14,
LoopNestInnerLoop = 1<<15,
LoopNestLongOuterLoop = 1<<16 };
char _unswitch_count;
enum { _unswitch_max=3 };
char _postloop_flags;
enum { RCEPostLoop = 1 };

// Expected trip count from profile data
float _profile_trip_cnt;
Expand All @@ -93,7 +90,6 @@ class LoopNode : public RegionNode {
bool is_inner_loop() const { return _loop_flags & InnerLoop; }
void set_inner_loop() { _loop_flags |= InnerLoop; }

bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
Expand All @@ -110,7 +106,6 @@ class LoopNode : public RegionNode {
void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
void mark_strip_mined() { _loop_flags |= StripMined; }
void clear_strip_mined() { _loop_flags &= ~StripMined; }
void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
Expand All @@ -121,9 +116,6 @@ class LoopNode : public RegionNode {
int unswitch_max() { return _unswitch_max; }
int unswitch_count() { return _unswitch_count; }

int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }

void set_unswitch_count(int val) {
assert (val <= unswitch_max(), "too many unswitches");
_unswitch_count = val;
Expand All @@ -134,7 +126,7 @@ class LoopNode : public RegionNode {

LoopNode(Node *entry, Node *backedge)
: RegionNode(3), _loop_flags(0), _unswitch_count(0),
_postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
_profile_trip_cnt(COUNT_UNKNOWN) {
init_class_id(Class_Loop);
init_req(EntryControl, entry);
init_req(LoopBackControl, backedge);
Expand Down Expand Up @@ -322,8 +314,6 @@ class CountedLoopNode : public BaseCountedLoopNode {
int node_count_before_unroll() { return _node_count_before_unroll; }
void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
void set_slp_pack_count(int pack_count) { _slp_vector_pack_count = pack_count; }
int slp_pack_count() const { return _slp_vector_pack_count; }

virtual LoopNode* skip_strip_mined(int expect_skeleton = 1);
OuterStripMinedLoopNode* outer_loop() const;
Expand Down Expand Up @@ -1305,9 +1295,6 @@ class PhaseIdealLoop : public PhaseTransform {
CountedLoopNode* main_head, CountedLoopEndNode* main_end,
Node*& incr, Node* limit, CountedLoopNode*& post_head);

// Add an RCE'd post loop which we will multi-version adapt for run time test path usage
void insert_scalar_rced_post_loop( IdealLoopTree *loop, Node_List &old_new );

// Add a vector post loop between a vector main loop and the current post loop
void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
// If Node n lives in the back_ctrl block, we clone a private version of n
Expand Down Expand Up @@ -1402,13 +1389,6 @@ class PhaseIdealLoop : public PhaseTransform {
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
void do_range_check(IdealLoopTree *loop, Node_List &old_new);

// Process post loops which have range checks and try to build a multi-version
// guard to safely determine if we can execute the post loop which was RCE'd.
bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);

// Cause the rce'd post loop to be optimized away; this happens if we cannot complete multiversioning
void poison_rce_post_loop(IdealLoopTree *rce_loop);

// Create a slow version of the loop by cloning the loop
// and inserting an if to select fast-slow versions.
// Return the inserted if.
Expand Down
Loading