From 199832a7101ca9dbfe7744ca0a1c4ff11d8832f2 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 2 Jun 2022 06:49:23 +0000 Subject: [PATCH] 8283466: C2: missing skeleton predicates in peeled loop Reviewed-by: roland, chagedorn --- src/hotspot/share/opto/loopPredicate.cpp | 55 ++++++--- src/hotspot/share/opto/loopTransform.cpp | 109 +++++++++++++++--- src/hotspot/share/opto/loopnode.hpp | 41 ++++++- ...eelingSkeletonPredicateInitialization.java | 80 +++++++++++++ 4 files changed, 248 insertions(+), 37 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/loopopts/TestPeelingSkeletonPredicateInitialization.java diff --git a/src/hotspot/share/opto/loopPredicate.cpp b/src/hotspot/share/opto/loopPredicate.cpp index 31e01eb03a64a..3fead4bee5014 100644 --- a/src/hotspot/share/opto/loopPredicate.cpp +++ b/src/hotspot/share/opto/loopPredicate.cpp @@ -491,24 +491,25 @@ Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) { } Node* PhaseIdealLoop::skip_all_loop_predicates(Node* entry) { - Node* predicate = NULL; - predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); - if (predicate != NULL) { - entry = skip_loop_predicates(entry); - } - if (UseProfiledLoopPredicate) { - predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate); - if (predicate != NULL) { // right pattern that can be used by loop predication - entry = skip_loop_predicates(entry); - } - } - if (UseLoopPredicate) { - predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); - if (predicate != NULL) { // right pattern that can be used by loop predication - entry = skip_loop_predicates(entry); + Predicates predicates(entry); + return predicates.skip_all(); +} + +//--------------------------next_predicate--------------------------------- +// Find next related predicate, useful for iterating over all related predicates +ProjNode* PhaseIdealLoop::next_predicate(ProjNode* predicate) { + IfNode* iff = predicate->in(0)->as_If(); + ProjNode* uncommon_proj = iff->proj_out(1 - predicate->_con); + Node* rgn = uncommon_proj->unique_ctrl_out(); + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); + Node* next = iff->in(0); + if (next != nullptr && next->is_Proj() && next->in(0)->is_If()) { + uncommon_proj = next->in(0)->as_If()->proj_out(1 - next->as_Proj()->_con); + if (uncommon_proj->unique_ctrl_out() == rgn) { // lead into same region + return next->as_Proj(); } } - return entry; + return nullptr; } //--------------------------find_predicate_insertion_point------------------- @@ -522,6 +523,28 @@ ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimi return NULL; } +//--------------------------Predicates::Predicates-------------------------- +// given loop entry, find all predicates above loop +PhaseIdealLoop::Predicates::Predicates(Node* entry) { + _loop_limit_check = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (_loop_limit_check != nullptr) { + entry = skip_loop_predicates(entry); + } + if (UseProfiledLoopPredicate) { + _profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate); + if (_profile_predicate != nullptr) { + entry = skip_loop_predicates(entry); + } + } + if (UseLoopPredicate) { + _predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); + if (_predicate != nullptr) { + entry = skip_loop_predicates(entry); + } + } + _entry_to_all_predicates = entry; +} + //--------------------------find_predicate------------------------------------ // Find a predicate Node* PhaseIdealLoop::find_predicate(Node* entry) { diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index 9e8148ce220da..a7f0055af3b38 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -614,13 +614,16 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne // after peel and predicate move // // stmt1 +// | +// v +// loop predicate // / // / // clone / orig // / // / +----------+ // / | | -// / loop predicate | +// / | | // / | | // v v | // TOP-->loop clone loop<----+ | @@ -647,7 +650,10 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne // // final graph // -// stmt1 +// stmt1 +// | +// v +// loop predicate // | // v // stmt2 clone @@ -660,7 +666,7 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne // false true // | | // | v -// | loop predicate +// | initialized skeleton predicates // | | // | v // | loop<----+ @@ -714,7 +720,9 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) { // Step 1: Clone the loop body. The clone becomes the peeled iteration. // The pre-loop illegally has 2 control users (old & new loops). - clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined); + const uint idx_before_clone = Compile::current()->unique(); + LoopNode* outer_loop_head = head->skip_strip_mined(); + clone_loop(loop, old_new, dom_depth(outer_loop_head), ControlAroundStripMined); // Step 2: Make the old-loop fall-in edges point to the peeled iteration. // Do this by making the old-loop fall-in edges act as if they came @@ -723,8 +731,8 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) { // the pre-loop with only 1 user (the new peeled iteration), but the // peeled-loop backedge has 2 users. Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx]; - _igvn.hash_delete(head->skip_strip_mined()); - head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry); + _igvn.hash_delete(outer_loop_head); + outer_loop_head->set_req(LoopNode::EntryControl, new_entry); for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { Node* old = head->fast_out(j); if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { @@ -753,16 +761,33 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) { // Step 4: Correct dom-depth info. Set to loop-head depth. - int dd = dom_depth(head->skip_strip_mined()); - set_idom(head->skip_strip_mined(), head->skip_strip_mined()->in(LoopNode::EntryControl), dd); + int dd_outer_loop_head = dom_depth(outer_loop_head); + set_idom(outer_loop_head, outer_loop_head->in(LoopNode::EntryControl), dd_outer_loop_head); for (uint j3 = 0; j3 < loop->_body.size(); j3++) { Node *old = loop->_body.at(j3); Node *nnn = old_new[old->_idx]; if (!has_ctrl(nnn)) { - set_idom(nnn, idom(nnn), dd-1); + set_idom(nnn, idom(nnn), dd_outer_loop_head-1); } } + // Step 5: skeleton_predicates instantiation + if (counted_loop && UseLoopPredicate) { + CountedLoopNode *cl_head = head->as_CountedLoop(); + Node* init = cl_head->init_trip(); + Node* stride = cl_head->stride(); + IdealLoopTree* outer_loop = get_loop(outer_loop_head); + Predicates predicates(new_head->in(LoopNode::EntryControl)); + initialize_skeleton_predicates_for_peeled_loop(predicates.predicate(), + outer_loop_head, dd_outer_loop_head, + init, stride, outer_loop, + idx_before_clone, old_new); + initialize_skeleton_predicates_for_peeled_loop(predicates.profile_predicate(), + outer_loop_head, dd_outer_loop_head, + init, stride, outer_loop, + idx_before_clone, old_new); + } + // Now force out all loop-invariant dominating tests. The optimizer // finds some, but we _know_ they are all useless. peeled_dom_test_elim(loop,old_new); @@ -1318,12 +1343,12 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicat // Clone the skeleton predicate twice and initialize one with the initial // value of the loop induction variable. Leave the other predicate // to be initialized when increasing the stride during loop unrolling. - prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, opaque_init, NULL, predicate, uncommon_proj, - current_proj, outer_loop, prev_proj); + prev_proj = clone_skeleton_predicate_and_initialize(iff, opaque_init, NULL, predicate, uncommon_proj, + current_proj, outer_loop, prev_proj); assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), ""); - prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, predicate, uncommon_proj, - current_proj, outer_loop, prev_proj); + prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj, + current_proj, outer_loop, prev_proj); assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), ""); // Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the @@ -1476,8 +1501,8 @@ Node* PhaseIdealLoop::clone_skeleton_predicate_bool(Node* iff, Node* new_init, N // Clone a skeleton predicate for the main loop. new_init and new_stride are set as new inputs. Since the predicates cannot fail at runtime, // Halt nodes are inserted instead of uncommon traps. -Node* PhaseIdealLoop::clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, - Node* control, IdealLoopTree* outer_loop, Node* input_proj) { +Node* PhaseIdealLoop::clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, + Node* control, IdealLoopTree* outer_loop, Node* input_proj) { Node* result = clone_skeleton_predicate_bool(iff, new_init, new_stride, control); Node* proj = predicate->clone(); Node* other_proj = uncommon_proj->clone(); @@ -2007,8 +2032,8 @@ void PhaseIdealLoop::update_main_loop_skeleton_predicates(Node* ctrl, CountedLoo _igvn.replace_input_of(iff, 1, iff->in(1)->in(2)); } else { // Add back predicates updated for the new stride. - prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, max_value, entry, proj, ctrl, outer_loop, - prev_proj); + prev_proj = clone_skeleton_predicate_and_initialize(iff, init, max_value, entry, proj, ctrl, outer_loop, + prev_proj); assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected"); } } @@ -2036,8 +2061,8 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h break; } if (iff->in(1)->Opcode() == Op_Opaque4 && skeleton_predicate_has_opaque(iff)) { - prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, ctrl, proj, post_loop_entry, - post_loop, prev_proj); + prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, ctrl, proj, post_loop_entry, + post_loop, prev_proj); assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected"); } ctrl = ctrl->in(0)->in(0); @@ -2048,6 +2073,52 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h } } +void PhaseIdealLoop::initialize_skeleton_predicates_for_peeled_loop(ProjNode* predicate, + LoopNode* outer_loop_head, + int dd_outer_loop_head, + Node* init, + Node* stride, + IdealLoopTree* outer_loop, + const uint idx_before_clone, + const Node_List &old_new) { + if (predicate == nullptr) { + return; + } + Node* control = outer_loop_head->in(LoopNode::EntryControl); + Node* input_proj = control; + + predicate = next_predicate(predicate); + while (predicate != nullptr) { + IfNode* iff = predicate->in(0)->as_If(); + if (iff->in(1)->Opcode() == Op_Opaque4) { + assert(skeleton_predicate_has_opaque(iff), "unexpected"); + ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con); + input_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj, control, outer_loop, input_proj); + + // Rewire any control inputs from the old skeleton predicates above the peeled iteration down to the initialized + // skeleton predicates above the peeled loop. + for (DUIterator i = predicate->outs(); predicate->has_out(i); i++) { + Node* dependent = predicate->out(i); + Node* new_node = old_new[dependent->_idx]; + + if (!dependent->is_CFG() && + dependent->_idx < idx_before_clone && // old node + new_node != nullptr && // cloned + new_node->_idx >= idx_before_clone) { // for peeling + // The old nodes from the peeled loop still point to the predicate above the peeled loop. + // We need to rewire the dependencies to the newly initialized skeleton predicates. + _igvn.replace_input_of(dependent, 0, input_proj); + --i; // correct for just deleted predicate->out(i) + } + } + } + predicate = next_predicate(predicate); + } + + _igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, input_proj); + set_idom(outer_loop_head, input_proj, dd_outer_loop_head); +} + //------------------------------do_unroll-------------------------------------- // Unroll the loop body one step - make each trip do 2 iterations. void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip) { diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index e474da6ed2be9..13ca3a60816fe 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -927,13 +927,16 @@ class PhaseIdealLoop : public PhaseTransform { void copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* init, Node* stride, IdealLoopTree* outer_loop, LoopNode* outer_main_head, uint dd_main_head, const uint idx_before_pre_post, const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main, Node* zero_trip_guard_proj_post, const Node_List &old_new); - Node* clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, Node* control, - IdealLoopTree* outer_loop, Node* input_proj); + Node* clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, Node* control, + IdealLoopTree* outer_loop, Node* input_proj); Node* clone_skeleton_predicate_bool(Node* iff, Node* new_init, Node* new_stride, Node* control); static bool skeleton_predicate_has_opaque(IfNode* iff); static void get_skeleton_predicates(Node* predicate, Unique_Node_List& list, bool get_opaque = false); void update_main_loop_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con); void copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_head, CountedLoopNode* post_loop_head, Node* init, Node* stride); + void initialize_skeleton_predicates_for_peeled_loop(ProjNode* predicate, LoopNode* outer_loop_head, int dd_outer_loop_head, + Node* init, Node* stride, IdealLoopTree* outer_loop, + const uint idx_before_clone, const Node_List& old_new); void insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol); #ifdef ASSERT bool only_has_infinite_loops(); @@ -1328,9 +1331,43 @@ class PhaseIdealLoop : public PhaseTransform { static Node* skip_all_loop_predicates(Node* entry); static Node* skip_loop_predicates(Node* entry); + static ProjNode* next_predicate(ProjNode* predicate); // Find a good location to insert a predicate static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason); + + class Predicates { + public: + // given loop entry, find all predicates above loop + Predicates(Node* entry); + + // Proj of empty loop limit check predicate + ProjNode* loop_limit_check() { + return _loop_limit_check; + } + + // Proj of empty profile predicate + ProjNode* profile_predicate() { + return _profile_predicate; + } + + // Proj of empty predicate + ProjNode* predicate() { + return _predicate; + } + + // First control node above all predicates + Node* skip_all() { + return _entry_to_all_predicates; + } + + private: + ProjNode*_loop_limit_check = nullptr; + ProjNode* _profile_predicate = nullptr; + ProjNode* _predicate = nullptr; + Node* _entry_to_all_predicates = nullptr; + }; + // Find a predicate static Node* find_predicate(Node* entry); // Construct a range check for a predicate if diff --git a/test/hotspot/jtreg/compiler/loopopts/TestPeelingSkeletonPredicateInitialization.java b/test/hotspot/jtreg/compiler/loopopts/TestPeelingSkeletonPredicateInitialization.java new file mode 100644 index 0000000000000..9c07c210324ec --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/TestPeelingSkeletonPredicateInitialization.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8283466 + * @summary When skeleton predicates are not copied into peeled loop and initialized, this can happen: + * 1. The rangecheck from a load is hoisted outside of the counted loop. + * 2. The counted loop is peeled (we disable unswitching and unrolling with arguments) + * 3. The type inside the peeled loop may now be narrower. + * 4. The dataflow can die when a type becomes impossible. + * 5. The rangecheck is still before the peeling, and is not copied to the peeled loop. Hence + * we do not statically realize that the peeled loop can never be entered. + * + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+StressLCM -XX:+StressGCM -XX:+StressCCP -XX:+StressIGVN + * -Xcomp -XX:-TieredCompilation + * -XX:LoopMaxUnroll=0 -XX:LoopUnrollLimit=0 -XX:-LoopUnswitching + * -XX:CompileCommand=compileonly,compiler.loopopts.TestPeelingSkeletonPredicateInitialization::* + * compiler.loopopts.TestPeelingSkeletonPredicateInitialization +*/ + +package compiler.loopopts; + +public class TestPeelingSkeletonPredicateInitialization { + int N = 400; + int array[] = new int[N]; + int array2[] = new int[N]; + void run(int X, int inv, boolean b) { + // have the arguments so the values are unknown to C2, cannot optimize things away + try { + // not sure why needed. maybe has sth to do with div_by_zero below? + int tmp = 1 / 0; + } catch (ArithmeticException e) { + } + for(int i = 2; i > X; i-=3) { + // potential div_by_zero: somehow the exit is not loop exit but rethrow + // also: i-1 only works in peeled iteration + // in peeled loop: ConvI2L dies because it knows dataflow is impossible + // If skeleton_predicate is missing for peeled loop, then controlflow does not die + array[i - 1] /= inv; + // loop invariant check that is not hoisted: this becomes reason for peeling + array[inv] += 1; + if (b) { + // seems to be required for the memory phi so that it can be mangled when data flow dies + array[inv] += 1; + array2[inv] += 1; + } + } + } + public static void main(String[] strArr) { + try { + TestPeelingSkeletonPredicateInitialization _instance = new TestPeelingSkeletonPredicateInitialization(); + for (int i = 0; i < 10000; i++) { + _instance.run(100, 3, false); + } + } catch (Exception ex) { + } + } +} +