8229499: Node budget assert in fuzzed test

Reviewed-by: thartmann, neliasso
openjdk · Sep 12, 2019 · 8275b17 · 8275b17
1 parent c1865c4
commit 8275b17
Show file tree

Hide file tree

Showing 4 changed files with 171 additions and 85 deletions.
diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp
@@ -671,76 +671,50 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
   loop->record_for_igvn();
 }
 
-// The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
-// where BC  and CC are  (totally) ad-hoc/magic "body" and  "clone" constants,
-// respectively, used to ensure that node usage estimates made are on the safe
-// side, for the  most part.  This is  a simplified version of  the loop clone
-// size calculation in est_loop_clone_sz(),  defined for unroll factors larger
-// than one  (>1), performing  an overflow check  and returning  'UINT_MAX' in
-// case of an overflow.
-static uint est_loop_unroll_sz(uint factor, uint size) {
-  precond(0 < factor);
-
-  uint const bc = 5;
-  uint const cc = 7;
-  uint const sz = size + (size + 15) / 16;
-  uint estimate = factor * (sz + bc) + cc;
-
-  return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
-}
-
-#define EMPTY_LOOP_SIZE 7   // Number of nodes in an empty loop.
-
 //------------------------------policy_maximally_unroll------------------------
 // Calculate the exact  loop trip-count and return TRUE if loop can be fully,
 // i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
 // node budget is also requested.
-bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
-  CountedLoopNode *cl = _head->as_CountedLoop();
+bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop* phase) const {
+  CountedLoopNode* cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop(), "");
   if (!cl->is_valid_counted_loop()) {
-    return false; // Malformed counted loop
+    return false;   // Malformed counted loop.
   }
   if (!cl->has_exact_trip_count()) {
-    // Trip count is not exact.
-    return false;
+    return false;   // Trip count is not exact.
   }
 
   uint trip_count = cl->trip_count();
   // Note, max_juint is used to indicate unknown trip count.
   assert(trip_count > 1, "one iteration loop should be optimized out already");
-  assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
+  assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
 
   // If nodes are depleted, some transform has miscalculated its needs.
   assert(!phase->exceeding_node_budget(), "sanity");
 
-  // Real policy: if we maximally unroll, does it get too big?
-  // Allow the unrolled mess to get larger than standard loop
-  // size.  After all, it will no longer be a loop.
-  uint body_size    = _body.size();
+  // Allow the unrolled body to get larger than the standard loop size limit.
   uint unroll_limit = (uint)LoopUnrollLimit * 4;
   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
-  if (trip_count > unroll_limit || body_size > unroll_limit) {
+  if (trip_count > unroll_limit || _body.size() > unroll_limit) {
     return false;
   }
 
-  // Take into account that after unroll conjoined heads and tails will fold,
-  // otherwise policy_unroll() may allow more unrolling than max unrolling.
-  uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
+  uint new_body_size = est_loop_unroll_sz(trip_count);
 
   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
     return false;
   }
 
-  // Fully unroll a loop with few iterations regardless next conditions since
-  // following loop optimizations will split such loop anyway (pre-main-post).
+  // Fully unroll a loop with few iterations, regardless of other conditions,
+  // since the following (general) loop optimizations will split such loop in
+  // any case (into pre-main-post).
   if (trip_count <= 3) {
     return phase->may_require_nodes(new_body_size);
   }
 
-  if (new_body_size > unroll_limit ||
-      // Unrolling can result in a large amount of node construction
-      phase->exceeding_node_budget(new_body_size)) {
+  // Reject if unrolling will result in too much node construction.
+  if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
     return false;
   }
 

diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp
@@ -2471,6 +2471,39 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
 
   assert((estimate - cc) / factor == sz + bc, "overflow");
 
+  return estimate + est_loop_flow_merge_sz();
+}
+
+// The Estimated Loop (full-) Unroll Size:
+//   UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
+// where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that
+// node usage estimates made are on the safe side, for the most part. This is
+// a "light" version of the loop clone size calculation (above), based on the
+// assumption that most of the loop-construct overhead will be unraveled when
+// (fully) unrolled. Defined for unroll factors larger or equal to one (>=1),
+// including an overflow check and returning UINT_MAX in case of an overflow.
+uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
+
+  precond(factor > 0);
+
+  // Take into account that after unroll conjoined heads and tails will fold.
+  uint const b0 = _body.size() - EMPTY_LOOP_SIZE;
+  uint const cc = 7;
+  uint const sz = b0 + (b0 + 15) / 16;
+  uint estimate = factor * sz + cc;
+
+  if ((estimate - cc) / factor != sz) {
+    return UINT_MAX;
+  }
+
+  return estimate + est_loop_flow_merge_sz();
+}
+
+// Estimate the growth effect (in nodes) of merging control and data flow when
+// cloning a loop body, based on the amount of  control and data flow reaching
+// outside of the (current) loop body.
+uint IdealLoopTree::est_loop_flow_merge_sz() const {
+
   uint ctrl_edge_out_cnt = 0;
   uint data_edge_out_cnt = 0;
 
@@ -2494,24 +2527,21 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
       }
     }
   }
-  // Add data and control count (x2.0) to estimate iff both are > 0. This is
+  // Use data and control count (x2.0) in estimate iff both are > 0. This is
   // a rather pessimistic estimate for the most part, in particular for some
   // complex loops, but still not enough to capture all loops.
   if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
-    estimate += 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
+    return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
   }
-
-  return estimate;
+  return 0;
 }
 
 #ifndef PRODUCT
 //------------------------------dump_head--------------------------------------
 // Dump 1 liner for loop header info
 void IdealLoopTree::dump_head() const {
-  for (uint i = 0; i < _nest; i++) {
-    tty->print("  ");
-  }
-  tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
+  tty->sp(2 * _nest);
+  tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
   if (_irreducible) tty->print(" IRREDUCIBLE");
   Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
   Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
@@ -4501,69 +4531,67 @@ void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node*
 
 #ifndef PRODUCT
 //------------------------------dump-------------------------------------------
-void PhaseIdealLoop::dump( ) const {
+void PhaseIdealLoop::dump() const {
   ResourceMark rm;
   Arena* arena = Thread::current()->resource_area();
   Node_Stack stack(arena, C->live_nodes() >> 2);
   Node_List rpo_list;
   VectorSet visited(arena);
   visited.set(C->top()->_idx);
-  rpo( C->root(), stack, visited, rpo_list );
+  rpo(C->root(), stack, visited, rpo_list);
   // Dump root loop indexed by last element in PO order
-  dump( _ltree_root, rpo_list.size(), rpo_list );
+  dump(_ltree_root, rpo_list.size(), rpo_list);
 }
 
-void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
+void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const {
   loop->dump_head();
 
   // Now scan for CFG nodes in the same loop
-  for( uint j=idx; j > 0;  j-- ) {
-    Node *n = rpo_list[j-1];
-    if( !_nodes[n->_idx] )      // Skip dead nodes
+  for (uint j = idx; j > 0; j--) {
+    Node* n = rpo_list[j-1];
+    if (!_nodes[n->_idx])      // Skip dead nodes
       continue;
-    if( get_loop(n) != loop ) { // Wrong loop nest
-      if( get_loop(n)->_head == n &&    // Found nested loop?
-          get_loop(n)->_parent == loop )
-        dump(get_loop(n),rpo_list.size(),rpo_list);     // Print it nested-ly
+
+    if (get_loop(n) != loop) { // Wrong loop nest
+      if (get_loop(n)->_head == n &&    // Found nested loop?
+          get_loop(n)->_parent == loop)
+        dump(get_loop(n), rpo_list.size(), rpo_list);     // Print it nested-ly
       continue;
     }
 
     // Dump controlling node
-    for( uint x = 0; x < loop->_nest; x++ )
-      tty->print("  ");
+    tty->sp(2 * loop->_nest);
     tty->print("C");
-    if( n == C->root() ) {
+    if (n == C->root()) {
       n->dump();
     } else {
       Node* cached_idom   = idom_no_update(n);
-      Node *computed_idom = n->in(0);
-      if( n->is_Region() ) {
+      Node* computed_idom = n->in(0);
+      if (n->is_Region()) {
         computed_idom = compute_idom(n);
         // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
         // any MultiBranch ctrl node), so apply a similar transform to
         // the cached idom returned from idom_no_update.
         cached_idom = find_non_split_ctrl(cached_idom);
       }
-      tty->print(" ID:%d",computed_idom->_idx);
+      tty->print(" ID:%d", computed_idom->_idx);
       n->dump();
-      if( cached_idom != computed_idom ) {
+      if (cached_idom != computed_idom) {
         tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
                       computed_idom->_idx, cached_idom->_idx);
       }
     }
     // Dump nodes it controls
-    for( uint k = 0; k < _nodes.Size(); k++ ) {
+    for (uint k = 0; k < _nodes.Size(); k++) {
       // (k < C->unique() && get_ctrl(find(k)) == n)
       if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
-        Node *m = C->root()->find(k);
-        if( m && m->outcnt() > 0 ) {
+        Node* m = C->root()->find(k);
+        if (m && m->outcnt() > 0) {
           if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
             tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
                           _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
           }
-          for( uint j = 0; j < loop->_nest; j++ )
-            tty->print("  ");
-          tty->print(" ");
+          tty->sp(2 * loop->_nest + 1);
           m->dump();
         }
       }
@@ -4574,7 +4602,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
 
 // Collect a R-P-O for the whole CFG.
 // Result list is in post-order (scan backwards for RPO)
-void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
+void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const {
   stk.push(start, 0);
   visited.set(start->_idx);
 
@@ -4596,7 +4624,7 @@ void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node
 
 
 //=============================================================================
-//------------------------------LoopTreeIterator-----------------------------------
+//------------------------------LoopTreeIterator-------------------------------
 
 // Advance to next loop tree using a preorder, left-to-right traversal.
 void LoopTreeIterator::next() {

diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp
@@ -623,6 +623,8 @@ class IdealLoopTree : public ResourceObj {
 
   // Estimate the number of nodes required when cloning a loop (body).
   uint est_loop_clone_sz(uint factor) const;
+  // Estimate the number of nodes required when unrolling a loop (body).
+  uint est_loop_unroll_sz(uint factor) const;
 
   // Compute loop trip count if possible
   void compute_trip_count(PhaseIdealLoop* phase);
@@ -654,11 +656,16 @@ class IdealLoopTree : public ResourceObj {
   void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
 
 #ifndef PRODUCT
-  void dump_head( ) const;      // Dump loop head only
+  void dump_head() const;       // Dump loop head only
   void dump() const;            // Dump this loop recursively
   void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
 #endif
 
+ private:
+  enum { EMPTY_LOOP_SIZE = 7 }; // Number of nodes in an empty loop.
+
+  // Estimate the number of nodes resulting from control and data flow merge.
+  uint est_loop_flow_merge_sz() const;
 };
 
 // -----------------------------PhaseIdealLoop---------------------------------
@@ -675,7 +682,7 @@ class PhaseIdealLoop : public PhaseTransform {
   PhaseIterGVN &_igvn;
 
   // Head of loop tree
-  IdealLoopTree *_ltree_root;
+  IdealLoopTree* _ltree_root;
 
   // Array of pre-order numbers, plus post-visited bit.
   // ZERO for not pre-visited.  EVEN for pre-visited but not post-visited.
@@ -1017,9 +1024,9 @@ class PhaseIdealLoop : public PhaseTransform {
   bool _has_irreducible_loops;
 
   // Per-Node transform
-  virtual Node *transform( Node *a_node ) { return 0; }
+  virtual Node* transform(Node* n) { return 0; }
 
-  bool is_counted_loop(Node* x, IdealLoopTree*& loop);
+  bool is_counted_loop(Node* n, IdealLoopTree* &loop);
   IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
                                                IdealLoopTree* loop, float cl_prob, float le_fcnt,
                                                Node*& entry_control, Node*& iffalse);
@@ -1034,7 +1041,7 @@ class PhaseIdealLoop : public PhaseTransform {
     return (IdealLoopTree*)_nodes[n->_idx];
   }
 
-  IdealLoopTree *ltree_root() const { return _ltree_root; }
+  IdealLoopTree* ltree_root() const { return _ltree_root; }
 
   // Is 'n' a (nested) member of 'loop'?
   int is_member( const IdealLoopTree *loop, Node *n ) const {
@@ -1319,7 +1326,7 @@ class PhaseIdealLoop : public PhaseTransform {
   // same block.  Split thru the Region.
   void do_split_if( Node *iff );
 
-  // Conversion of fill/copy patterns into intrisic versions
+  // Conversion of fill/copy patterns into intrinsic versions
   bool do_intrinsify_fill();
   bool intrinsify_fill(IdealLoopTree* lpt);
   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
@@ -1419,18 +1426,18 @@ class PhaseIdealLoop : public PhaseTransform {
 public:
   void set_created_loop_node() { _created_loop_node = true; }
   bool created_loop_node()     { return _created_loop_node; }
-  void register_new_node( Node *n, Node *blk );
+  void register_new_node(Node* n, Node* blk);
 
 #ifdef ASSERT
   void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
 #endif
 
 #ifndef PRODUCT
-  void dump( ) const;
-  void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
+  void dump() const;
+  void dump(IdealLoopTree* loop, uint rpo_idx, Node_List &rpo_list) const;
   void verify() const;          // Major slow  :-)
-  void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
-  IdealLoopTree *get_loop_idx(Node* n) const {
+  void verify_compare(Node* n, const PhaseIdealLoop* loop_verify, VectorSet &visited) const;
+  IdealLoopTree* get_loop_idx(Node* n) const {
     // Dead nodes have no loop, so return the top level loop instead
     return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
   }
@@ -1439,7 +1446,8 @@ class PhaseIdealLoop : public PhaseTransform {
   static int _loop_invokes;     // Count of PhaseIdealLoop invokes
   static int _loop_work;        // Sum of PhaseIdealLoop x _unique
 #endif
-  void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
+
+  void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;
 };