[PM/LCG] Remove the lazy RefSCC formation from the LazyCallGraph during iteration.

The lazy formation of RefSCCs isn't really the most important part of the laziness here -- that has to do with walking the functions themselves -- and isn't essential to maintain. Originally, there were incremental update algorithms that relied on updates happening predominantly near the most recent RefSCC formed, but those have been replaced with ones that have much tighter general-case bounds at this point. We do still perform asserts that only scale well due to this incrementality, but those are easy to place behind EXPENSIVE_CHECKS.

Removing this simplifies the entire analysis by having a single up-front step that builds all of the RefSCCs in a direct Tarjan walk. We can even easily replace this with other or better algorithms at will and with much less confusion now that there is no iterator-based incremental logic involved. This removes a lot of complexity from LCG.

Another advantage of moving in this direction is that it simplifies testing the system substantially, as we no longer have to worry about observing and mutating the graph halfway through the RefSCC formation.

We still need a somewhat special iterator for RefSCCs because we want the iterator to remain stable in the face of graph updates. However, this now merely involves relative indexing to the current RefSCC's position in the sequence, which isn't too hard.

Differential Revision: https://reviews.llvm.org/D29381

llvm-svn: 294227
llvm · Feb 6, 2017 · 2e0fe3e · 2e0fe3e
1 parent 8cdfe8e
commit 2e0fe3e
Show file tree

Hide file tree

Showing 5 changed files with 167 additions and 470 deletions.
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -334,6 +334,7 @@ class ModuleToPostOrderCGSCCPassAdaptor
                             InvalidSCCSet, nullptr,   nullptr};
 
     PreservedAnalyses PA = PreservedAnalyses::all();
+    CG.buildRefSCCs();
     for (auto RCI = CG.postorder_ref_scc_begin(),
               RCE = CG.postorder_ref_scc_end();
          RCI != RCE;) {

diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -794,14 +794,10 @@ class LazyCallGraph {
 
   /// A post-order depth-first RefSCC iterator over the call graph.
   ///
-  /// This iterator triggers the Tarjan DFS-based formation of the RefSCC (and
-  /// SCC) DAG for the call graph, walking it lazily in depth-first post-order.
-  /// That is, it always visits RefSCCs for the target of a reference edge
-  /// prior to visiting the RefSCC for a source of the edge (when they are in
-  /// different RefSCCs).
-  ///
-  /// When forming each RefSCC, the call edges within it are used to form SCCs
-  /// within it, so iterating this also controls the lazy formation of SCCs.
+  /// This iterator walks the cached post-order sequence of RefSCCs. However,
+  /// it trades flexibility for stability. It is restricted to a forward
+  /// iterator but will survive mutations which insert new RefSCCs and continue
+  /// to point to the same RefSCC even if it moves in the post-order sequence.
   class postorder_ref_scc_iterator
       : public iterator_facade_base<postorder_ref_scc_iterator,
                                     std::forward_iterator_tag, RefSCC> {
@@ -825,12 +821,9 @@ class LazyCallGraph {
     /// populating it if necessary.
     static RefSCC *getRC(LazyCallGraph &G, int Index) {
       if (Index == (int)G.PostOrderRefSCCs.size())
-        if (!G.buildNextRefSCCInPostOrder())
-          // We're at the end.
-          return nullptr;
+        // We're at the end.
+        return nullptr;
 
-      assert(Index < (int)G.PostOrderRefSCCs.size() &&
-             "Built the next post-order RefSCC without growing list!");
       return G.PostOrderRefSCCs[Index];
     }
 
@@ -866,10 +859,18 @@ class LazyCallGraph {
     return edge_iterator(EntryEdges.end(), EntryEdges.end());
   }
 
+  void buildRefSCCs();
+
   postorder_ref_scc_iterator postorder_ref_scc_begin() {
+    if (!EntryEdges.empty())
+      assert(!PostOrderRefSCCs.empty() &&
+             "Must form RefSCCs before iterating them!");
     return postorder_ref_scc_iterator(*this);
   }
   postorder_ref_scc_iterator postorder_ref_scc_end() {
+    if (!EntryEdges.empty())
+      assert(!PostOrderRefSCCs.empty() &&
+             "Must form RefSCCs before iterating them!");
     return postorder_ref_scc_iterator(*this,
                                       postorder_ref_scc_iterator::IsAtEndT());
   }
@@ -1045,18 +1046,6 @@ class LazyCallGraph {
   /// These are all of the RefSCCs which have no children.
   SmallVector<RefSCC *, 4> LeafRefSCCs;
 
-  /// Stack of nodes in the DFS walk.
-  SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack;
-
-  /// Set of entry nodes not-yet-processed into RefSCCs.
-  SmallVector<Function *, 4> RefSCCEntryNodes;
-
-  /// Stack of nodes the DFS has walked but not yet put into a RefSCC.
-  SmallVector<Node *, 4> PendingRefSCCStack;
-
-  /// Counter for the next DFS number to assign.
-  int NextDFSNumber;
-
   /// Helper to insert a new function, with an already looked-up entry in
   /// the NodeMap.
   Node &insertInto(Function &F, Node *&MappedN);
@@ -1078,6 +1067,23 @@ class LazyCallGraph {
     return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...);
   }
 
+  /// Common logic for building SCCs from a sequence of roots.
+  ///
+  /// This is a very generic implementation of the depth-first walk and SCC
+  /// formation algorithm. It uses a generic sequence of roots and generic
+  /// callbacks for each step. This is designed to be used to implement both
+  /// the RefSCC formation and SCC formation with shared logic.
+  ///
+  /// Currently this is a relatively naive implementation of Tarjan's DFS
+  /// algorithm to form the SCCs.
+  ///
+  /// FIXME: We should consider newer variants such as Nuutila.
+  template <typename RootsT, typename GetBeginT, typename GetEndT,
+            typename GetNodeT, typename FormSCCCallbackT>
+  static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
+                               GetEndT &&GetEnd, GetNodeT &&GetNode,
+                               FormSCCCallbackT &&FormSCC);
+
   /// Build the SCCs for a RefSCC out of a list of nodes.
   void buildSCCs(RefSCC &RC, node_stack_range Nodes);
 
@@ -1098,13 +1104,6 @@ class LazyCallGraph {
            "Index does not point back at RC!");
     return IndexIt->second;
   }
-
-  /// Builds the next node in the post-order RefSCC walk of the call graph and
-  /// appends it to the \c PostOrderRefSCCs vector.
-  ///
-  /// Returns true if a new RefSCC was successfully constructed, and false if
-  /// there are no more RefSCCs to build in the graph.
-  bool buildNextRefSCCInPostOrder();
 };
 
 inline LazyCallGraph::Edge::Edge() : Value() {}

diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -117,6 +117,7 @@ bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
       PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>();
 
   // Ok, we have a graph, so we can propagate the invalidation down into it.
+  G->buildRefSCCs();
   for (auto &RC : G->postorder_ref_sccs())
     for (auto &C : RC) {
       Optional<PreservedAnalyses> InnerPA;