NewGVN: Move leaders around properly to ensure we have a canonical do…

…minating leader. Fixes PR 31613. Summary: This is a testcase where phi node cycling happens, and because we do not order the leaders by domination or anything similar, the leader keeps changing. Using std::set for the members is too expensive, and we actually don't need them sorted all the time, only at leader changes. We could keep both a set and a vector, and keep them mostly sorted and resort as necessary, or use a set and a fibheap, but all of this seems premature. After running some statistics, we are able to avoid the vast majority of sorting by keeping a "next leader" field. Most congruence classes only have leader changes once or twice during GVN. Reviewers: davide Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D28594 llvm-svn: 291968
llvm · Jan 13, 2017 · c0431fd · c0431fd
1 parent c109bbe
commit c0431fd
Show file tree

Hide file tree

Showing 2 changed files with 224 additions and 40 deletions.
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -81,6 +81,10 @@ STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
 STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same");
 STATISTIC(NumGVNMaxIterations,
           "Maximum Number of iterations it took to converge GVN");
+STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
+STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
+STATISTIC(NumGVNAvoidedSortedLeaderChanges,
+          "Number of avoided sorted leader changes");
 
 //===----------------------------------------------------------------------===//
 //                                GVN Pass
@@ -139,6 +143,10 @@ struct CongruenceClass {
   // This is used so we can detect store equivalence changes properly.
   int StoreCount = 0;
 
+  // The most dominating leader after our current leader, because the member set
+  // is not sorted and is expensive to keep sorted all the time.
+  std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
+
   explicit CongruenceClass(unsigned ID) : ID(ID) {}
   CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
       : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
@@ -320,8 +328,8 @@ class NewGVN : public FunctionPass {
   // Templated to allow them to work both on BB's and BB-edges.
   template <class T>
   Value *lookupOperandLeader(Value *, const User *, const T &) const;
-  void performCongruenceFinding(Value *, const Expression *);
-  void moveValueToNewCongruenceClass(Value *, CongruenceClass *,
+  void performCongruenceFinding(Instruction *, const Expression *);
+  void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *,
                                      CongruenceClass *);
   // Reachability handling.
   void updateReachableEdge(BasicBlock *, BasicBlock *);
@@ -1056,20 +1064,43 @@ void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
 
 // Move a value, currently in OldClass, to be part of NewClass
 // Update OldClass for the move (including changing leaders, etc)
-void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass,
+void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
+                                           CongruenceClass *OldClass,
                                            CongruenceClass *NewClass) {
-  DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID
+  DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID
                << "\n");
-  OldClass->Members.erase(V);
-  NewClass->Members.insert(V);
-  if (isa<StoreInst>(V)) {
+
+  if (I == OldClass->NextLeader.first)
+    OldClass->NextLeader = {nullptr, ~0U};
+
+  // The new instruction and new class leader may either be siblings in the
+  // dominator tree, or the new class leader should dominate the new member
+  // instruction.  We simply check that the member instruction does not properly
+  // dominate the new class leader.
+  assert(
+      !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader ||
+      I == NewClass->RepLeader ||
+      !DT->properlyDominates(
+          I->getParent(),
+          cast<Instruction>(NewClass->RepLeader)->getParent()) &&
+          "New class for instruction should not be dominated by instruction");
+
+  if (NewClass->RepLeader != I) {
+    auto DFSNum = InstrDFS.lookup(I);
+    if (DFSNum < NewClass->NextLeader.second)
+      NewClass->NextLeader = {I, DFSNum};
+  }
+
+  OldClass->Members.erase(I);
+  NewClass->Members.insert(I);
+  if (isa<StoreInst>(I)) {
     --OldClass->StoreCount;
     assert(OldClass->StoreCount >= 0);
     ++NewClass->StoreCount;
     assert(NewClass->StoreCount > 0);
   }
 
-  ValueToClass[V] = NewClass;
+  ValueToClass[I] = NewClass;
   // See if we destroyed the class or need to swap leaders.
   if (OldClass->Members.empty() && OldClass != InitialClass) {
     if (OldClass->DefiningExpr) {
@@ -1078,25 +1109,48 @@ void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass,
                    << " from table\n");
       ExpressionToClass.erase(OldClass->DefiningExpr);
     }
-  } else if (OldClass->RepLeader == V) {
+  } else if (OldClass->RepLeader == I) {
     // When the leader changes, the value numbering of
     // everything may change due to symbolization changes, so we need to
     // reprocess.
-    OldClass->RepLeader = *(OldClass->Members.begin());
+    DEBUG(dbgs() << "Leader change!\n");
+    ++NumGVNLeaderChanges;
+    // We don't need to sort members if there is only 1, and we don't care about
+    // sorting the initial class because everything either gets out of it or is
+    // unreachable.
+    if (OldClass->Members.size() == 1 || OldClass == InitialClass) {
+      OldClass->RepLeader = *(OldClass->Members.begin());
+    } else if (OldClass->NextLeader.first) {
+      ++NumGVNAvoidedSortedLeaderChanges;
+      OldClass->RepLeader = OldClass->NextLeader.first;
+      OldClass->NextLeader = {nullptr, ~0U};
+    } else {
+      ++NumGVNSortedLeaderChanges;
+      // TODO: If this ends up to slow, we can maintain a dual structure for
+      // member testing/insertion, or keep things mostly sorted, and sort only
+      // here, or ....
+      std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U};
+      for (const auto X : OldClass->Members) {
+        auto DFSNum = InstrDFS.lookup(X);
+        if (DFSNum < MinDFS.second)
+          MinDFS = {X, DFSNum};
+      }
+      OldClass->RepLeader = MinDFS.first;
+    }
     markLeaderChangeTouched(OldClass);
   }
 }
 
 // Perform congruence finding on a given value numbering expression.
-void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
-  ValueToExpression[V] = E;
+void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
+  ValueToExpression[I] = E;
   // This is guaranteed to return something, since it will at least find
   // INITIAL.
 
-  CongruenceClass *VClass = ValueToClass[V];
-  assert(VClass && "Should have found a vclass");
+  CongruenceClass *IClass = ValueToClass[I];
+  assert(IClass && "Should have found a IClass");
   // Dead classes should have been eliminated from the mapping.
-  assert(!VClass->Dead && "Found a dead class");
+  assert(!IClass->Dead && "Found a dead class");
 
   CongruenceClass *EClass;
   if (const auto *VE = dyn_cast<VariableExpression>(E)) {
@@ -1118,13 +1172,13 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
         NewClass->RepLeader =
             lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
       } else {
-        NewClass->RepLeader = V;
+        NewClass->RepLeader = I;
       }
       assert(!isa<VariableExpression>(E) &&
              "VariableExpression should have been handled already");
 
       EClass = NewClass;
-      DEBUG(dbgs() << "Created new congruence class for " << *V
+      DEBUG(dbgs() << "Created new congruence class for " << *I
                    << " using expression " << *E << " at " << NewClass->ID
                    << " and leader " << *(NewClass->RepLeader) << "\n");
       DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
@@ -1140,36 +1194,31 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
       assert(!EClass->Dead && "We accidentally looked up a dead class");
     }
   }
-  bool ClassChanged = VClass != EClass;
-  bool LeaderChanged = LeaderChanges.erase(V);
+  bool ClassChanged = IClass != EClass;
+  bool LeaderChanged = LeaderChanges.erase(I);
   if (ClassChanged || LeaderChanged) {
     DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
                  << "\n");
 
     if (ClassChanged)
-
-      moveValueToNewCongruenceClass(V, VClass, EClass);
-
-
-    markUsersTouched(V);
-    if (auto *I = dyn_cast<Instruction>(V)) {
-      if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
-        // If this is a MemoryDef, we need to update the equivalence table. If
-        // we determined the expression is congruent to a different memory
-        // state, use that different memory state.  If we determined it didn't,
-        // we update that as well.  Right now, we only support store
-        // expressions.
-        if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
-            EClass->Members.size() != 1) {
-          auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
-          setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
-        } else {
-          setMemoryAccessEquivTo(MA, nullptr);
-        }
-        markMemoryUsersTouched(MA);
+      moveValueToNewCongruenceClass(I, IClass, EClass);
+    markUsersTouched(I);
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+      // If this is a MemoryDef, we need to update the equivalence table. If
+      // we determined the expression is congruent to a different memory
+      // state, use that different memory state.  If we determined it didn't,
+      // we update that as well.  Right now, we only support store
+      // expressions.
+      if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
+          EClass->Members.size() != 1) {
+        auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
+        setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
+      } else {
+        setMemoryAccessEquivTo(MA, nullptr);
       }
+      markMemoryUsersTouched(MA);
     }
-  } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
+  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
     // There is, sadly, one complicating thing for stores.  Stores do not
     // produce values, only consume them.  However, in order to make loads and
     // stores value number the same, we ignore the value operand of the store.

diff --git a/llvm/test/Transforms/NewGVN/pr31613.ll b/llvm/test/Transforms/NewGVN/pr31613.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+;; Both of these tests are tests of phi nodes that end up all equivalent to each other
+;; Without proper leader ordering, we will end up cycling the leader between all of them and never converge.
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 1, [[BB18:%.*]] ]
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br i1 undef, label [[BB18]], label [[BB7:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br label [[BB9:%.*]]
+; CHECK:       bb9:
+; CHECK-NEXT:    br i1 undef, label [[BB2]], label [[BB11:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    br i1 undef, label [[BB16:%.*]], label [[BB14:%.*]]
+; CHECK:       bb14:
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb16:
+; CHECK-NEXT:    br label [[BB7]]
+; CHECK:       bb18:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb18, %bb
+  %tmp = phi i32 [ 0, %bb ], [ 1, %bb18 ]
+  br label %bb2
+
+bb2:                                              ; preds = %bb9, %bb1
+  %tmp3 = phi i32 [ %tmp, %bb1 ], [ %tmp8, %bb9 ]
+  br label %bb4
+
+bb4:                                              ; preds = %bb14, %bb2
+  %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp15, %bb14 ]
+  br i1 undef, label %bb18, label %bb7
+
+bb7:                                              ; preds = %bb16, %bb4
+  %tmp8 = phi i32 [ %tmp17, %bb16 ], [ %tmp5, %bb4 ]
+  br label %bb9
+
+bb9:                                              ; preds = %bb7
+  br i1 undef, label %bb2, label %bb11
+
+bb11:                                             ; preds = %bb9
+  br i1 undef, label %bb16, label %bb14
+
+bb14:                                             ; preds = %bb11
+  %tmp15 = phi i32 [ %tmp8, %bb11 ]
+  br label %bb4
+
+bb16:                                             ; preds = %bb11
+  %tmp17 = phi i32 [ %tmp8, %bb11 ]
+  br label %bb7
+
+bb18:                                             ; preds = %bb4
+  br label %bb1
+}
+
+%struct.a = type {}
+%struct.b = type {}
+
+declare void @c.d.p(i64, i8*)
+
+define void @e() {
+; CHECK-LABEL: @e(
+; CHECK-NEXT:    [[F:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 undef, i32* [[F]], !g !0
+; CHECK-NEXT:    br label [[H:%.*]]
+; CHECK:       h:
+; CHECK-NEXT:    call void @c.d.p(i64 8, i8* undef)
+; CHECK-NEXT:    [[I:%.*]] = load i32, i32* [[F]]
+; CHECK-NEXT:    [[J:%.*]] = load i32, i32* null
+; CHECK-NEXT:    [[K:%.*]] = icmp eq i32 [[I]], [[J]]
+; CHECK-NEXT:    br i1 [[K]], label [[L:%.*]], label [[Q:%.*]]
+; CHECK:       l:
+; CHECK-NEXT:    br label [[R:%.*]]
+; CHECK:       q:
+; CHECK-NEXT:    [[M:%.*]] = load [[STRUCT_A*:%.*]], [[STRUCT_A**:%.*]] null
+; CHECK-NEXT:    br label [[R]]
+; CHECK:       r:
+; CHECK-NEXT:    switch i32 undef, label [[N:%.*]] [
+; CHECK-NEXT:    i32 0, label [[S:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       s:
+; CHECK-NEXT:    store i32 undef, i32* [[F]], !g !0
+; CHECK-NEXT:    br label [[H]]
+; CHECK:       n:
+; CHECK-NEXT:    [[O:%.*]] = load [[STRUCT_A*]], [[STRUCT_A**]] null
+; CHECK-NEXT:    ret void
+;
+  %f = alloca i32
+  store i32 undef, i32* %f, !g !0
+  br label %h
+
+h:                                                ; preds = %s, %0
+  call void @c.d.p(i64 8, i8* undef)
+  %i = load i32, i32* %f
+  %j = load i32, i32* null
+  %k = icmp eq i32 %i, %j
+  br i1 %k, label %l, label %q
+
+l:                                                ; preds = %h
+  br label %r
+
+q:                                                ; preds = %h
+  %m = load %struct.a*, %struct.a** null
+  %1 = bitcast %struct.a* %m to %struct.b*
+  br label %r
+
+r:                                                ; preds = %q, %l
+  switch i32 undef, label %n [
+  i32 0, label %s
+  ]
+
+s:                                                ; preds = %r
+  store i32 undef, i32* %f, !g !0
+  br label %h
+
+n:                                                ; preds = %r
+  %o = load %struct.a*, %struct.a** null
+  %2 = bitcast %struct.a* %o to %struct.b*
+  ret void
+}
+
+!0 = !{}