Reapply "[GVN] Prevent LoadPRE from hoisting across instructions that…

… don't pass control flow to successors" This patch fixes the miscompile that happens when PRE hoists loads across guards and other instructions that don't always pass control flow to their successors. PRE is now prohibited from hoisting across such instructions because there is no guarantee that a load located after such an instruction is still valid before it. For example, a load placed under a guard may be invalid before the guard in the following case: int array[LEN]; ... guard(0 <= index && index < LEN); use(array[index]); Differential Revision: https://reviews.llvm.org/D37460 llvm-svn: 316975
llvm · Oct 31, 2017 · 488ec97 · 488ec97
1 parent 39a8dbf
commit 488ec97
Show file tree

Hide file tree

Showing 6 changed files with 533 additions and 0 deletions.
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -27,6 +28,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
 #include <cstdint>
 #include <utility>
 #include <vector>
@@ -156,7 +158,11 @@ class GVN : public PassInfoMixin<GVN> {
   AssumptionCache *AC;
   SetVector<BasicBlock *> DeadBlocks;
   OptimizationRemarkEmitter *ORE;
+  // Maps a block to the topmost instruction with implicit control flow in it.
+  DenseMap<const BasicBlock *, const Instruction *>
+      FirstImplicitControlFlowInsts;
 
+  OrderedInstructions *OI;
   ValueTable VN;
 
   /// A mapping from value numbers to lists of Value*'s that
@@ -268,6 +274,7 @@ class GVN : public PassInfoMixin<GVN> {
                                  BasicBlock *Curr, unsigned int ValNo);
   Value *findLeader(const BasicBlock *BB, uint32_t num);
   void cleanupGlobalSets();
+  void fillImplicitControlFlowInfo(BasicBlock *BB);
   void verifyRemoved(const Instruction *I) const;
   bool splitCriticalEdges();
   BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);

diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
@@ -1048,7 +1049,32 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
+  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
 
+  // Check that there are no implicit control flow instructions above our load
+  // in its block. If there is an instruction that doesn't always pass the
+  // execution to the following instruction, then moving through it may become
+  // invalid. For example:
+  //
+  // int arr[LEN];
+  // int index = ???;
+  // ...
+  // guard(0 <= index && index < LEN);
+  // use(arr[index]);
+  //
+  // It is illegal to move the array access to any point above the guard,
+  // because if the index is out of bounds we should deoptimize rather than
+  // access the array.
+  // Check that there is no guard in this block above our instruction.
+  if (!IsSafeToSpeculativelyExecute) {
+    auto It = FirstImplicitControlFlowInsts.find(TmpBB);
+    if (It != FirstImplicitControlFlowInsts.end()) {
+      assert(It->second->getParent() == TmpBB &&
+             "Implicit control flow map broken?");
+      if (OI->dominates(It->second, LI))
+        return false;
+    }
+  }
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1063,6 +1089,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
     // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
       return false;
+
+    // Check that there is no implicit control flow in a block above.
+    if (!IsSafeToSpeculativelyExecute &&
+        FirstImplicitControlFlowInsts.count(TmpBB))
+      return false;
   }
 
   assert(TmpBB);
@@ -1982,6 +2013,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
   TLI = &RunTLI;
   VN.setAliasAnalysis(&RunAA);
   MD = RunMD;
+  OrderedInstructions OrderedInstrs(DT);
+  OI = &OrderedInstrs;
   VN.setMemDep(MD);
   ORE = RunORE;
 
@@ -2064,14 +2097,26 @@ bool GVN::processBlock(BasicBlock *BB) {
     if (!AtStart)
       --BI;
 
+    bool InvalidateImplicitCF = false;
+    const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
     for (auto *I : InstrsToErase) {
       assert(I->getParent() == BB && "Removing instruction from wrong block?");
       DEBUG(dbgs() << "GVN removed: " << *I << '\n');
       if (MD) MD->removeInstruction(I);
       DEBUG(verifyRemoved(I));
+      if (MaybeFirstICF == I) {
+        // We have erased the first ICF in block. The map needs to be updated.
+        InvalidateImplicitCF = true;
+        // Do not keep dangling pointer on the erased instruction.
+        MaybeFirstICF = nullptr;
+      }
       I->eraseFromParent();
     }
+
+    OI->invalidateBlock(BB);
     InstrsToErase.clear();
+    if (InvalidateImplicitCF)
+      fillImplicitControlFlowInfo(BB);
 
     if (AtStart)
       BI = BB->begin();
@@ -2265,7 +2310,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
   if (MD)
     MD->removeInstruction(CurInst);
   DEBUG(verifyRemoved(CurInst));
+  bool InvalidateImplicitCF =
+      FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
+  // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
+  // some assertion failures.
+  OI->invalidateBlock(CurrentBlock);
   CurInst->eraseFromParent();
+  if (InvalidateImplicitCF)
+    fillImplicitControlFlowInfo(CurrentBlock);
   ++NumGVNInstr;
 
   return true;
@@ -2332,6 +2384,9 @@ bool GVN::iterateOnFunction(Function &F) {
   // RPOT walks the graph in its constructor and will not be invalidated during
   // processBlock.
   ReversePostOrderTraversal<Function *> RPOT(&F);
+
+  for (BasicBlock *BB : RPOT)
+    fillImplicitControlFlowInfo(BB);
   for (BasicBlock *BB : RPOT)
     Changed |= processBlock(BB);
 
@@ -2343,6 +2398,48 @@ void GVN::cleanupGlobalSets() {
   LeaderTable.clear();
   BlockRPONumber.clear();
   TableAllocator.Reset();
+  FirstImplicitControlFlowInsts.clear();
+}
+
+void
+GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
+  // Recomputes BB's entry in FirstImplicitControlFlowInsts. All marked
+  // instructions must already be erased so the scan need not skip them.
+  assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
+  auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
+    // If a block's instruction doesn't always pass the control to its successor
+    // instruction, mark the block as having implicit control flow. We use this
+    // to avoid wrong assumptions of sort "if A is executed and B post-dominates
+    // A, then B is also executed". This is not true if there is an implicit
+    // control flow instruction (e.g. a guard) between them.
+    //
+    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+    // for volatile stores and loads because they can trap. The discussion on
+    // whether or not it is correct is still ongoing. We might want to get rid
+    // of this logic in the future. Anyways, trapping instructions shouldn't
+    // introduce implicit control flow, so we explicitly allow them here. This
+    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+    if (isGuaranteedToTransferExecutionToSuccessor(I))
+      return false;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      assert(LI->isVolatile() && "Non-volatile load should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    if (auto *SI = dyn_cast<StoreInst>(I)) {
+      assert(SI->isVolatile() && "Non-volatile store should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    return true;
+  };
+  FirstImplicitControlFlowInsts.erase(BB); // Drop the stale entry, if any.
+
+  for (auto &I : *BB)
+    if (MayNotTransferExecutionToSuccessor(&I)) {
+      FirstImplicitControlFlowInsts[BB] = &I;
+      break; // Only the topmost such instruction is recorded.
+    }
 }
 
 /// Verify that the specified instruction does not occur in our

diff --git a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
@@ -17,6 +17,13 @@ declare i64* @getaddr_i64(i64 addrspace(100)*) #0
 define hidden void @wrapon_fn173() {
 
 ; CHECK-LABEL: @wrapon_fn173
+; CHECK:       entry:
+; CHECK-NEXT:    call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
+; CHECK-NEXT:    %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
+; CHECK-NEXT:    br label %loop
 
 entry:
   %0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)

diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-guards.ll b/llvm/test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example on why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds and only access
+; the element in this case and deoptimize otherwise. If we hoist the load to a
+; place above the guard, it may lead to an out-of-bounds array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+  %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+  %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+; CHECK-NOT:      sge
+; CHECK-NOT:      slt
+; CHECK-NOT:      and
+  br label %block4
+
+block3:
+  store i32 0, i32* %el1
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK:          %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT:     %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT:     %in.bounds = and i1 %cond1, %cond2
+; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT:     %PRE = load i32, i32* %P2
+; CHECK:          ret i32 %PRE
+
+  %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+  %cond1 = icmp sge i32 %index, 0
+  %cond2 = icmp slt i32 %index, %len
+  %in.bounds = and i1 %cond1, %cond2
+  call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is above the load should prohibit the PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT:     load
+; CHECK:          ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is below the load should not prohibit the PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     load i32, i32* %q
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK-NEXT:     phi i32 [
+; CHECK-NEXT:     phi i32* [
+; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT:      load
+; CHECK:          ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  %PRE = load i32, i32* %P2
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  ret i32 %PRE
+}
+
+; Guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK-NEXT:     phi i32*
+; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT:     load
+; CHECK-NEXT:     ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  br label %block5
+
+block5:
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}