Skip to content

Commit

Permalink
Reapply "[GVN] Prevent LoadPRE from hoisting across instructions that…
Browse files Browse the repository at this point in the history
… don't pass control flow to successors"

This patch fixes the miscompile that happens when PRE hoists loads across guards and
other instructions that don't always pass control flow to their successors. PRE is now prohibited
from hoisting across such instructions because there is no guarantee that a load located after such
an instruction is still valid before it. For example, a load from under a guard may be
invalid before the guard in the following case:
  int array[LEN];
  ...
  guard(0 <= index && index < LEN);
  use(array[index]);

Differential Revision: https://reviews.llvm.org/D37460

llvm-svn: 316975
  • Loading branch information
Max Kazantsev committed Oct 31, 2017
1 parent 39a8dbf commit 488ec97
Show file tree
Hide file tree
Showing 6 changed files with 533 additions and 0 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/Transforms/Scalar/GVN.h
Expand Up @@ -18,6 +18,7 @@

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
Expand All @@ -27,6 +28,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <cstdint>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -156,7 +158,11 @@ class GVN : public PassInfoMixin<GVN> {
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
OptimizationRemarkEmitter *ORE;
// Maps a block to the topmost instruction with implicit control flow in it.
DenseMap<const BasicBlock *, const Instruction *>
FirstImplicitControlFlowInsts;

OrderedInstructions *OI;
ValueTable VN;

/// A mapping from value numbers to lists of Value*'s that
Expand Down Expand Up @@ -268,6 +274,7 @@ class GVN : public PassInfoMixin<GVN> {
BasicBlock *Curr, unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void fillImplicitControlFlowInfo(BasicBlock *BB);
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
Expand Down
97 changes: 97 additions & 0 deletions llvm/lib/Transforms/Scalar/GVN.cpp
Expand Up @@ -38,6 +38,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
Expand Down Expand Up @@ -1048,7 +1049,32 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// backwards through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);

// Check that there are no implicit control flow instructions above our load in
// its block. If there is an instruction that doesn't always pass the
// execution to the following instruction, then moving through it may become
// invalid. For example:
//
// int arr[LEN];
// int index = ???;
// ...
// guard(0 <= index && index < LEN);
// use(arr[index]);
//
// It is illegal to move the array access to any point above the guard,
// because if the index is out of bounds we should deoptimize rather than
// access the array.
// Check that there is no guard in this block above our instruction.
if (!IsSafeToSpeculativelyExecute) {
auto It = FirstImplicitControlFlowInsts.find(TmpBB);
if (It != FirstImplicitControlFlowInsts.end()) {
assert(It->second->getParent() == TmpBB &&
"Implicit control flow map broken?");
if (OI->dominates(It->second, LI))
return false;
}
}
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
Expand All @@ -1063,6 +1089,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
return false;

// Check that there is no implicit control flow in a block above.
if (!IsSafeToSpeculativelyExecute &&
FirstImplicitControlFlowInsts.count(TmpBB))
return false;
}

assert(TmpBB);
Expand Down Expand Up @@ -1982,6 +2013,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
TLI = &RunTLI;
VN.setAliasAnalysis(&RunAA);
MD = RunMD;
OrderedInstructions OrderedInstrs(DT);
OI = &OrderedInstrs;
VN.setMemDep(MD);
ORE = RunORE;

Expand Down Expand Up @@ -2064,14 +2097,26 @@ bool GVN::processBlock(BasicBlock *BB) {
if (!AtStart)
--BI;

bool InvalidateImplicitCF = false;
const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
for (auto *I : InstrsToErase) {
assert(I->getParent() == BB && "Removing instruction from wrong block?");
DEBUG(dbgs() << "GVN removed: " << *I << '\n');
if (MD) MD->removeInstruction(I);
DEBUG(verifyRemoved(I));
if (MaybeFirstICF == I) {
// We have erased the first ICF in block. The map needs to be updated.
InvalidateImplicitCF = true;
// Do not keep dangling pointer on the erased instruction.
MaybeFirstICF = nullptr;
}
I->eraseFromParent();
}

OI->invalidateBlock(BB);
InstrsToErase.clear();
if (InvalidateImplicitCF)
fillImplicitControlFlowInfo(BB);

if (AtStart)
BI = BB->begin();
Expand Down Expand Up @@ -2265,7 +2310,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (MD)
MD->removeInstruction(CurInst);
DEBUG(verifyRemoved(CurInst));
bool InvalidateImplicitCF =
FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
// FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
// some assertion failures.
OI->invalidateBlock(CurrentBlock);
CurInst->eraseFromParent();
if (InvalidateImplicitCF)
fillImplicitControlFlowInfo(CurrentBlock);
++NumGVNInstr;

return true;
Expand Down Expand Up @@ -2332,6 +2384,9 @@ bool GVN::iterateOnFunction(Function &F) {
// RPOT walks the graph in its constructor and will not be invalidated during
// processBlock.
ReversePostOrderTraversal<Function *> RPOT(&F);

for (BasicBlock *BB : RPOT)
fillImplicitControlFlowInfo(BB);
for (BasicBlock *BB : RPOT)
Changed |= processBlock(BB);

Expand All @@ -2343,6 +2398,48 @@ void GVN::cleanupGlobalSets() {
LeaderTable.clear();
BlockRPONumber.clear();
TableAllocator.Reset();
FirstImplicitControlFlowInsts.clear();
}

/// Recompute the FirstImplicitControlFlowInsts entry for \p BB.
///
/// Any entry currently stored for \p BB is dropped first. The block is then
/// scanned from its first instruction, and the first instruction that may not
/// pass control to the instruction following it is recorded. A block that
/// contains no such instruction ends up with no entry in the map.
void
GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
  // Make sure that all marked instructions are actually deleted by this point,
  // so that we don't need to care about omitting them.
  assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
  auto MayNotTransferExecutionToSuccessor = [](const Instruction *I) {
    // If a block's instruction doesn't always pass the control to its successor
    // instruction, mark the block as having implicit control flow. We use them
    // to avoid wrong assumptions of sort "if A is executed and B post-dominates
    // A, then B is also executed". This is not true if there is an implicit
    // control flow instruction (e.g. a guard) between them.
    //
    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
    // for volatile stores and loads because they can trap. The discussion on
    // whether or not it is correct is still ongoing. We might want to get rid
    // of this logic in the future. Anyways, trapping instructions shouldn't
    // introduce implicit control flow, so we explicitly allow them here. This
    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
    if (isGuaranteedToTransferExecutionToSuccessor(I))
      return false;
    // The volatility query lives inside the assert itself: a dyn_cast'ed local
    // that is only read by an assert becomes an unused variable (and a
    // -Wunused-variable warning) once asserts are compiled out.
    if (isa<LoadInst>(I)) {
      assert(cast<LoadInst>(I)->isVolatile() &&
             "Non-volatile load should transfer execution"
             " to successor!");
      return false;
    }
    if (isa<StoreInst>(I)) {
      assert(cast<StoreInst>(I)->isVolatile() &&
             "Non-volatile store should transfer execution"
             " to successor!");
      return false;
    }
    return true;
  };
  // Forget the stale entry (if any) before rescanning the block.
  FirstImplicitControlFlowInsts.erase(BB);

  // Only the topmost offending instruction is of interest, so stop at the
  // first hit.
  for (auto &I : *BB)
    if (MayNotTransferExecutionToSuccessor(&I)) {
      FirstImplicitControlFlowInsts[BB] = &I;
      break;
    }
}

/// Verify that the specified instruction does not occur in our
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
Expand Up @@ -17,6 +17,13 @@ declare i64* @getaddr_i64(i64 addrspace(100)*) #0
define hidden void @wrapon_fn173() {

; CHECK-LABEL: @wrapon_fn173
; CHECK: entry:
; CHECK-NEXT: call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
; CHECK-NEXT: %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
; CHECK-NEXT: br label %loop

entry:
%0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
Expand Down
146 changes: 146 additions & 0 deletions llvm/test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

declare void @llvm.experimental.guard(i1, ...)

; This is a motivating example on why we prohibit hoisting through guards.
; In the bottom block, we check that the index is within bounds and only access
; the element in this case and deoptimize otherwise. If we hoist the load to a
; place above the guard, it may lead to out-of-bound array access.
define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
; CHECK-LABEL: @test_motivation(
block1:
; Both candidate addresses are computed from %index before the branch.
%el1 = getelementptr inbounds i32, i32* %q, i32 %index
%el2 = getelementptr inbounds i32, i32* %p, i32 %index
br i1 %C, label %block2, label %block3

block2:

; Expected output: block2 holds nothing but its branch, i.e. neither the load
; nor any part of the bounds computation has been moved into it.
; CHECK: block2:
; CHECK-NEXT: br
; CHECK-NOT: load
; CHECK-NOT: sge
; CHECK-NOT: slt
; CHECK-NOT: and
br label %block4

block3:
; Only this predecessor writes memory (through %el1) before the merge.
store i32 0, i32* %el1
br label %block4

block4:

; Expected output: the bounds test, the guard and the load stay in block4 in
; this order, with the load immediately after the guard.
; CHECK: block4:
; CHECK: %cond1 = icmp sge i32 %index, 0
; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len
; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
; CHECK-NEXT: %PRE = load i32, i32* %P2
; CHECK: ret i32 %PRE

%P2 = phi i32* [%el2, %block3], [%el1, %block2]
; 0 <= %index < %len, combined into the guard condition.
%cond1 = icmp sge i32 %index, 0
%cond2 = icmp slt i32 %index, %len
%in.bounds = and i1 %cond1, %cond2
call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
; The load sits below the guard and is expected to remain there.
%PRE = load i32, i32* %P2
ret i32 %PRE
}

; Guard in load's block that is above the load should prohibit the PRE.
define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
; CHECK-LABEL: @test_guard_01(
block1:
br i1 %C, label %block2, label %block3

block2:

; Expected output: block2 still contains only its branch; no load has been
; inserted here.
; CHECK: block2:
; CHECK-NEXT: br
; CHECK-NOT: load

br label %block4

block3:
; Only this predecessor stores to %p before the merge.
store i32 0, i32* %p
br label %block4

block4:

; Expected output: the load remains directly after the guard in block4.
; CHECK: block4:
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G)
; CHECK-NEXT: load
; CHECK: ret i32

%P2 = phi i32* [%p, %block3], [%q, %block2]
; Guard first, load second: the guard is above the load in the same block.
call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
%PRE = load i32, i32* %P2
ret i32 %PRE
}

; Guard in load's block that is below the load should not prohibit the PRE.
define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
; CHECK-LABEL: @test_guard_02(
block1:
br i1 %C, label %block2, label %block3

block2:

; Expected output: a load from %q now appears at the top of block2.
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %q

br label %block4

block3:
; Only this predecessor stores to %p before the merge.
store i32 0, i32* %p
br label %block4

block4:

; Expected output: block4 starts with two phis (one i32, one i32*), followed
; by the guard, and no load is left between the guard and the return.
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NEXT: phi i32* [
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
; CHECK-NOT: load
; CHECK: ret i32

%P2 = phi i32* [%p, %block3], [%q, %block2]
; Load first, guard second: here the guard is below the load.
%PRE = load i32, i32* %P2
call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
ret i32 %PRE
}

; Guard above the load's block should prevent PRE from hoisting through it.
define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
; CHECK-LABEL: @test_guard_03(
block1:
br i1 %C, label %block2, label %block3

block2:

; Expected output: block2 still contains only its branch; no load has been
; inserted here.
; CHECK: block2:
; CHECK-NEXT: br
; CHECK-NOT: load

br label %block4

block3:
; Only this predecessor stores to %p before the merge.
store i32 0, i32* %p
br label %block4

block4:

; Expected output: under the block4 label the pointer phi, the guard, the load
; and the return follow each other with nothing in between.
; CHECK: block4:
; CHECK-NEXT: phi i32*
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
; CHECK-NEXT: load
; CHECK-NEXT: ret i32

%P2 = phi i32* [%p, %block3], [%q, %block2]
; In the input, the guard lives in block4 while the load lives in block5,
; whose only incoming branch is the one below.
call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
br label %block5

block5:
%PRE = load i32, i32* %P2
ret i32 %PRE
}

0 comments on commit 488ec97

Please sign in to comment.