Skip to content

Commit

Permalink
Do not widen load for different variable in GVN.
Browse files Browse the repository at this point in the history
Summary:
Widening load in GVN is too early because it will block other optimizations like PRE, LICM.

https://llvm.org/bugs/show_bug.cgi?id=29110

The SPECCPU2006 benchmark impact of this patch:

Reference: o2_nopatch
(1): o2_patched

           Benchmark             Base:Reference   (1)  
-------------------------------------------------------
spec/2006/fp/C++/444.namd                  25.2  -0.08%
spec/2006/fp/C++/447.dealII               45.92  +1.05%
spec/2006/fp/C++/450.soplex                41.7  -0.26%
spec/2006/fp/C++/453.povray               35.65  +1.68%
spec/2006/fp/C/433.milc                   23.79  +0.42%
spec/2006/fp/C/470.lbm                    41.88  -1.12%
spec/2006/fp/C/482.sphinx3                47.94  +1.67%
spec/2006/int/C++/471.omnetpp             22.46  -0.36%
spec/2006/int/C++/473.astar               21.19  +0.24%
spec/2006/int/C++/483.xalancbmk           36.09  -0.11%
spec/2006/int/C/400.perlbench             33.28  +1.35%
spec/2006/int/C/401.bzip2                 22.76  -0.04%
spec/2006/int/C/403.gcc                   32.36  +0.12%
spec/2006/int/C/429.mcf                   41.04  -0.41%
spec/2006/int/C/445.gobmk                 26.94  +0.04%
spec/2006/int/C/456.hmmer                  24.5  -0.20%
spec/2006/int/C/458.sjeng                    28  -0.46%
spec/2006/int/C/462.libquantum            55.25  +0.27%
spec/2006/int/C/464.h264ref               45.87  +0.72%

geometric mean                                   +0.23%

For most benchmarks, it's a wash, but we do see stable improvements on some benchmarks, e.g. 447,453,482,400.

Reviewers: davidxl, hfinkel, dberlin, sanjoy, reames

Subscribers: gberry, junbuml

Differential Revision: https://reviews.llvm.org/D24096

llvm-svn: 281074
  • Loading branch information
danielcdh committed Sep 9, 2016
1 parent a5edf65 commit 22ce5eb
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 54 deletions.
38 changes: 1 addition & 37 deletions llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
Expand Up @@ -234,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
return MemDepResult::getNonFuncLocal();
}

/// Return true if LI is a load that would fully overlap MemLoc if done as
/// a wider legal integer load.
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
const Value *&MemLocBase,
int64_t &MemLocOffs,
const LoadInst *LI) {
const DataLayout &DL = LI->getModule()->getDataLayout();

// If we haven't already computed the base/offset of MemLoc, do so now.
if (!MemLocBase)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);

unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
MemLocBase, MemLocOffs, MemLoc.Size, LI);
return Size != 0;
}

unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
const LoadInst *LI) {
Expand Down Expand Up @@ -410,9 +390,6 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {

const Value *MemLocBase = nullptr;
int64_t MemLocOffset = 0;
bool isInvariantLoad = false;

if (!Limit) {
Expand Down Expand Up @@ -550,21 +527,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
AliasResult R = AA.alias(LoadLoc, MemLoc);

if (isLoad) {
if (R == NoAlias) {
// If this is an over-aligned integer load (for example,
// "load i8* %P, align 4") see if it would obviously overlap with the
// queried location if widened to a larger load (e.g. if the queried
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI))
return MemDepResult::getClobber(Inst);
}
if (R == NoAlias)
continue;
}

// Must aliased loads are defs of each other.
if (R == MustAlias)
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
Expand Up @@ -34,7 +34,7 @@ define void @end_test_widening_bad() {
ret void
}

;; Accessing bytes 4 and 5. Ok to widen to i16.
;; Accessing bytes 4 and 5. No widen to i16.

define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone sanitize_address {
entry:
Expand All @@ -45,7 +45,8 @@ entry:
%add = add nsw i32 %conv, %conv2
ret i32 %add
; CHECK: @test_widening_ok
; CHECK: __asan_report_load2
; CHECK: __asan_report_load1
; CHECK: __asan_report_load1
; CHECK-NOT: __asan_report
; CHECK: end_test_widening_ok
}
Expand Down
11 changes: 6 additions & 5 deletions llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll
Expand Up @@ -51,15 +51,16 @@ for.end:
}

; %1 is partially redundant if %0 can be widened to a 64-bit load.
; But we should not widen %0 to 64-bit load.

; CHECK-LABEL: define i32 @overaligned_load
; CHECK: if.then:
; CHECK: %0 = load i64
; CHECK: [[LSHR:%[0-9]+]] = lshr i64 %0, 32, !dbg [[LSHR_LOC:![0-9]+]]
; CHECK: trunc i64 [[LSHR]] to i32
; CHECK-NOT: %0 = load i64
; CHECK-NOT: [[LSHR:%[0-9]+]] = lshr i64 %0, 32, !dbg [[LSHR_LOC:![0-9]+]]
; CHECK-NOT: trunc i64 [[LSHR]] to i32
; CHECK: if.end:
; CHECK-NOT: %1 = load i32, i32*
; CHECK: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
; CHECK: %1 = load i32, i32*
; CHECK-NOT: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})

define i32 @overaligned_load(i32 %a, i32* nocapture %b) !dbg !13 {
entry:
Expand Down
9 changes: 7 additions & 2 deletions llvm/test/Transforms/GVN/PRE/rle.ll
Expand Up @@ -624,6 +624,7 @@ if.end:

;;===----------------------------------------------------------------------===;;
;; Load Widening
;; We explicitly choose NOT to widen. And are testing to make sure we don't.
;;===----------------------------------------------------------------------===;;

%widening1 = type { i32, i8, i8, i8, i8 }
Expand All @@ -640,7 +641,8 @@ entry:
ret i32 %add
; CHECK-LABEL: @test_widening1(
; CHECK-NOT: load
; CHECK: load i16, i16*
; CHECK: load i8, i8*
; CHECK: load i8, i8*
; CHECK-NOT: load
; CHECK: ret i32
}
Expand All @@ -664,7 +666,10 @@ entry:
ret i32 %add3
; CHECK-LABEL: @test_widening2(
; CHECK-NOT: load
; CHECK: load i32, i32*
; CHECK: load i8, i8*
; CHECK: load i8, i8*
; CHECK: load i8, i8*
; CHECK: load i8, i8*
; CHECK-NOT: load
; CHECK: ret i32
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/GVN/big-endian.ll
Expand Up @@ -7,9 +7,9 @@ target triple = "powerpc64-unknown-linux-gnu"
;; loads reusing a load value.
define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) {
; CHECK-LABEL: @test1
; CHECK: [[V1:%.*]] = load i16, i16* %{{.*}}
; CHECK: [[V2:%.*]] = lshr i16 [[V1]], 8
; CHECK: trunc i16 [[V2]] to i1
; CHECK-NOT: [[V1:%.*]] = load i16, i16* %{{.*}}
; CHECK-NOT: [[V2:%.*]] = lshr i16 [[V1]], 8
; CHECK-NOT: trunc i16 [[V2]] to i1

%valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0
%valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll
Expand Up @@ -25,9 +25,7 @@ define i32 @TestNoAsan() {
}

; CHECK-LABEL: @TestNoAsan
; CHECK: %[[LOAD:[^ ]+]] = load i32
; CHECK: {{.*}} = ashr i32 %[[LOAD]]
; CHECK-NOT: {{.*}} = phi
; CHECK: ret i32 0

define i32 @TestAsan() sanitize_address {
%1 = tail call noalias i8* @_Znam(i64 2)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/GVN/pr25440.ll
Expand Up @@ -19,7 +19,7 @@ bb0: ; preds = %land.lhs.true, %entry
%x.tr = phi %struct.a* [ %x, %entry ], [ null, %land.lhs.true ]
%code1 = getelementptr inbounds %struct.a, %struct.a* %x.tr, i32 0, i32 0
%0 = load i16, i16* %code1, align 4
; CHECK: load i32, i32*
; CHECK: load i16, i16*
%conv = zext i16 %0 to i32
switch i32 %conv, label %if.end.50 [
i32 43, label %cleanup
Expand All @@ -38,7 +38,7 @@ if.then.26: ; preds = %if.then.5

cond.false: ; preds = %if.then.26
; CHECK: cond.false:
; CHECK-NOT: load
; CHECK: load i16
%mode = getelementptr inbounds %struct.a, %struct.a* %x.tr.lcssa163, i32 0, i32 1
%bf.load = load i16, i16* %mode, align 2
%bf.shl = shl i16 %bf.load, 8
Expand Down

0 comments on commit 22ce5eb

Please sign in to comment.