Skip to content

Commit

Permalink
[DSE] Use optimized access if available for redundant store elimination.
Browse files Browse the repository at this point in the history
Using the optimized access enables additional optimizations in cases
where the defining access is a non-aliasing store.

Alternatively, we could also walk upwards and skip non-aliasing defs
here, but my experiments so far showed that this would noticeably
increase compile-time for little extra gain compared to just using the
optimized access.

Improvements of dse.NumRedundantStores on MultiSource/CINT2006/CFP2006
on X86 with -O3:

     test-suite...-typeset/consumer-typeset.test     1.00                  76.00              7500.0%
     test-suite.../Benchmarks/Bullet/bullet.test     3.00                  12.00              300.0%
     test-suite...006/453.povray/453.povray.test     3.00                   6.00              100.0%
     test-suite...telecomm-gsm/telecomm-gsm.test     1.00                   2.00              100.0%
     test-suite...ediabench/gsm/toast/toast.test     1.00                   2.00              100.0%
     test-suite...marks/7zip/7zip-benchmark.test     1.00                   2.00              100.0%
     test-suite...ications/JM/lencod/lencod.test     7.00                  10.00              42.9%
     test-suite...6/464.h264ref/464.h264ref.test     6.00                   8.00              33.3%
     test-suite...ications/JM/ldecod/ldecod.test     6.00                   7.00              16.7%
     test-suite...006/447.dealII/447.dealII.test    33.00                  33.00               0.0%
     test-suite...6/471.omnetpp/471.omnetpp.test    NaN                     1.00               nan%
     test-suite...006/450.soplex/450.soplex.test    NaN                     2.00               nan%
     test-suite.../CINT2006/403.gcc/403.gcc.test    NaN                     7.00               nan%
     test-suite...lications/ClamAV/clamscan.test    NaN                     1.00               nan%
     test-suite...CI_Purple/SMG2000/smg2000.test    NaN                     3.00               nan%

Follow-up to D111727.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D112315
  • Loading branch information
fhahn committed Nov 30, 2021
1 parent 316e627 commit c9ad356
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 22 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
Expand Up @@ -1928,7 +1928,14 @@ struct DSEState {
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
!isRemovable(Def->getMemoryInst()))
continue;
auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
MemoryDef *UpperDef;
// To conserve compile-time, we avoid walking to the next clobbering def.
// Instead, we just try to get the optimized access, if it exists. DSE
// will try to optimize defs during the earlier traversal.
if (Def->isOptimized())
UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
else
UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
continue;

Expand Down
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basic-aa -dse -S %s | FileCheck %s
; RUN: opt -basic-aa -dse -dse-optimize-memoryssa=false -S %s | FileCheck --check-prefixes=CHECK,UNOPT %s
; RUN: opt -basic-aa -dse -dse-optimize-memoryssa -S %s | FileCheck --check-prefixes=CHECK,OPT %s
; RUN: opt -basic-aa -dse -S %s | FileCheck --check-prefixes=CHECK,UNOPT %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

Expand Down Expand Up @@ -315,17 +317,28 @@ bb3:

; The store in bb3 can be eliminated, because the store in bb1 cannot alias it.
define void @test10(i32* noalias %P, i32* %Q, i1 %c) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: store i32 10, i32* [[Q:%.*]], align 4
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: ret void
; CHECK: bb3:
; CHECK-NEXT: store i32 0, i32* [[P]], align 4
; CHECK-NEXT: ret void
; UNOPT-LABEL: @test10(
; UNOPT-NEXT: store i32 0, i32* [[P:%.*]], align 4
; UNOPT-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; UNOPT: bb1:
; UNOPT-NEXT: store i32 10, i32* [[Q:%.*]], align 4
; UNOPT-NEXT: br label [[BB3:%.*]]
; UNOPT: bb2:
; UNOPT-NEXT: ret void
; UNOPT: bb3:
; UNOPT-NEXT: store i32 0, i32* [[P]], align 4
; UNOPT-NEXT: ret void
;
; OPT-LABEL: @test10(
; OPT-NEXT: store i32 0, i32* [[P:%.*]], align 4
; OPT-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; OPT: bb1:
; OPT-NEXT: store i32 10, i32* [[Q:%.*]], align 4
; OPT-NEXT: br label [[BB3:%.*]]
; OPT: bb2:
; OPT-NEXT: ret void
; OPT: bb3:
; OPT-NEXT: ret void
;
store i32 0, i32* %P
br i1 %c, label %bb1, label %bb2
Expand Down Expand Up @@ -412,13 +425,19 @@ define void @test12_memset_simple(i8* %ptr) {
}

define void @test12_memset_other_store_in_between(i8* %ptr) {
; CHECK-LABEL: @test12_memset_other_store_in_between(
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
; CHECK-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
; CHECK-NEXT: store i8 8, i8* [[PTR_4]], align 1
; CHECK-NEXT: [[PTR_5:%.*]] = getelementptr i8, i8* [[PTR]], i64 5
; CHECK-NEXT: store i8 0, i8* [[PTR_5]], align 1
; CHECK-NEXT: ret void
; UNOPT-LABEL: @test12_memset_other_store_in_between(
; UNOPT-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
; UNOPT-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
; UNOPT-NEXT: store i8 8, i8* [[PTR_4]], align 1
; UNOPT-NEXT: [[PTR_5:%.*]] = getelementptr i8, i8* [[PTR]], i64 5
; UNOPT-NEXT: store i8 0, i8* [[PTR_5]], align 1
; UNOPT-NEXT: ret void
;
; OPT-LABEL: @test12_memset_other_store_in_between(
; OPT-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
; OPT-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
; OPT-NEXT: store i8 8, i8* [[PTR_4]], align 1
; OPT-NEXT: ret void
;
call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 10, i1 false)
%ptr.4 = getelementptr i8, i8* %ptr, i64 4
Expand Down Expand Up @@ -525,8 +544,8 @@ declare i8* @strcat(i8*, i8*) nounwind argmemonly

define void @test14_strcat(i8* noalias %P, i8* noalias %Q) {
; CHECK-LABEL: @test14_strcat(
; CHECK-NEXT: call i8* @strcat(i8* [[P:%.*]], i8* [[Q:%.*]])
; CHECK-NEXT: call i8* @strcat(i8* [[P]], i8* [[Q]])
; CHECK-NEXT: [[CALL1:%.*]] = call i8* @strcat(i8* [[P:%.*]], i8* [[Q:%.*]])
; CHECK-NEXT: [[CALL2:%.*]] = call i8* @strcat(i8* [[P]], i8* [[Q]])
; CHECK-NEXT: ret void
;
%call1 = call i8* @strcat(i8* %P, i8* %Q)
Expand Down

0 comments on commit c9ad356

Please sign in to comment.