Skip to content

Commit

Permalink
[DSE] Merge stores when the later store only writes to memory locatio…
Browse files Browse the repository at this point in the history
…ns the early store also wrote to (2nd try)

This is a 2nd attempt at:
https://reviews.llvm.org/rL310055
...which was reverted at rL310123 because of PR34074:
https://bugs.llvm.org/show_bug.cgi?id=34074

In this version, we break out of the inner loop after we successfully merge and kill a pair of stores. In the
earlier rev, we were continuing instead, which meant we could process the invalid info from a now dead store.

Original commit message (authored by Filipe Cabecinhas):

This fixes PR31777.

If both stores' values are ConstantInt, we merge the two stores
(shifting the smaller store appropriately) and replace the earlier (and
larger) store with an updated constant.

In the future we should also support vectors of integers. And maybe
float/double if we can.  

Differential Revision: https://reviews.llvm.org/D30703

llvm-svn: 314206
  • Loading branch information
rotateright committed Sep 26, 2017
1 parent f47c4b4 commit 1d04b5b
Show file tree
Hide file tree
Showing 5 changed files with 494 additions and 5 deletions.
102 changes: 99 additions & 3 deletions llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
Expand Up @@ -34,6 +34,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand All @@ -49,12 +50,18 @@ STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");

static cl::opt<bool>
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
cl::init(true), cl::Hidden,
cl::desc("Enable partial-overwrite tracking in DSE"));

static cl::opt<bool>
EnablePartialStoreMerging("enable-dse-partial-store-merging",
cl::init(true), cl::Hidden,
cl::desc("Enable partial store merging in DSE"));


//===----------------------------------------------------------------------===//
// Helper functions
Expand Down Expand Up @@ -287,14 +294,22 @@ static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
}

namespace {
enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown };
enum OverwriteResult {
OW_Begin,
OW_Complete,
OW_End,
OW_PartialEarlierWithFullLater,
OW_Unknown
};
}

/// Return 'OW_Complete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
/// beginning of the 'Earlier' location is overwritten by 'Later', or
/// 'OW_Unknown' if nothing can be determined.
/// beginning of the 'Earlier' location is overwritten by 'Later'.
/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
/// overwritten by a latter (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
Expand Down Expand Up @@ -427,6 +442,19 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
}
}

// Check for an earlier store which writes to all the memory locations that
// the later store writes to.
if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
int64_t(EarlierOff + Earlier.Size) > LaterOff &&
uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" << EarlierOff
<< ", " << int64_t(EarlierOff + Earlier.Size)
<< ") by a later store [" << LaterOff << ", "
<< int64_t(LaterOff + Later.Size) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}

// Another interesting case is if the later store overwrites the end of the
// earlier store.
//
Expand Down Expand Up @@ -1094,6 +1122,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
// Also try to merge two stores if a later one only touches memory written
// to by the earlier one.
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
Expand Down Expand Up @@ -1123,6 +1153,72 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
} else if (EnablePartialStoreMerging &&
OR == OW_PartialEarlierWithFullLater) {
auto *Earlier = dyn_cast<StoreInst>(DepWrite);
auto *Later = dyn_cast<StoreInst>(Inst);
if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
Later && isa<ConstantInt>(Later->getValueOperand())) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
// b) the later store is fully contained in the earlier one and
// c) they both have a constant value
// Merge the two stores, replacing the earlier store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
// some mem intrinsics (if needed)

APInt EarlierValue =
cast<ConstantInt>(Earlier->getValueOperand())->getValue();
APInt LaterValue =
cast<ConstantInt>(Later->getValueOperand())->getValue();
unsigned LaterBits = LaterValue.getBitWidth();
assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
LaterValue = LaterValue.zext(EarlierValue.getBitWidth());

// Offset of the smaller store inside the larger store
unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
unsigned LShiftAmount =
DL.isBigEndian()
? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
: BitOffsetDiff;
APInt Mask =
APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
LShiftAmount + LaterBits);
// Clear the bits we'll be replacing, then OR with the smaller
// store, shifted appropriately.
APInt Merged =
(EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite
<< "\n Later: " << *Inst
<< "\n Merged Value: " << Merged << '\n');

auto *SI = new StoreInst(
ConstantInt::get(Earlier->getValueOperand()->getType(), Merged),
Earlier->getPointerOperand(), false, Earlier->getAlignment(),
Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite);

unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
LLVMContext::MD_nontemporal};
SI->copyMetadata(*DepWrite, MDToKeep);
++NumModifiedStores;

// Remove earlier, wider, store
size_t Idx = InstrOrdering.lookup(DepWrite);
InstrOrdering.erase(DepWrite);
InstrOrdering.insert(std::make_pair(SI, Idx));

// Delete the old stores and now-dead instructions that feed them.
deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
&InstrOrdering);
MadeChange = true;

// We erased DepWrite and Inst (Loc); start over.
break;
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -1,4 +1,4 @@
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
; RUN: opt < %s -basicaa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

; Ensure that the dead store is deleted in this case. It is wholely
Expand Down
@@ -1,4 +1,4 @@
; RUN: opt -S -dse < %s | FileCheck %s
; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"

Expand Down
173 changes: 173 additions & 0 deletions llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
@@ -0,0 +1,173 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"

define void @byte_by_byte_replacement(i32 *%ptr) {
; CHECK-LABEL: @byte_by_byte_replacement(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 151653132, i32* [[PTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
;; This store's value should be modified as it should be better to use one
;; larger store than several smaller ones.
;; store will turn into 0x090A0B0C == 151653132
store i32 305419896, i32* %ptr ; 0x12345678
%bptr = bitcast i32* %ptr to i8*
%bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
%bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3

;; We should be able to merge these four stores with the i32 above
; value (and bytes) stored before ; 0x12345678
store i8 9, i8* %bptr ; 09
store i8 10, i8* %bptr1 ; 0A
store i8 11, i8* %bptr2 ; 0B
store i8 12, i8* %bptr3 ; 0C
; 0x090A0B0C

ret void
}

define void @word_replacement(i64 *%ptr) {
; CHECK-LABEL: @word_replacement(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 72638273700655232, i64* [[PTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
store i64 72623859790382856, i64* %ptr ; 0x0102030405060708

%wptr = bitcast i64* %ptr to i16*
%wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
%wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
%wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3

;; We should be able to merge these two stores with the i64 one above
; value (and bytes) stored before ; 0x0102030405060708
store i16 4128, i16* %wptr1 ; 1020
store i16 28800, i16* %wptr3 ; 7080
; 0x0102102005067080

ret void
}


define void @differently_sized_replacements(i64 *%ptr) {
; CHECK-LABEL: @differently_sized_replacements(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 289077004501059343, i64* [[PTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f

%bptr = bitcast i64* %ptr to i8*
%bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
%wptr = bitcast i64* %ptr to i16*
%wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
%dptr = bitcast i64* %ptr to i32*

;; We should be able to merge all these stores with the i64 one above
; value (and bytes) stored before ; 0x08090a0b0c0d0e0f
store i8 7, i8* %bptr6 ; 07
store i16 1541, i16* %wptr2 ; 0605
store i32 67305985, i32* %dptr ; 04030201
; 0x040302010605070f
ret void
}


define void @multiple_replacements_to_same_byte(i64 *%ptr) {
; CHECK-LABEL: @multiple_replacements_to_same_byte(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 289077004602248719, i64* [[PTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f

%bptr = bitcast i64* %ptr to i8*
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
%wptr = bitcast i64* %ptr to i16*
%wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
%dptr = bitcast i64* %ptr to i32*

;; We should be able to merge all these stores with the i64 one above
; value (and bytes) stored before ; 0x08090a0b0c0d0e0f
store i8 7, i8* %bptr3 ; 07
store i16 1541, i16* %wptr1 ; 0605
store i32 67305985, i32* %dptr ; 04030201
; 0x040302010c0d0e0f
ret void
}

define void @merged_merges(i64 *%ptr) {
; CHECK-LABEL: @merged_merges(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 289081428418563599, i64* [[PTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f

%bptr = bitcast i64* %ptr to i8*
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
%wptr = bitcast i64* %ptr to i16*
%wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
%dptr = bitcast i64* %ptr to i32*

;; We should be able to merge all these stores with the i64 one above
; value (not bytes) stored before ; 0x08090a0b0c0d0e0f
store i32 67305985, i32* %dptr ; 04030201
store i16 1541, i16* %wptr1 ; 0605
store i8 7, i8* %bptr3 ; 07
; 0x040306070c0d0e0f
ret void
}

define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1
; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1
; CHECK-NEXT: ret i8 0
;
entry:

store i64 0, i64* %ptr

%bptr = bitcast i64* %ptr to i8*
%bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
%dptr = bitcast i8* %bptrm1 to i32*
%qptr = bitcast i8* %bptr3 to i64*

store i32 1234, i32* %dptr, align 1
store i64 5678, i64* %qptr, align 1

ret i8 0
}

;; Test case from PR31777
%union.U = type { i64 }

define void @foo(%union.U* nocapture %u) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
; CHECK-NEXT: store i64 11821949021847552, i64* [[I]], align 8
; CHECK-NEXT: ret void
;
entry:
%i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
store i64 0, i64* %i, align 8
%s = bitcast %union.U* %u to i16*
store i16 42, i16* %s, align 8
ret void
}

0 comments on commit 1d04b5b

Please sign in to comment.