Skip to content

Commit

Permalink
[SDAG] Relax conditions under stores of loaded values can be merged
Browse files Browse the repository at this point in the history
Summary:

Allow consecutive stores whose values come from consecutive loads to
merged in the presense of other uses of the loads. Previously this was
disallowed as in general the merged load cannot be shared with the
other uses. Merging N stores into 1 may cause as many as N redundant
loads. However in the context of caching this should have neglible
affect on memory pressure and reduce instruction count making it
almost always a win.

Fixes PR32086.

Reviewers: spatel, jyknight, andreadb, hfinkel, efriedma

Reviewed By: efriedma

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D30471

llvm-svn: 302712
  • Loading branch information
niravhdave committed May 10, 2017
1 parent f69a7c3 commit a38c049
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 28 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Expand Up @@ -44,6 +44,10 @@ namespace ISD {
/// EntryToken - This is the marker used to indicate the start of a region.
EntryToken,

/// DummyNode - Temporary node for node replacement. These nodes
/// should not persist beyond their introduction.
DummyNode,

/// TokenFactor - This node takes multiple tokens as input and produces a
/// single token result. This is used to represent the fact that the operand
/// operators are independent of each other.
Expand Down
32 changes: 22 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -12783,10 +12783,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
if (!Ld) break;

// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
break;

// The memory operands must not be volatile.
if (Ld->isVolatile() || Ld->isIndexed())
break;
Expand All @@ -12795,10 +12791,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
break;

// The stored memory type must be the same.
if (Ld->getMemoryVT() != MemVT)
break;

BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
// If this is not the first ptr that we check.
if (LdBasePtr.Base.getNode()) {
Expand Down Expand Up @@ -12930,8 +12922,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
if (SDValue(Ld, 0).hasOneUse()) {
// Only the original store used value so just replace chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
} else {
// Multiple uses exist. Keep the old load in line with the new
// load, i.e. Replace chains using Ld's chain with a
// TokenFactor. Create a temporary node to serve as a placer so
// we do not replace the reference to original Load's chain in
// the TokenFactor.
SDValue TokenDummy = DAG.getNode(ISD::DummyNode, SDLoc(Ld), MVT::Other);

// Replace all references to Load's output chain to TokenDummy
CombineTo(Ld, SDValue(Ld, 0), TokenDummy, false);
SDValue Token =
DAG.getNode(ISD::TokenFactor, SDLoc(Ld), MVT::Other, SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
// Replace all uses of TokenDummy from itself to Ld's output chain.
CombineTo(TokenDummy.getNode(), Token);
assert(TokenDummy.use_empty() && "TokenDummy should be unused");
AddToWorklist(Ld);
}
}

// Replace the all stores with the new store.
Expand Down
28 changes: 10 additions & 18 deletions llvm/test/CodeGen/X86/merge_store_duplicated_loads.ll
@@ -1,18 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -o - | FileCheck %s


; PR32086
target triple = "x86_64-unknown-linux-gnu"

define void @merge_double(double* noalias nocapture %st, double* noalias nocapture readonly %ld) #0 {
; CHECK-LABEL: merge_double:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movsd %xmm0, (%rdi)
; CHECK-NEXT: movsd %xmm1, 8(%rdi)
; CHECK-NEXT: movsd %xmm0, 16(%rdi)
; CHECK-NEXT: movsd %xmm1, 24(%rdi)
; CHECK-NEXT: movups (%rsi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movups %xmm0, 16(%rdi)
; CHECK-NEXT: retq
%ld_idx1 = getelementptr inbounds double, double* %ld, i64 1
%ld0 = load double, double* %ld, align 8, !tbaa !2
Expand All @@ -32,12 +29,9 @@ define void @merge_double(double* noalias nocapture %st, double* noalias nocaptu
define void @merge_loadstore_int(i64* noalias nocapture readonly %p, i64* noalias nocapture %q) local_unnamed_addr #0 {
; CHECK-LABEL: merge_loadstore_int:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rcx
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movq %rcx, 8(%rsi)
; CHECK-NEXT: movq %rax, 16(%rsi)
; CHECK-NEXT: movq %rcx, 24(%rsi)
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rsi)
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: retq
entry:
%0 = load i64, i64* %p, align 8, !tbaa !1
Expand All @@ -57,11 +51,9 @@ define i64 @merge_loadstore_int_with_extra_use(i64* noalias nocapture readonly %
; CHECK-LABEL: merge_loadstore_int_with_extra_use:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rcx
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movq %rcx, 8(%rsi)
; CHECK-NEXT: movq %rax, 16(%rsi)
; CHECK-NEXT: movq %rcx, 24(%rsi)
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rsi)
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: retq
entry:
%0 = load i64, i64* %p, align 8, !tbaa !1
Expand Down

0 comments on commit a38c049

Please sign in to comment.