Skip to content

Commit

Permalink
Fix a missed opportunity to merge stores.
Browse files Browse the repository at this point in the history
This commit fixes a missed opportunity in merging consecutive stores.
The code that searches for stores skipped the case of stores that
directly connect to the root. The comment above the implementation lists
this case but the code did not handle it. I found this pattern when
looking into the shared_ptr destructor. GCC generates the right
sequence. Here is a small repo:

    int foo(int* buff) {
        buff[0] = 0;
        int x = buff[1];
        buff[1] = 0;
        return x;
    }

Differential Revision: https://reviews.llvm.org/D116895
  • Loading branch information
nadavrot committed Jan 10, 2022
1 parent 8bed953 commit e2cc091
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 38 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -17492,6 +17492,10 @@ void DAGCombiner::getStoreMergeCandidates(
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
TryToAddCandidate(I2);
}
// Check stores that depend on the root (e.g. Store 3 in the chart above).
if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
TryToAddCandidate(I);
}
}
} else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll
@@ -1,4 +1,4 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s
; CHECK-NOT: memh
; Check that store widening does not merge the two stores.

Expand Down
64 changes: 27 additions & 37 deletions llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
Expand Up @@ -562,69 +562,59 @@ define dso_local void @testUnalignedLdStPair() {
; LE-PWR9-LABEL: testUnalignedLdStPair:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
; LE-PWR9-NEXT: li r6, 19
; LE-PWR9-NEXT: li r4, 11
; LE-PWR9-NEXT: li r5, 35
; LE-PWR9-NEXT: li r7, 27
; LE-PWR9-NEXT: addi r3, r3, g@toc@l
; LE-PWR9-NEXT: lxvx vs0, r3, r6
; LE-PWR9-NEXT: ldx r4, r3, r4
; LE-PWR9-NEXT: ldx r5, r3, r5
; LE-PWR9-NEXT: stdx r4, r3, r6
; LE-PWR9-NEXT: stxvx vs0, r3, r7
; LE-PWR9-NEXT: li r7, 43
; LE-PWR9-NEXT: stdx r5, r3, r7
; LE-PWR9-NEXT: lxvx vs0, r3, r4
; LE-PWR9-NEXT: li r4, 27
; LE-PWR9-NEXT: lxvx vs1, r3, r4
; LE-PWR9-NEXT: li r4, 35
; LE-PWR9-NEXT: stxvx vs1, r3, r4
; LE-PWR9-NEXT: li r4, 19
; LE-PWR9-NEXT: stxvx vs0, r3, r4
; LE-PWR9-NEXT: blr
;
; LE-PWR8-LABEL: testUnalignedLdStPair:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
; LE-PWR8-NEXT: li r4, 19
; LE-PWR8-NEXT: li r4, 27
; LE-PWR8-NEXT: li r5, 11
; LE-PWR8-NEXT: li r6, 35
; LE-PWR8-NEXT: li r7, 43
; LE-PWR8-NEXT: li r8, 27
; LE-PWR8-NEXT: li r6, 19
; LE-PWR8-NEXT: li r8, 35
; LE-PWR8-NEXT: addi r3, r3, g@toc@l
; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
; LE-PWR8-NEXT: ldx r5, r3, r5
; LE-PWR8-NEXT: ldx r6, r3, r6
; LE-PWR8-NEXT: stdx r6, r3, r7
; LE-PWR8-NEXT: stdx r5, r3, r4
; LE-PWR8-NEXT: ldx r7, r3, r6
; LE-PWR8-NEXT: stdx r7, r3, r4
; LE-PWR8-NEXT: stdx r5, r3, r6
; LE-PWR8-NEXT: stxvd2x vs0, r3, r8
; LE-PWR8-NEXT: blr
;
; BE-PWR9-LABEL: testUnalignedLdStPair:
; BE-PWR9: # %bb.0: # %entry
; BE-PWR9-NEXT: addis r3, r2, g@toc@ha
; BE-PWR9-NEXT: li r6, 19
; BE-PWR9-NEXT: li r4, 11
; BE-PWR9-NEXT: li r5, 35
; BE-PWR9-NEXT: li r7, 27
; BE-PWR9-NEXT: addi r3, r3, g@toc@l
; BE-PWR9-NEXT: lxvx vs0, r3, r6
; BE-PWR9-NEXT: ldx r4, r3, r4
; BE-PWR9-NEXT: ldx r5, r3, r5
; BE-PWR9-NEXT: stdx r4, r3, r6
; BE-PWR9-NEXT: stxvx vs0, r3, r7
; BE-PWR9-NEXT: li r7, 43
; BE-PWR9-NEXT: stdx r5, r3, r7
; BE-PWR9-NEXT: lxvx vs0, r3, r4
; BE-PWR9-NEXT: li r4, 27
; BE-PWR9-NEXT: lxvx vs1, r3, r4
; BE-PWR9-NEXT: li r4, 35
; BE-PWR9-NEXT: stxvx vs1, r3, r4
; BE-PWR9-NEXT: li r4, 19
; BE-PWR9-NEXT: stxvx vs0, r3, r4
; BE-PWR9-NEXT: blr
;
; BE-PWR8-LABEL: testUnalignedLdStPair:
; BE-PWR8: # %bb.0: # %entry
; BE-PWR8-NEXT: addis r3, r2, g@toc@ha
; BE-PWR8-NEXT: li r4, 19
; BE-PWR8-NEXT: li r5, 11
; BE-PWR8-NEXT: li r6, 35
; BE-PWR8-NEXT: li r7, 27
; BE-PWR8-NEXT: li r4, 11
; BE-PWR8-NEXT: li r5, 27
; BE-PWR8-NEXT: addi r3, r3, g@toc@l
; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
; BE-PWR8-NEXT: ldx r5, r3, r5
; BE-PWR8-NEXT: ldx r6, r3, r6
; BE-PWR8-NEXT: stxvd2x vs0, r3, r7
; BE-PWR8-NEXT: li r7, 43
; BE-PWR8-NEXT: stdx r5, r3, r4
; BE-PWR8-NEXT: stdx r6, r3, r7
; BE-PWR8-NEXT: lxvd2x vs1, r3, r5
; BE-PWR8-NEXT: li r4, 35
; BE-PWR8-NEXT: li r5, 19
; BE-PWR8-NEXT: stxvd2x vs1, r3, r4
; BE-PWR8-NEXT: stxvd2x vs0, r3, r5
; BE-PWR8-NEXT: blr
entry:
%0 = bitcast <256 x i1>* @g to i8*
Expand Down
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
Expand Up @@ -920,3 +920,31 @@ define void @merge_heterogeneous(%struct.C* nocapture %p, %struct.C* nocapture %
ret void
}

define i32 @merge_store_load_store_seq(i32* %buff) {
entry:
; CHECK-LABEL: merge_store_load_store_seq:
; CHECK: movl 4(%rdi), %eax
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: retq

store i32 0, i32* %buff, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1
%0 = load i32, i32* %arrayidx1, align 4
store i32 0, i32* %arrayidx1, align 4
ret i32 %0
}

define i32 @merge_store_alias(i32* %buff, i32* %other) {
entry:
; CHECK-LABEL: merge_store_alias:
; CHECK: movl $0, (%rdi)
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq

store i32 0, i32* %buff, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1
%0 = load i32, i32* %other, align 4
store i32 0, i32* %arrayidx1, align 4
ret i32 %0
}

0 comments on commit e2cc091

Please sign in to comment.