Skip to content

Commit

Permalink
Cleanup Store Merging in UseAA case
Browse files Browse the repository at this point in the history
This patch fixes a bug (PR26827) when using anti-aliasing in store
merging. This sets the chain users of the component stores to point to
the new store instead of the component stores chain parent.

Reviewers: jyknight

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D18909

llvm-svn: 266217
  • Loading branch information
niravhdave committed Apr 13, 2016
1 parent fe6df26 commit 2477491
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 30 deletions.
74 changes: 44 additions & 30 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -11226,26 +11226,34 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
false, false,
FirstInChain->getAlignment());

// Replace the last store with the new store
CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumStores; ++i) {
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// ReplaceAllUsesWith will replace all uses that existed when it was
// called, but graph optimizations may cause new ones to appear. For
// example, the case in pr14333 looks like
//
// St's chain -> St -> another store -> X
//
// And the only difference from St to the other store is the chain.
// When we change it's chain to be St's chain they become identical,
// get CSEed and the net result is that X is now a use of St.
// Since we know that St is redundant, just iterate.
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
: DAG.getSubtarget().useAA();
if (UseAA) {
// Replace all merged stores with the new store.
for (unsigned i = 0; i < NumStores; ++i)
CombineTo(StoreNodes[i].MemNode, NewStore);
} else {
// Replace the last store with the new store.
CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumStores; ++i) {
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// ReplaceAllUsesWith will replace all uses that existed when it was
// called, but graph optimizations may cause new ones to appear. For
// example, the case in pr14333 looks like
//
// St's chain -> St -> another store -> X
//
// And the only difference from St to the other store is the chain.
// When we change it's chain to be St's chain they become identical,
// get CSEed and the net result is that X is now a use of St.
// Since we know that St is redundant, just iterate.
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
}
}

return true;
Expand Down Expand Up @@ -11778,16 +11786,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
SDValue(NewLoad.getNode(), 1));
}

// Replace the last store with the new store.
CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
if (UseAA) {
// Replace the all stores with the new store.
for (unsigned i = 0; i < NumElem; ++i)
CombineTo(StoreNodes[i].MemNode, NewStore);
} else {
// Replace the last store with the new store.
CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem; ++i) {
// Remove all Store nodes.
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
}
}

return true;
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/AArch64/merge-store.ll
@@ -1,5 +1,6 @@
; RUN: llc -march aarch64 %s -o - | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE
; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53

@g0 = external global <3 x float>, align 16
@g1 = external global <3 x float>, align 4
Expand Down Expand Up @@ -48,3 +49,66 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
; CYCLONE-NEXT: str d1, [x0, #32]
; CYCLONE-NEXT: ret
}


; PR26827 - Merge stores causes wrong dependency.
%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* }
@gv0 = internal unnamed_addr global i32 0, align 4
@gv1 = internal unnamed_addr global %struct1** null, align 8

define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg) {
;CHECK-LABEL: test
entry:
;A53: mov [[DATA:w[0-9]+]], w1
;A53: str q{{[0-9]+}}, {{.*}}
;A53: str q{{[0-9]+}}, {{.*}}
;A53: str [[DATA]], {{.*}}

%0 = bitcast %struct1* %fde to i8*
tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false)
%state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
store i16 256, i16* %state, align 8
%fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
store i32 %fd, i32* %fd1, align 8
%force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3
store i32 0, i32* %force_eof, align 4
%func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6
store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8
%arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7
store i8* %arg, i8** %arg3, align 8
%call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, i8* %0) #6
%1 = load i32, i32* %fd1, align 8
%cmp.i = icmp slt i32 %1, 0
br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
if.then.i:
unreachable

while.body.i.preheader:
%2 = load i32, i32* @gv0, align 4
%3 = icmp eq i32* %fd1, @gv0
br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader

while.body.i.split.ver.us.preheader:
br label %while.body.i.split.ver.us

while.body.i.split.ver.us:
%.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ]
%mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
%4 = icmp sgt i32 %mul.i.ver.us, %1
br i1 %4, label %while.end.i, label %while.body.i.split.ver.us

while.body.i.split:
br label %while.body.i.split

while.end.i:
%call.i = tail call i8* @foo()
store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8
br label %exit

exit:
ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
declare i32 @fcntl(i32, i32, ...)
declare noalias i8* @foo()

0 comments on commit 2477491

Please sign in to comment.