Skip to content

Commit

Permalink
Improve Aliasing of operations to static alloca
Browse files Browse the repository at this point in the history
Recommitting after adding a check to avoid miscomputing alias information
on addresses of the same base but different subindices.

Memory accesses offset from frame indices may alias, e.g., we
may merge writes from function arguments passed on the stack when they
are contiguous. As a result, when checking aliasing, we consider the
underlying frame index's offset from the stack pointer.

Static allocas are realized as stack objects in SelectionDAG, but their
offsets are not set until post-DAG, causing DAGCombiner's alias check to
frequently treat accesses to static allocas as aliasing. Modify isAlias
so that accesses to two distinct frame objects, at least one of which is
a static alloca, are considered non-aliasing.

Many test changes are included here. Most are fixes for tests which
indirectly relied on our aliasing ability and needed to be modified to
preserve their original intent.

The remaining tests have minor improvements due to relaxed
ordering. The exception is CodeGen/X86/2011-10-19-widen_vselect.ll,
which has a minor degradation even though the pre-legalized DAG is
improved.

Reviewers: rnk, mkuper, jonpa, hfinkel, uweigand

Reviewed By: rnk

Subscribers: sdardis, nemanjai, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D33345

llvm-svn: 308025
  • Loading branch information
niravhdave committed Jul 14, 2017
1 parent 89ca10d commit a8f63af
Show file tree
Hide file tree
Showing 24 changed files with 168 additions and 125 deletions.
22 changes: 16 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11375,12 +11375,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplLoad.getValue(1));

// Make sure the new and old chains are cleaned up.
AddToWorklist(Token.getNode());

// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
// Replace uses with load result and token factor
return CombineTo(N, ReplLoad.getValue(0), Token);
}
}

Expand Down Expand Up @@ -16705,6 +16701,20 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
// able to calculate their relative offset if at least one arises
// from an alloca. However, these allocas cannot overlap and we
// can infer there is no alias.
if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// If the bases are the same frame index but we couldn't find a
// constant offset (i.e., the indices are different), be conservative.
if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
!MFI.isFixedObjectIndex(B->getIndex())))
return false;
}

// FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
// modified to use BaseIndexOffset.

Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
; First vararg
; CHECK: ldr {{w[0-9]+}}, [sp, #72]
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
; Second vararg
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
; Third vararg
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AArch64/arm64-abi_align.ll
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,10 @@ entry:
define i32 @caller42() #3 {
entry:
; CHECK-LABEL: caller42
; CHECK: str {{x[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; CHECK: str {{x[0-9]+}}, [sp, #16]
; CHECK: str {{q[0-9]+}}, [sp]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
Expand Down Expand Up @@ -318,10 +318,10 @@ entry:
; CHECK-LABEL: caller42_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK: stur {{x[0-9]+}}, [x29, #-16]
; CHECK: stur {{q[0-9]+}}, [x29, #-32]
; CHECK: str {{x[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
Expand Down Expand Up @@ -388,10 +388,10 @@ entry:
define i32 @caller43() #3 {
entry:
; CHECK-LABEL: caller43
; CHECK: str {{q[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; CHECK: str {{q[0-9]+}}, [sp, #16]
; CHECK: str {{q[0-9]+}}, [sp]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
Expand Down Expand Up @@ -430,10 +430,10 @@ entry:
; CHECK-LABEL: caller43_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK: stur {{q[0-9]+}}, [x29, #-16]
; CHECK: stur {{q[0-9]+}}, [x29, #-32]
; CHECK: str {{q[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s

; CHECK: foo
; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]]
; CHECK: str w[[REG1]], [x19, #132]

; CHECK-DAG: str w[[REG0:[0-9]+]], [x19, #132]
; CHECK-DAG: str w[[REG0]], [x19, #264]
define i32 @foo(i32 %a) nounwind {
%retval = alloca i32, align 4
%a.addr = alloca i32, align 4
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-vext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ define void @test_vext_p16() nounwind ssp {

define void @test_vext_s32() nounwind ssp {
; CHECK-LABEL: test_vext_s32:
; CHECK: {{ext.8.*#4}}
; CHECK: {{rev64.2s.*}}
%xS32x2 = alloca <2 x i32>, align 8
%__a = alloca <2 x i32>, align 8
%__b = alloca <2 x i32>, align 8
Expand All @@ -137,7 +137,7 @@ define void @test_vext_s32() nounwind ssp {

define void @test_vext_u32() nounwind ssp {
; CHECK-LABEL: test_vext_u32:
; CHECK: {{ext.8.*#4}}
; CHECK: {{rev64.2s.*}}
%xU32x2 = alloca <2 x i32>, align 8
%__a = alloca <2 x i32>, align 8
%__b = alloca <2 x i32>, align 8
Expand All @@ -158,7 +158,7 @@ define void @test_vext_u32() nounwind ssp {

define void @test_vext_f32() nounwind ssp {
; CHECK-LABEL: test_vext_f32:
; CHECK: {{ext.8.*#4}}
; CHECK: {{rev64.2s.*}}
%xF32x2 = alloca <2 x float>, align 8
%__a = alloca <2 x float>, align 8
%__b = alloca <2 x float>, align 8
Expand All @@ -179,7 +179,7 @@ define void @test_vext_f32() nounwind ssp {

define void @test_vext_s64() nounwind ssp {
; CHECK-LABEL: test_vext_s64:
; CHECK_FIXME: {{ext.8.*#1}}
; CHECK_FIXME: {{rev64.2s.*}}
; this just turns into a load of the second element
%xS64x1 = alloca <1 x i64>, align 8
%__a = alloca <1 x i64>, align 8
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ main_:
%i32T = alloca i32, align 4
%i32F = alloca i32, align 4
%i32X = alloca i32, align 4
store i32 0, i32* %tmp
store i32 %argc, i32* %tmp
store i32 15, i32* %i32T, align 4
store i32 5, i32* %i32F, align 4
%tmp6 = load i32, i32* %tmp, align 4
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/swifterror.ll
Original file line number Diff line number Diff line change
Expand Up @@ -309,17 +309,17 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
; CHECK-APPLE-LABEL: foo_vararg:
; CHECK-APPLE: orr w0, wzr, #0x10
; CHECK-APPLE: malloc
; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
; CHECK-APPLE: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
; CHECK-APPLE: strb [[ID]], [x0, #8]
; CHECK-APPLE-DAG: orr [[ID:w[0-9]+]], wzr, #0x1
; CHECK-APPLE-DAG: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
; CHECK-APPLE-DAG: strb [[ID]], [x0, #8]

; First vararg
; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8
; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #8
; Second vararg
; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16
; Third vararg
; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]

Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/ARM/atomic-op.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ entry:
store i32 3855, i32* %xort
store i32 4, i32* %temp
%tmp = load i32, i32* %temp
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: add
; CHECK: strex
Expand All @@ -35,6 +36,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%0 = atomicrmw add i32* %val1, i32 %tmp monotonic
store i32 %0, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: sub
; CHECK: strex
Expand All @@ -44,6 +46,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%1 = atomicrmw sub i32* %val2, i32 30 monotonic
store i32 %1, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: add
; CHECK: strex
Expand All @@ -53,6 +56,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%2 = atomicrmw add i32* %val2, i32 1 monotonic
store i32 %2, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: sub
; CHECK: strex
Expand All @@ -62,6 +66,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%3 = atomicrmw sub i32* %val2, i32 1 monotonic
store i32 %3, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: and
; CHECK: strex
Expand All @@ -71,6 +76,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%4 = atomicrmw and i32* %andt, i32 4080 monotonic
store i32 %4, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: or
; CHECK: strex
Expand All @@ -80,6 +86,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%5 = atomicrmw or i32* %ort, i32 4080 monotonic
store i32 %5, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: eor
; CHECK: strex
Expand All @@ -89,6 +96,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%6 = atomicrmw xor i32* %xort, i32 4080 monotonic
store i32 %6, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand All @@ -98,6 +106,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%7 = atomicrmw min i32* %val2, i32 16 monotonic
store i32 %7, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
%neg = sub i32 0, 1
; CHECK: ldrex
; CHECK: cmp
Expand All @@ -108,6 +117,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%8 = atomicrmw min i32* %val2, i32 %neg monotonic
store i32 %8, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand All @@ -117,6 +127,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%9 = atomicrmw max i32* %val2, i32 1 monotonic
store i32 %9, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand All @@ -126,6 +137,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%10 = atomicrmw max i32* %val2, i32 0 monotonic
store i32 %10, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand All @@ -135,6 +147,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%11 = atomicrmw umin i32* %val2, i32 16 monotonic
store i32 %11, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
%uneg = sub i32 0, 1
; CHECK: ldrex
; CHECK: cmp
Expand All @@ -145,6 +158,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
store i32 %12, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand All @@ -154,6 +168,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%13 = atomicrmw umax i32* %val2, i32 1 monotonic
store i32 %13, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/MSP430/vararg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ entry:
; CHECK-LABEL: va_copy:
%vl.addr = alloca i8*, align 2
%vl2 = alloca i8*, align 2
; CHECK: mov.w r12, 2(r1)
; CHECK-DAG: mov.w r12, 2(r1)
store i8* %vl, i8** %vl.addr, align 2
%0 = bitcast i8** %vl2 to i8*
%1 = bitcast i8** %vl.addr to i8*
; CHECK-NEXT: mov.w r12, 0(r1)
; CHECK-DAG: mov.w r12, 0(r1)
call void @llvm.va_copy(i8* %0, i8* %1)
ret void
}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Mips/dins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ entry:
; CHECK-LABEL: f123:
; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 123
; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 27, 37
; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 5
; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 4
; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 28, 6
; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 5
; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 50, 14
; MIPS64R2: dsrl $[[R0:[0-9]+]], $[[R1:[0-9]+]], 50
; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 34, 16
Expand Down Expand Up @@ -94,4 +94,4 @@ entry:
; MIPS32R2: ori $[[R0:[0-9]+]], $[[R0:[0-9]+]], 8
; MIPS32R2-NOT: ins {{[[:space:]].*}}
; MIPS64R2N32: ori $[[R0:[0-9]+]], $[[R0:[0-9]+]], 8
; MIPS64R2N32-NOT: ins {{[[:space:]].*}}
; MIPS64R2N32-NOT: ins {{[[:space:]].*}}
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,11 @@ define void @full_test() {
; X32-NEXT: cmpeqps %xmm2, %xmm1
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: blendvps %xmm0, %xmm2, %xmm4
; X32-NEXT: extractps $1, %xmm4, {{[0-9]+}}(%esp)
; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: movshdup {{.*#+}} xmm0 = xmm4[1,1,3,3]
; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
; X32-NEXT: retl
;
Expand Down
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/X86/alias-static-alloca.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
; RUN: llc -o - -mtriple=x86_64-linux-gnu %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; We should be able to forward the stored values directly to their
; corresponding loads here.

; CHECK-LABEL: foo
; CHECK-DAG: movl %esi, -8(%rsp)
; CHECK-DAG: movl %ecx, -16(%rsp)
; CHECK-DAG: movl %edi, -4(%rsp)
; CHECK-DAG: movl %edx, -12(%rsp)
; CHECK: leal
; CHECK: addl
; CHECK: addl
; CHECK: retq

; Stores each of the four i32 arguments into its own alloca, loads all four
; slots back, and returns their sum. The stores are issued in a different
; order (a1, a3, a0, a2) than the loads (a0, a1, a2, a3).
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
; Four separate i32 stack slots, one per argument.
%a0 = alloca i32
%a1 = alloca i32
%a2 = alloca i32
%a3 = alloca i32
; Each argument is written to a different slot: %b->a1, %d->a3, %a->a0, %c->a2.
store i32 %b, i32* %a1
store i32 %d, i32* %a3
store i32 %a, i32* %a0
store i32 %c, i32* %a2
; Read every slot back after all four stores have been issued.
%l0 = load i32, i32* %a0
%l1 = load i32, i32* %a1
%l2 = load i32, i32* %a2
%l3 = load i32, i32* %a3
; Sum the four loaded values: ((l0 + l1) + l2) + l3.
%add0 = add nsw i32 %l0, %l1
%add1 = add nsw i32 %add0, %l2
%add2 = add nsw i32 %add1, %l3
ret i32 %add2
}
Loading

0 comments on commit a8f63af

Please sign in to comment.