[DAGCombine] Improve ReduceLoadWidth for SRL
If the SRL node is only used by an AND, we may be able to set the
ExtVT to the width of the mask, making the AND redundant. To support
this, another check has been added in isLegalNarrowLoad, which queries
whether the narrowed load is valid: the shift amount must be a whole
number of bytes, and the narrowed access must not be an unsupported
unaligned access.

Differential Revision: https://reviews.llvm.org/D41350

llvm-svn: 321259
sparker-arm committed Dec 21, 2017
1 parent 17fb580 commit 59efb8c
Showing 3 changed files with 140 additions and 22 deletions.
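
To make the transform concrete, here is the pattern it targets, lifted straight from the shift-combine.ll update below: the SRL's only user is an AND with a low-bit mask, so the ExtVT can be set to the mask's width and the load narrowed, folding away both the shift and the AND.

; The i32 load feeds an lshr whose only user is a mask of 0xFF,
; so the whole sequence can become a single narrow (zext) load.
define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
entry:
  %0 = load i32, i32* %p, align 4
  %shl = lshr i32 %0, 8            ; SRL by a whole byte
  %and = and i32 %shl, 255         ; 0xFF is an 8-bit mask -> ExtVT = i8
  store i32 %and, i32* %p, align 4
  ret void
}

After this patch, little-endian ARM selects ldrb r1, [r0, #1] for this sequence (big-endian reads from offset #2), with no ubfx or and left over, as the updated CHECK lines verify.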
26 changes: 26 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3788,6 +3788,16 @@ bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
   if (LoadN->getNumValues() > 2)
     return false;
 
+  // Only allow byte offsets.
+  if (ShAmt % 8)
+    return false;
+
+  // Ensure that this isn't going to produce an unsupported unaligned access.
+  if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                       ExtVT, LoadN->getAddressSpace(),
+                                       ShAmt / 8))
+    return false;
+
   // If the load that we're shrinking is an extload and we're not just
   // discarding the extension we can't simply shrink the load. Bail.
   // TODO: It would be possible to merge the extensions in some cases.
@@ -8274,6 +8284,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
+
+      // If the SRL is only used by a masking AND, we may be able to adjust
+      // the ExtVT to make the AND redundant.
+      SDNode *Mask = *(N->use_begin());
+      if (Mask->getOpcode() == ISD::AND &&
+          isa<ConstantSDNode>(Mask->getOperand(1))) {
+        const APInt &ShiftMask =
+          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+        if (ShiftMask.isMask()) {
+          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+                                           ShiftMask.countTrailingOnes());
+          // Recompute the type.
+          if (TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+            ExtVT = MaskedVT;
+        }
+      }
     }
   }

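The new checks are what keep the combine from misfiring. As a minimal sketch of the two bail-out cases (function names here are illustrative; the behaviour matches the new ARM tests below): the ShAmt % 8 test in isLegalNarrowLoad rejects shifts that are not a whole number of bytes, and the isLoadExtLegal query rejects mask widths for which no extending load exists.

; Not narrowed: the shift amount 9 is not a multiple of 8, so the
; byte-offset check bails (compare test_shift9_mask8, which keeps
; its ldr + ubfx).
define i32 @srl9_mask8(i32* %p) {
entry:
  %0 = load i32, i32* %p, align 4
  %shifted = lshr i32 %0, 9
  %masked = and i32 %shifted, 255
  ret i32 %masked
}

; Not narrowed: 127 is a 7-bit mask and there is no legal i7 extending
; load, so isLoadExtLegal fails and ExtVT is left untouched (compare
; test_shift8_mask7).
define i32 @srl8_mask7(i32* %p) {
entry:
  %0 = load i32, i32* %p, align 4
  %shifted = lshr i32 %0, 8
  %masked = and i32 %shifted, 127
  ret i32 %masked
}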
106 changes: 102 additions & 4 deletions llvm/test/CodeGen/ARM/shift-combine.ll
@@ -217,10 +217,23 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: test_shift8_mask8
+; CHECK-LABEL: test_shift7_mask8
 ; CHECK-BE: ldr r1, [r0]
 ; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #8
+; CHECK-COMMON: ubfx r1, r1, #7, #8
 ; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 7
+  %and = and i32 %shl, 255
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask8
+; CHECK-BE: ldrb r1, [r0, #2]
+; CHECK-COMMON: ldrb r1, [r0, #1]
+; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
 entry:
@@ -231,10 +244,40 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_shift8_mask16
+; CHECK-LABEL: test_shift8_mask7
 ; CHECK-BE: ldr r1, [r0]
 ; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #8, #7
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 127
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift9_mask8
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #16
+; CHECK-COMMON: ubfx r1, r1, #9, #8
 ; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 9
+  %and = and i32 %shl, 255
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask16
+; CHECK-ALIGN: ldr r1, [r0]
+; CHECK-ALIGN: ubfx r1, r1, #8, #16
+; CHECK-BE: ldrh r1, [r0, #1]
+; CHECK-ARM: ldrh r1, [r0, #1]
+; CHECK-THUMB: ldrh.w r1, [r0, #1]
+; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) {
 entry:
@@ -245,6 +288,61 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: test_shift15_mask16
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #15, #16
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 15
+  %and = and i32 %shl, 65535
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift16_mask15
+; CHECK-BE: ldrh r1, [r0]
+; CHECK-COMMON: ldrh r1, [r0, #2]
+; CHECK-COMMON: bfc r1, #15, #17
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 16
+  %and = and i32 %shl, 32767
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask24
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-ARM: lsr r1, r1, #8
+; CHECK-THUMB: lsrs r1, r1, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask24(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 16777215
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift24_mask16
+; CHECK-BE: ldrb r1, [r0]
+; CHECK-COMMON: ldrb r1, [r0, #3]
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift24_mask16(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 24
+  %and = and i32 %shl, 65535
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
 ; CHECK-LABEL: test_sext_shift8_mask8
 ; CHECK-BE: ldrb r0, [r0]
 ; CHECK-COMMON: ldrb r0, [r0, #1]
30 changes: 12 additions & 18 deletions llvm/test/CodeGen/X86/h-registers-1.ll
@@ -22,21 +22,18 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
 ; CHECK-NEXT: movzbl %ah, %eax # NOREX
 ; CHECK-NEXT: movq %rax, %r10
 ; CHECK-NEXT: movzbl %dh, %edx # NOREX
-; CHECK-NEXT: movzbl %ch, %eax # NOREX
-; CHECK-NEXT: movq %rax, %r11
+; CHECK-NEXT: movzbl %ch, %ebp # NOREX
 ; CHECK-NEXT: movq %r8, %rax
 ; CHECK-NEXT: movzbl %ah, %ecx # NOREX
 ; CHECK-NEXT: movq %r9, %rax
-; CHECK-NEXT: movzbl %ah, %ebp # NOREX
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: movzbl %ah, %eax # NOREX
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
-; CHECK-NEXT: movzbl %bh, %edi # NOREX
+; CHECK-NEXT: movzbl %ah, %ebx # NOREX
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
 ; CHECK-NEXT: movq %r10, %r8
 ; CHECK-NEXT: addq %r8, %rsi
-; CHECK-NEXT: addq %r11, %rdx
+; CHECK-NEXT: addq %rbp, %rdx
 ; CHECK-NEXT: addq %rsi, %rdx
-; CHECK-NEXT: addq %rbp, %rcx
+; CHECK-NEXT: addq %rbx, %rcx
 ; CHECK-NEXT: addq %rdi, %rax
 ; CHECK-NEXT: addq %rcx, %rax
 ; CHECK-NEXT: addq %rdx, %rax
@@ -58,21 +55,18 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
 ; GNUX32-NEXT: movzbl %ah, %eax # NOREX
 ; GNUX32-NEXT: movq %rax, %r10
 ; GNUX32-NEXT: movzbl %dh, %edx # NOREX
-; GNUX32-NEXT: movzbl %ch, %eax # NOREX
-; GNUX32-NEXT: movq %rax, %r11
+; GNUX32-NEXT: movzbl %ch, %ebp # NOREX
 ; GNUX32-NEXT: movq %r8, %rax
 ; GNUX32-NEXT: movzbl %ah, %ecx # NOREX
 ; GNUX32-NEXT: movq %r9, %rax
-; GNUX32-NEXT: movzbl %ah, %ebp # NOREX
-; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GNUX32-NEXT: movzbl %ah, %eax # NOREX
-; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; GNUX32-NEXT: movzbl %bh, %edi # NOREX
+; GNUX32-NEXT: movzbl %ah, %ebx # NOREX
+; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %edi
 ; GNUX32-NEXT: movq %r10, %r8
 ; GNUX32-NEXT: addq %r8, %rsi
-; GNUX32-NEXT: addq %r11, %rdx
+; GNUX32-NEXT: addq %rbp, %rdx
 ; GNUX32-NEXT: addq %rsi, %rdx
-; GNUX32-NEXT: addq %rbp, %rcx
+; GNUX32-NEXT: addq %rbx, %rcx
 ; GNUX32-NEXT: addq %rdi, %rax
 ; GNUX32-NEXT: addq %rcx, %rax
 ; GNUX32-NEXT: addq %rdx, %rax
