Skip to content

Commit

Permalink
[X86] Only reorder srl/and on last DAG combiner run
Browse files Browse the repository at this point in the history
This seems to interfere with a target-independent brcond combine that looks for the (srl (and X, C1), C2) pattern to enable TEST instructions. Once we flip the order of the srl and the and, that combine no longer fires and the node is instead exposed to the X86-specific BT combine, which causes us to emit a BT instruction. BT has lower throughput than TEST.

We could try to make the brcond combine aware of the alternate pattern, but since the flip was just a code size reduction and not likely to enable other combines, it seemed easier to just delay it until after lowering.

Differential Revision: https://reviews.llvm.org/D43201

llvm-svn: 325371
  • Loading branch information
topperc committed Feb 16, 2018
1 parent 3d1f4b9 commit de565fc
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 30 deletions.
10 changes: 8 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -32941,11 +32941,17 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}

static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) {
static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();

// Only do this on the last DAG combine as it can interfere with other
// combines.
if (!DCI.isAfterLegalizeVectorOps())
return SDValue();

// Try to improve a sequence of srl (and X, C1), C2 by inverting the order.
// TODO: This is a generic DAG combine that became an x86-only combine to
// avoid shortcomings in other folds such as bswap, bit-test ('bt'), and
Expand Down Expand Up @@ -32996,7 +33002,7 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
return V;

if (N->getOpcode() == ISD::SRL)
if (SDValue V = combineShiftRightLogical(N, DAG))
if (SDValue V = combineShiftRightLogical(N, DAG, DCI))
return V;

return SDValue();
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/live-out-reg-info.ll
Expand Up @@ -12,8 +12,8 @@ define void @foo(i32 %a) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: shrl $23, %edi
; CHECK-NEXT: btl $8, %edi
; CHECK-NEXT: jb .LBB0_2
; CHECK-NEXT: testl $256, %edi # imm = 0x100
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %true
; CHECK-NEXT: callq qux
; CHECK-NEXT: .LBB0_2: # %false
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/test-shrink.ll
Expand Up @@ -6,8 +6,8 @@
define void @g64xh(i64 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: g64xh:
; CHECK-LINUX64: # %bb.0:
; CHECK-LINUX64-NEXT: btl $11, %edi
; CHECK-LINUX64-NEXT: jb .LBB0_2
; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-LINUX64-NEXT: jne .LBB0_2
; CHECK-LINUX64-NEXT: # %bb.1: # %yes
; CHECK-LINUX64-NEXT: pushq %rax
; CHECK-LINUX64-NEXT: callq bar
Expand All @@ -18,8 +18,8 @@ define void @g64xh(i64 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: g64xh:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
; CHECK-WIN32-64-NEXT: btl $11, %ecx
; CHECK-WIN32-64-NEXT: jb .LBB0_2
; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800
; CHECK-WIN32-64-NEXT: jne .LBB0_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
; CHECK-WIN32-64-NEXT: callq bar
; CHECK-WIN32-64-NEXT: .LBB0_2: # %no
Expand All @@ -28,8 +28,8 @@ define void @g64xh(i64 inreg %x) nounwind {
;
; CHECK-X86-LABEL: g64xh:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: btl $11, %eax
; CHECK-X86-NEXT: jb .LBB0_2
; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800
; CHECK-X86-NEXT: jne .LBB0_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
; CHECK-X86-NEXT: .LBB0_2: # %no
Expand Down Expand Up @@ -90,8 +90,8 @@ no:
define void @g32xh(i32 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: g32xh:
; CHECK-LINUX64: # %bb.0:
; CHECK-LINUX64-NEXT: btl $11, %edi
; CHECK-LINUX64-NEXT: jb .LBB2_2
; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-LINUX64-NEXT: jne .LBB2_2
; CHECK-LINUX64-NEXT: # %bb.1: # %yes
; CHECK-LINUX64-NEXT: pushq %rax
; CHECK-LINUX64-NEXT: callq bar
Expand All @@ -102,8 +102,8 @@ define void @g32xh(i32 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: g32xh:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
; CHECK-WIN32-64-NEXT: btl $11, %ecx
; CHECK-WIN32-64-NEXT: jb .LBB2_2
; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800
; CHECK-WIN32-64-NEXT: jne .LBB2_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
; CHECK-WIN32-64-NEXT: callq bar
; CHECK-WIN32-64-NEXT: .LBB2_2: # %no
Expand All @@ -112,8 +112,8 @@ define void @g32xh(i32 inreg %x) nounwind {
;
; CHECK-X86-LABEL: g32xh:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: btl $11, %eax
; CHECK-X86-NEXT: jb .LBB2_2
; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800
; CHECK-X86-NEXT: jne .LBB2_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
; CHECK-X86-NEXT: .LBB2_2: # %no
Expand Down Expand Up @@ -174,8 +174,8 @@ no:
define void @g16xh(i16 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: g16xh:
; CHECK-LINUX64: # %bb.0:
; CHECK-LINUX64-NEXT: btl $11, %edi
; CHECK-LINUX64-NEXT: jb .LBB4_2
; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-LINUX64-NEXT: jne .LBB4_2
; CHECK-LINUX64-NEXT: # %bb.1: # %yes
; CHECK-LINUX64-NEXT: pushq %rax
; CHECK-LINUX64-NEXT: callq bar
Expand All @@ -186,8 +186,8 @@ define void @g16xh(i16 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: g16xh:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
; CHECK-WIN32-64-NEXT: btl $11, %ecx
; CHECK-WIN32-64-NEXT: jb .LBB4_2
; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800
; CHECK-WIN32-64-NEXT: jne .LBB4_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
; CHECK-WIN32-64-NEXT: callq bar
; CHECK-WIN32-64-NEXT: .LBB4_2: # %no
Expand All @@ -196,8 +196,8 @@ define void @g16xh(i16 inreg %x) nounwind {
;
; CHECK-X86-LABEL: g16xh:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: btl $11, %eax
; CHECK-X86-NEXT: jb .LBB4_2
; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800
; CHECK-X86-NEXT: jne .LBB4_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
; CHECK-X86-NEXT: .LBB4_2: # %no
Expand Down
17 changes: 11 additions & 6 deletions llvm/test/CodeGen/X86/test-vs-bittest.ll
Expand Up @@ -6,8 +6,8 @@ define void @test64(i64 inreg %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB0_2
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB0_2: # %no
Expand Down Expand Up @@ -47,6 +47,11 @@ no:
ret void
}

; This test is identical to test64 above with only the destination of the br
; reversed. This somehow causes the two functions to get slightly different
; initial IR. One has an extra invert of the setcc. This previously caused one
; of the functions to use a BT while the other used a TEST due to another DAG
; combine messing with an expected canonical form.
define void @test64_2(i64 inreg %x) {
; CHECK-LABEL: test64_2:
; CHECK: # %bb.0:
Expand Down Expand Up @@ -190,8 +195,8 @@ define void @test32(i32 inreg %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB8_2
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB8_2: # %no
Expand Down Expand Up @@ -282,8 +287,8 @@ define void @test16(i16 inreg %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB12_2
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: jne .LBB12_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB12_2: # %no
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/xor-icmp.ll
Expand Up @@ -19,8 +19,8 @@ define i32 @t(i32 %a, i32 %b) nounwind ssp {
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl $14, %edi
; X64-NEXT: jae .LBB0_1
; X64-NEXT: testl $16384, %edi # imm = 0x4000
; X64-NEXT: je .LBB0_1
; X64-NEXT: # %bb.2: # %bb1
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB0_1: # %bb
Expand Down

0 comments on commit de565fc

Please sign in to comment.