Skip to content

Commit

Permalink
X86: Perform integer comparisons at i32 or larger.
Browse files Browse the repository at this point in the history
Utilizing the 8- and 16-bit comparison instructions, even when an input can
be folded into the comparison instruction itself, is typically not worth it.
There are too many partial register stalls as a result, leading to significant
slowdowns. By always performing comparisons on at least 32-bit
registers, performance of the calculation chain leading to the
comparison improves. Continue to use the smaller comparisons when
minimizing size, as that allows better folding of loads into the
comparison instructions.

rdar://15386341

llvm-svn: 195496
  • Loading branch information
Jim Grosbach committed Nov 22, 2013
1 parent 4b7f23d commit 860934a
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 106 deletions.
29 changes: 29 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3419,6 +3419,24 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
}
}

/// \brief Classify an X86 integer condition code as unsigned or signed.
///
/// Returns false for the signed ordering conditions (G, GE, L, LE) and true
/// for the unsigned ordering conditions (B, A, BE, AE). The equality
/// conditions (E, NE) are reported as unsigned as well. Any other condition
/// code is treated as invalid and hits llvm_unreachable.
static bool isX86CCUnsigned(unsigned X86CC) {
  switch (X86CC) {
  // Signed orderings.
  case X86::COND_G:
  case X86::COND_GE:
  case X86::COND_L:
  case X86::COND_LE:
    return false;

  // Equality tests and unsigned orderings.
  case X86::COND_E:
  case X86::COND_NE:
  case X86::COND_B:
  case X86::COND_A:
  case X86::COND_BE:
  case X86::COND_AE:
    return true;

  default:
    llvm_unreachable("Invalid integer condition!");
  }
  // Every path through the switch above returns or is unreachable.
  llvm_unreachable("covered switch fell through?!");
}

/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
Expand Down Expand Up @@ -9662,6 +9680,17 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SDLoc dl(Op0);
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Do the comparison at i32 if it's smaller. This avoids subregister
// aliasing issues. Keep the smaller reference if we're optimizing for
// size, however, as that'll allow better folding of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
!DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::MinSize)) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
}
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
Expand Down
87 changes: 0 additions & 87 deletions llvm/test/CodeGen/X86/2007-10-17-IllegalAsm.ll

This file was deleted.

6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/3addr-16bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ entry:

; 64BIT-LABEL: t2:
; 64BIT-NOT: movw %si, %ax
; 64BIT: decl %eax
; 64BIT: leal -1(%rsi), %eax
; 64BIT: movzwl %ax
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
%1 = add i16 %k, -1 ; <i16> [#uses=3]
Expand All @@ -59,7 +59,7 @@ entry:

; 64BIT-LABEL: t3:
; 64BIT-NOT: movw %si, %ax
; 64BIT: addl $2, %eax
; 64BIT: leal 2(%rsi), %eax
%0 = add i16 %k, 2 ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
Expand All @@ -82,7 +82,7 @@ entry:

; 64BIT-LABEL: t4:
; 64BIT-NOT: movw %si, %ax
; 64BIT: addl %edi, %eax
; 64BIT: leal (%rsi,%rdi), %eax
%0 = add i16 %k, %c ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/codegen-prepare-extload.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.

; CHECK: movzbl ({{%rdi|%rcx}}), %eax
; CHECK: movsbl ({{%rdi|%rcx}}), %eax

define void @foo(i8* %p, i32* %q) {
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/ctpop-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ define i32 @test3(i64 %x) nounwind readnone {
%conv = zext i1 %cmp to i32
ret i32 %conv
; CHECK-LABEL: test3:
; CHECK: cmpb $2
; CHECK: cmpl $2
; CHECK: ret
}
22 changes: 12 additions & 10 deletions llvm/test/CodeGen/X86/memcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK-LABEL: memcmp2:
; CHECK: movw ([[A0:%rdi|%rcx]]), %ax
; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax
; CHECK: movzwl
; CHECK-NEXT: movzwl
; CHECK-NEXT: cmpl
; NOBUILTIN-LABEL: memcmp2:
; NOBUILTIN: callq
}
Expand All @@ -41,7 +42,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK-LABEL: memcmp2a:
; CHECK: cmpw $28527, ([[A0]])
; CHECK: movzwl
; CHECK-NEXT: cmpl $28527,
}


Expand All @@ -58,8 +60,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK-LABEL: memcmp4:
; CHECK: movl ([[A0]]), %eax
; CHECK: cmpl ([[A1]]), %eax
; CHECK: movl
; CHECK-NEXT: cmpl
}

define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
Expand All @@ -75,7 +77,7 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK-LABEL: memcmp4a:
; CHECK: cmpl $1869573999, ([[A0]])
; CHECK: cmpl $1869573999,
}

define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
Expand All @@ -91,8 +93,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK-LABEL: memcmp8:
; CHECK: movq ([[A0]]), %rax
; CHECK: cmpq ([[A1]]), %rax
; CHECK: movq
; CHECK: cmpq
}

define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
Expand All @@ -108,7 +110,7 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK-LABEL: memcmp8a:
; CHECK: movabsq $8029759185026510694, %rax
; CHECK: cmpq %rax, ([[A0]])
; CHECK: movabsq $8029759185026510694,
; CHECK: cmpq
}

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/shrink-compare.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

declare void @bar()

define void @test1(i32* nocapture %X) nounwind {
define void @test1(i32* nocapture %X) nounwind minsize {
entry:
%tmp1 = load i32* %X, align 4
%and = and i32 %tmp1, 255
Expand All @@ -19,7 +19,7 @@ if.end:
; CHECK: cmpb $47, (%{{rdi|rcx}})
}

define void @test2(i32 %X) nounwind {
define void @test2(i32 %X) nounwind minsize {
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 47
Expand All @@ -35,7 +35,7 @@ if.end:
; CHECK: cmpb $47, %{{dil|cl}}
}

define void @test3(i32 %X) nounwind {
define void @test3(i32 %X) nounwind minsize {
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 255
Expand Down Expand Up @@ -70,7 +70,7 @@ lor.end: ; preds = %lor.rhs, %entry
@x = global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 1 }, align 4

; PR16551
define void @test5(i32 %X) nounwind {
define void @test5(i32 %X) nounwind minsize {
entry:
%bf.load = load i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
%bf.lshr = lshr i56 %bf.load, 32
Expand Down

0 comments on commit 860934a

Please sign in to comment.