Skip to content

Commit

Permalink
[X86] The TEST instruction is eliminated when BSF/TZCNT is used
Browse files Browse the repository at this point in the history
Summary:
These changes address PR31399.
Now the ffs(x) function is lowered to (x != 0) ? llvm.cttz(x) + 1 : 0
and it corresponds to the following llvm code:
  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
  %tobool = icmp eq i32 %v, 0
  %.op = add nuw nsw i32 %cnt, 1
  %add = select i1 %tobool, i32 0, i32 %.op
and x86 asm code:
  bsfl     %edi, %ecx
  addl     $1, %ecx
  testl    %edi, %edi
  movl     $0, %eax
  cmovnel  %ecx, %eax
In this case the 'test' instruction can't be eliminated because
the 'add' instruction clobbers EFLAGS, in particular the ZF flag
that the 'bsf' instruction sets when 'x' is zero.

We now produce the following code:
  bsfl     %edi, %ecx
  movl     $-1, %eax
  cmovnel  %ecx, %eax
  addl     $1, %eax

Patch by Ivan Kulagin

Reviewers: davide, craig.topper, spatel, RKSimon

Reviewed By: craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D48765

llvm-svn: 336768
  • Loading branch information
topperc committed Jul 11, 2018
1 parent 709f773 commit 02867f0
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 0 deletions.
30 changes: 30 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33452,6 +33452,36 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}

// Pull a constant add out of a select between "cttz(X) + C1" and a constant
// C2, so the select operates directly on the CTTZ result:
//   (CMOV C2, (ADD (CTTZ X), C1), COND_NE, Cond) ->
//       (ADD (CMOV C2-C1, (CTTZ X), COND_NE, Cond), C1)
//   (CMOV (ADD (CTTZ X), C1), C2, COND_E, Cond) ->
//       (ADD (CMOV (CTTZ X), C2-C1, COND_E, Cond), C1)
// The rewrite is the identity select(c, a + C1, C2) == select(c, a, C2 - C1) + C1
// in wrapping arithmetic, so it holds for any Cond; it is restricted to CTTZ
// because that is where moving the ADD after the CMOV lets the zero test use
// the flags of BSF/TZCNT.
// NOTE(review): this relies on X86ISD::CMOV taking its operands as
// (FalseOp, TrueOp, CC, Cond); FalseOp/TrueOp are defined outside this hunk -
// confirm against the top of combineCMov.
if (CC == X86::COND_NE || CC == X86::COND_E) {
  // Under COND_E the constant is the value picked when the condition holds
  // (TrueOp) and the ADD is the fallback; under COND_NE it is the reverse.
  const bool IsEq = CC == X86::COND_E;
  auto *Cnst = dyn_cast<ConstantSDNode>(IsEq ? TrueOp : FalseOp);
  SDValue Add = IsEq ? FalseOp : TrueOp;

  // Only fold when the ADD has no other user; otherwise it would have to be
  // kept alive next to the new ADD.
  if (Cnst && Add.getOpcode() == ISD::ADD && Add.hasOneUse()) {
    auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
    SDValue AddOp2 = Add.getOperand(0);
    if (AddOp1 && (AddOp2.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
                   AddOp2.getOpcode() == ISD::CTTZ)) {
      // Constant that becomes C2 again once C1 is added back.
      APInt Diff = Cnst->getAPIntValue() - AddOp1->getAPIntValue();
      // Each value must stay in the operand slot its source occupied: the
      // CTTZ replaces the ADD and Diff replaces the constant. Swapping them
      // would select the count exactly when X is zero.
      if (IsEq) {
        Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), AddOp2,
                          DAG.getConstant(Diff, DL, Add.getValueType()),
                          DAG.getConstant(CC, DL, MVT::i8), Cond);
      } else {
        Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(),
                          DAG.getConstant(Diff, DL, Add.getValueType()),
                          AddOp2, DAG.getConstant(CC, DL, MVT::i8), Cond);
      }
      // Use the generic ADD: its result is not consumed for flags here, and
      // it is built with a single result type.
      return DAG.getNode(ISD::ADD, DL, Add.getValueType(), Add,
                         SDValue(AddOp1, 0));
    }
  }
}

return SDValue();
}

Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3591,6 +3591,13 @@ static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
return X86::COND_B;
// BSF sets ZF when its source operand is zero, so a "source == 0" check can
// be answered from BSF's own flags with COND_E.
case X86::BSF16rr:
case X86::BSF16rm:
case X86::BSF32rr:
case X86::BSF32rm:
case X86::BSF64rr:
case X86::BSF64rm:
return X86::COND_E;
}
}

Expand Down
95 changes: 95 additions & 0 deletions llvm/test/CodeGen/X86/dagcombine-select.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mattr=+bmi < %s | FileCheck -check-prefix=BMI -enable-var-scope %s

define i32 @select_and1(i32 %x, i32 %y) {
; CHECK-LABEL: select_and1:
Expand Down Expand Up @@ -279,3 +280,97 @@ define double @frem_constant_sel_constants(i1 %cond) {
%bo = frem double 5.1, %sel
ret double %bo
}

declare i64 @llvm.cttz.i64(i64, i1)
define i64 @cttz_64_eq_select(i64 %v) nounwind {
; select (v == 0), 5, cttz(v) + 6: the constant has to win when v is zero, so
; the count may only be moved in when the flags report a nonzero input
; (ZF clear after bsf, CF clear after tzcnt).
; CHECK-LABEL: cttz_64_eq_select:
; CHECK: # %bb.0:
; CHECK-NEXT: bsfq %rdi, %rcx
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: addq $6, %rax
; CHECK-NEXT: retq

; BMI-LABEL: cttz_64_eq_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntq %rdi, %rcx
; BMI-NEXT: movq $-1, %rax
; BMI-NEXT: cmovaeq %rcx, %rax
; BMI-NEXT: addq $6, %rax
; BMI-NEXT: retq
  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
  %tobool = icmp eq i64 %v, 0
  %.op = add nuw nsw i64 %cnt, 6
  %add = select i1 %tobool, i64 5, i64 %.op
  ret i64 %add
}

define i64 @cttz_64_ne_select(i64 %v) nounwind {
; select (v != 0), cttz(v) + 6, 5: same function as the eq form with the
; select arms swapped, so the count is again taken only for a nonzero input
; (ZF clear after bsf, CF clear after tzcnt).
; CHECK-LABEL: cttz_64_ne_select:
; CHECK: # %bb.0:
; CHECK-NEXT: bsfq %rdi, %rcx
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: addq $6, %rax
; CHECK-NEXT: retq

; BMI-LABEL: cttz_64_ne_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntq %rdi, %rcx
; BMI-NEXT: movq $-1, %rax
; BMI-NEXT: cmovaeq %rcx, %rax
; BMI-NEXT: addq $6, %rax
; BMI-NEXT: retq
  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
  %tobool = icmp ne i64 %v, 0
  %.op = add nuw nsw i64 %cnt, 6
  %add = select i1 %tobool, i64 %.op, i64 5
  ret i64 %add
}

declare i32 @llvm.cttz.i32(i32, i1)
define i32 @cttz_32_eq_select(i32 %v) nounwind {
; select (v == 0), 5, cttz(v) + 6: the constant has to win when v is zero, so
; the count may only be moved in when the flags report a nonzero input
; (ZF clear after bsf, CF clear after tzcnt).
; CHECK-LABEL: cttz_32_eq_select:
; CHECK: # %bb.0:
; CHECK-NEXT: bsfl %edi, %ecx
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: addl $6, %eax
; CHECK-NEXT: retq

; BMI-LABEL: cttz_32_eq_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntl %edi, %ecx
; BMI-NEXT: movl $-1, %eax
; BMI-NEXT: cmovael %ecx, %eax
; BMI-NEXT: addl $6, %eax
; BMI-NEXT: retq
  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
  %tobool = icmp eq i32 %v, 0
  %.op = add nuw nsw i32 %cnt, 6
  %add = select i1 %tobool, i32 5, i32 %.op
  ret i32 %add
}

define i32 @cttz_32_ne_select(i32 %v) nounwind {
; select (v != 0), cttz(v) + 6, 5: same function as the eq form with the
; select arms swapped, so the count is again taken only for a nonzero input
; (ZF clear after bsf, CF clear after tzcnt).
; CHECK-LABEL: cttz_32_ne_select:
; CHECK: # %bb.0:
; CHECK-NEXT: bsfl %edi, %ecx
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: addl $6, %eax
; CHECK-NEXT: retq

; BMI-LABEL: cttz_32_ne_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntl %edi, %ecx
; BMI-NEXT: movl $-1, %eax
; BMI-NEXT: cmovael %ecx, %eax
; BMI-NEXT: addl $6, %eax
; BMI-NEXT: retq
  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
  %tobool = icmp ne i32 %v, 0
  %.op = add nuw nsw i32 %cnt, 6
  %add = select i1 %tobool, i32 %.op, i32 5
  ret i32 %add
}

0 comments on commit 02867f0

Please sign in to comment.