Skip to content

Commit

Permalink
[X86] Don't fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y)) on BMI2 targets
Browse files Browse the repository at this point in the history
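With BMI2 we have SHRX, which is a lot quicker than the regular x86 variable shifts, so for i32/i64 results that do not involve a NOT we now keep SHRX+AND instead of folding to BT+SETCC.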

Fixes #55138
  • Loading branch information
RKSimon committed Apr 28, 2022
1 parent 181dcbd commit ab17ed0
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 19 deletions.
9 changes: 7 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -47380,11 +47380,13 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
Src.getOpcode() == ISD::TRUNCATE) &&
Src.getOperand(0)->hasOneUse())
Src = Src.getOperand(0);
bool ContainsNOT = false;
X86::CondCode X86CC = X86::COND_B;
// Peek through AND(NOT(SRL(X,Y)),1).
if (isBitwiseNot(Src)) {
Src = Src.getOperand(0);
X86CC = X86::COND_AE;
ContainsNOT = true;
}
if (Src.getOpcode() == ISD::SRL &&
!isa<ConstantSDNode>(Src.getOperand(1))) {
Expand All @@ -47394,9 +47396,12 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (isBitwiseNot(Src)) {
Src = Src.getOperand(0);
X86CC = X86CC == X86::COND_AE ? X86::COND_B : X86::COND_AE;
ContainsNOT = true;
}
if (SDValue BT = getBT(Src, BitNo, dl, DAG))
return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);
// If we have BMI2 then SHRX should be faster for i32/i64 cases.
if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32))
if (SDValue BT = getBT(Src, BitNo, dl, DAG))
return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);
}
}

Expand Down
48 changes: 31 additions & 17 deletions llvm/test/CodeGen/X86/setcc.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefixes=X64,X64-NOTBM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+tbm | FileCheck %s --check-prefixes=X64,X64-TBM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+bmi2 | FileCheck %s --check-prefixes=X64,X64-NOTBM
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefixes=X64,X64-NOTBM,X64-NOBMI2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+tbm | FileCheck %s --check-prefixes=X64,X64-NOBMI2,X64-TBM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+bmi2 | FileCheck %s --check-prefixes=X64,X64-NOTBM,X64-BMI2
; rdar://7329206

define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
Expand Down Expand Up @@ -191,12 +191,18 @@ define i64 @t9(i32 %0, i32 %1) {
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: t9:
; X64: ## %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl %esi, %edi
; X64-NEXT: setb %al
; X64-NEXT: retq
; X64-NOBMI2-LABEL: t9:
; X64-NOBMI2: ## %bb.0:
; X64-NOBMI2-NEXT: xorl %eax, %eax
; X64-NOBMI2-NEXT: btl %esi, %edi
; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: t9:
; X64-BMI2: ## %bb.0:
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: andl $1, %eax
; X64-BMI2-NEXT: retq
%3 = lshr i32 %0, %1
%4 = and i32 %3, 1
%5 = icmp ne i32 %4, 0
Expand Down Expand Up @@ -311,14 +317,22 @@ define i32 @PR55138(i32 %x) {
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: PR55138:
; X64: ## %bb.0:
; X64-NEXT: andl $15, %edi
; X64-NEXT: movl $27030, %ecx ## imm = 0x6996
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl %edi, %ecx
; X64-NEXT: setb %al
; X64-NEXT: retq
; X64-NOBMI2-LABEL: PR55138:
; X64-NOBMI2: ## %bb.0:
; X64-NOBMI2-NEXT: andl $15, %edi
; X64-NOBMI2-NEXT: movl $27030, %ecx ## imm = 0x6996
; X64-NOBMI2-NEXT: xorl %eax, %eax
; X64-NOBMI2-NEXT: btl %edi, %ecx
; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: PR55138:
; X64-BMI2: ## %bb.0:
; X64-BMI2-NEXT: andb $15, %dil
; X64-BMI2-NEXT: movl $27030, %eax ## imm = 0x6996
; X64-BMI2-NEXT: shrxl %edi, %eax, %eax
; X64-BMI2-NEXT: andl $1, %eax
; X64-BMI2-NEXT: retq
%urem = and i32 %x, 15
%shr = lshr i32 27030, %urem
%and = and i32 %shr, 1
Expand Down

0 comments on commit ab17ed0

Please sign in to comment.