Skip to content

Commit 4e54cf3

Browse files
committed
[DAGCombiner] try to form test+set out of shift+mask patterns
The motivating bugs are: https://bugs.llvm.org/show_bug.cgi?id=41340 https://bugs.llvm.org/show_bug.cgi?id=42697 As discussed there, we could view this as a failure of IR canonicalization, but then we would need to implement a backend fixup with target overrides to get this right in all cases. Instead, we can just view this as a codegen opportunity. It's not even clear for x86 exactly when we should favor test+set; some CPUs have better theoretical throughput for the ALU ops than bt/test. This patch is made more complicated than I expected because there's an early DAGCombine for 'and' that can change types of the intermediate ops via trunc+anyext. Differential Revision: https://reviews.llvm.org/D66687 llvm-svn: 370668
1 parent 6e18266 commit 4e54cf3

File tree

3 files changed

+109
-32
lines changed

3 files changed

+109
-32
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5188,6 +5188,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
51885188
return T1;
51895189
}
51905190

5191+
/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5192+
/// For a target with a bit test, this is expected to become test + set and save
5193+
/// at least 1 instruction.
5194+
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5195+
assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5196+
5197+
// This is probably not worthwhile without a supported type.
5198+
EVT VT = And->getValueType(0);
5199+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5200+
if (!TLI.isTypeLegal(VT))
5201+
return SDValue();
5202+
5203+
// Look through an optional extension and find a 'not'.
5204+
// TODO: Should we favor test+set even without the 'not' op?
5205+
SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5206+
if (Not.getOpcode() == ISD::ANY_EXTEND)
5207+
Not = Not.getOperand(0);
5208+
if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5209+
return SDValue();
5210+
5211+
// Look though an optional truncation. The source operand may not be the same
5212+
// type as the original 'and', but that is ok because we are masking off
5213+
// everything but the low bit.
5214+
SDValue Srl = Not.getOperand(0);
5215+
if (Srl.getOpcode() == ISD::TRUNCATE)
5216+
Srl = Srl.getOperand(0);
5217+
5218+
// Match a shift-right by constant.
5219+
if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5220+
!isa<ConstantSDNode>(Srl.getOperand(1)))
5221+
return SDValue();
5222+
5223+
// We might have looked through casts that make this transform invalid.
5224+
// TODO: If the source type is wider than the result type, do the mask and
5225+
// compare in the source type.
5226+
const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5227+
unsigned VTBitWidth = VT.getSizeInBits();
5228+
if (ShiftAmt.uge(VTBitWidth))
5229+
return SDValue();
5230+
5231+
// Turn this into a bit-test pattern using mask op + setcc:
5232+
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5233+
SDLoc DL(And);
5234+
SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5235+
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5236+
SDValue Mask = DAG.getConstant(
5237+
APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5238+
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5239+
SDValue Zero = DAG.getConstant(0, DL, VT);
5240+
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5241+
return DAG.getZExtOrTrunc(Setcc, DL, VT);
5242+
}
5243+
51915244
SDValue DAGCombiner::visitAND(SDNode *N) {
51925245
SDValue N0 = N->getOperand(0);
51935246
SDValue N1 = N->getOperand(1);
@@ -5471,6 +5524,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
54715524
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
54725525
return Shifts;
54735526

5527+
if (TLI.hasBitTest(N0, N1))
5528+
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5529+
return V;
5530+
54745531
return SDValue();
54755532
}
54765533

llvm/test/CodeGen/Hexagon/tstbit.ll

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,25 @@ b0:
2020
ret i32 %v3
2121
}
2222

23+
; TODO: Match to tstbit?
24+
2325
define i64 @is_upper_bit_clear_i64(i64 %x) #0 {
2426
; CHECK-LABEL: is_upper_bit_clear_i64:
2527
; CHECK: // %bb.0:
2628
; CHECK-NEXT: {
27-
; CHECK-NEXT: p0 = tstbit(r1,#5)
28-
; CHECK-NEXT: r1 = #0
29+
; CHECK-NEXT: r4 = #0
30+
; CHECK-NEXT: r2 = #32
31+
; CHECK-NEXT: r7:6 = combine(#0,#0)
32+
; CHECK-NEXT: }
33+
; CHECK-NEXT: {
34+
; CHECK-NEXT: r5 = and(r1,r2)
35+
; CHECK-NEXT: r1 = r4
2936
; CHECK-NEXT: }
3037
; CHECK-NEXT: {
31-
; CHECK-NEXT: r0 = mux(p0,#0,#1)
38+
; CHECK-NEXT: p0 = cmp.eq(r5:4,r7:6)
39+
; CHECK-NEXT: }
40+
; CHECK-NEXT: {
41+
; CHECK-NEXT: r0 = mux(p0,#1,#0)
3242
; CHECK-NEXT: jumpr r31
3343
; CHECK-NEXT: }
3444
%sh = lshr i64 %x, 37
@@ -37,15 +47,24 @@ define i64 @is_upper_bit_clear_i64(i64 %x) #0 {
3747
ret i64 %r
3848
}
3949

50+
; TODO: Match to tstbit?
51+
4052
define i64 @is_lower_bit_clear_i64(i64 %x) #0 {
4153
; CHECK-LABEL: is_lower_bit_clear_i64:
4254
; CHECK: // %bb.0:
4355
; CHECK-NEXT: {
44-
; CHECK-NEXT: p0 = tstbit(r0,#27)
56+
; CHECK-NEXT: r5:4 = combine(#0,#0)
57+
; CHECK-NEXT: r2 = ##134217728
4558
; CHECK-NEXT: r1 = #0
4659
; CHECK-NEXT: }
4760
; CHECK-NEXT: {
48-
; CHECK-NEXT: r0 = mux(p0,#0,#1)
61+
; CHECK-NEXT: r0 = and(r0,r2)
62+
; CHECK-NEXT: }
63+
; CHECK-NEXT: {
64+
; CHECK-NEXT: p0 = cmp.eq(r1:0,r5:4)
65+
; CHECK-NEXT: }
66+
; CHECK-NEXT: {
67+
; CHECK-NEXT: r0 = mux(p0,#1,#0)
4968
; CHECK-NEXT: jumpr r31
5069
; CHECK-NEXT: }
5170
%sh = lshr i64 %x, 27
@@ -54,14 +73,16 @@ define i64 @is_lower_bit_clear_i64(i64 %x) #0 {
5473
ret i64 %r
5574
}
5675

76+
; TODO: Match to tstbit?
77+
5778
define i32 @is_bit_clear_i32(i32 %x) #0 {
5879
; CHECK-LABEL: is_bit_clear_i32:
5980
; CHECK: // %bb.0:
6081
; CHECK-NEXT: {
61-
; CHECK-NEXT: p0 = tstbit(r0,#27)
82+
; CHECK-NEXT: r0 = and(r0,##134217728)
6283
; CHECK-NEXT: }
6384
; CHECK-NEXT: {
64-
; CHECK-NEXT: r0 = mux(p0,#0,#1)
85+
; CHECK-NEXT: r0 = cmp.eq(r0,#0)
6586
; CHECK-NEXT: jumpr r31
6687
; CHECK-NEXT: }
6788
%sh = lshr i32 %x, 27
@@ -70,14 +91,16 @@ define i32 @is_bit_clear_i32(i32 %x) #0 {
7091
ret i32 %r
7192
}
7293

94+
; TODO: Match to tstbit?
95+
7396
define i16 @is_bit_clear_i16(i16 %x) #0 {
7497
; CHECK-LABEL: is_bit_clear_i16:
7598
; CHECK: // %bb.0:
7699
; CHECK-NEXT: {
77-
; CHECK-NEXT: p0 = tstbit(r0,#7)
100+
; CHECK-NEXT: r0 = and(r0,#128)
78101
; CHECK-NEXT: }
79102
; CHECK-NEXT: {
80-
; CHECK-NEXT: r0 = mux(p0,#0,#1)
103+
; CHECK-NEXT: r0 = cmp.eq(r0,#0)
81104
; CHECK-NEXT: jumpr r31
82105
; CHECK-NEXT: }
83106
%sh = lshr i16 %x, 7
@@ -86,14 +109,16 @@ define i16 @is_bit_clear_i16(i16 %x) #0 {
86109
ret i16 %r
87110
}
88111

112+
; TODO: Match to tstbit?
113+
89114
define i8 @is_bit_clear_i8(i8 %x) #0 {
90115
; CHECK-LABEL: is_bit_clear_i8:
91116
; CHECK: // %bb.0:
92117
; CHECK-NEXT: {
93-
; CHECK-NEXT: p0 = tstbit(r0,#3)
118+
; CHECK-NEXT: r0 = and(r0,#8)
94119
; CHECK-NEXT: }
95120
; CHECK-NEXT: {
96-
; CHECK-NEXT: r0 = mux(p0,#0,#1)
121+
; CHECK-NEXT: r0 = cmp.eq(r0,#0)
97122
; CHECK-NEXT: jumpr r31
98123
; CHECK-NEXT: }
99124
%sh = lshr i8 %x, 3

llvm/test/CodeGen/X86/test-vs-bittest.ll

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -393,10 +393,9 @@ no:
393393
define i64 @is_upper_bit_clear_i64(i64 %x) {
394394
; CHECK-LABEL: is_upper_bit_clear_i64:
395395
; CHECK: # %bb.0:
396-
; CHECK-NEXT: movq %rdi, %rax
397-
; CHECK-NEXT: shrq $37, %rax
398-
; CHECK-NEXT: notl %eax
399-
; CHECK-NEXT: andl $1, %eax
396+
; CHECK-NEXT: xorl %eax, %eax
397+
; CHECK-NEXT: btq $37, %rdi
398+
; CHECK-NEXT: setae %al
400399
; CHECK-NEXT: retq
401400
%sh = lshr i64 %x, 37
402401
%m = and i64 %sh, 1
@@ -407,10 +406,9 @@ define i64 @is_upper_bit_clear_i64(i64 %x) {
407406
define i64 @is_lower_bit_clear_i64(i64 %x) {
408407
; CHECK-LABEL: is_lower_bit_clear_i64:
409408
; CHECK: # %bb.0:
410-
; CHECK-NEXT: movq %rdi, %rax
411-
; CHECK-NEXT: shrl $27, %eax
412-
; CHECK-NEXT: notl %eax
413-
; CHECK-NEXT: andl $1, %eax
409+
; CHECK-NEXT: xorl %eax, %eax
410+
; CHECK-NEXT: testl $134217728, %edi # imm = 0x8000000
411+
; CHECK-NEXT: sete %al
414412
; CHECK-NEXT: retq
415413
%sh = lshr i64 %x, 27
416414
%m = and i64 %sh, 1
@@ -421,10 +419,9 @@ define i64 @is_lower_bit_clear_i64(i64 %x) {
421419
define i32 @is_bit_clear_i32(i32 %x) {
422420
; CHECK-LABEL: is_bit_clear_i32:
423421
; CHECK: # %bb.0:
424-
; CHECK-NEXT: movl %edi, %eax
425-
; CHECK-NEXT: shrl $27, %eax
426-
; CHECK-NEXT: notl %eax
427-
; CHECK-NEXT: andl $1, %eax
422+
; CHECK-NEXT: xorl %eax, %eax
423+
; CHECK-NEXT: testl $134217728, %edi # imm = 0x8000000
424+
; CHECK-NEXT: sete %al
428425
; CHECK-NEXT: retq
429426
%sh = lshr i32 %x, 27
430427
%n = xor i32 %sh, -1
@@ -435,10 +432,9 @@ define i32 @is_bit_clear_i32(i32 %x) {
435432
define i16 @is_bit_clear_i16(i16 %x) {
436433
; CHECK-LABEL: is_bit_clear_i16:
437434
; CHECK: # %bb.0:
438-
; CHECK-NEXT: movzwl %di, %eax
439-
; CHECK-NEXT: shrl $7, %eax
440-
; CHECK-NEXT: notl %eax
441-
; CHECK-NEXT: andl $1, %eax
435+
; CHECK-NEXT: xorl %eax, %eax
436+
; CHECK-NEXT: testb $-128, %dil
437+
; CHECK-NEXT: sete %al
442438
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
443439
; CHECK-NEXT: retq
444440
%sh = lshr i16 %x, 7
@@ -450,18 +446,17 @@ define i16 @is_bit_clear_i16(i16 %x) {
450446
define i8 @is_bit_clear_i8(i8 %x) {
451447
; CHECK-LABEL: is_bit_clear_i8:
452448
; CHECK: # %bb.0:
453-
; CHECK-NEXT: movl %edi, %eax
454-
; CHECK-NEXT: shrb $3, %al
455-
; CHECK-NEXT: notb %al
456-
; CHECK-NEXT: andb $1, %al
457-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
449+
; CHECK-NEXT: testb $8, %dil
450+
; CHECK-NEXT: sete %al
458451
; CHECK-NEXT: retq
459452
%sh = lshr i8 %x, 3
460453
%m = and i8 %sh, 1
461454
%r = xor i8 %m, 1
462455
ret i8 %r
463456
}
464457

458+
; TODO: We could use bt/test on the 64-bit value.
459+
465460
define i8 @overshift(i64 %x) {
466461
; CHECK-LABEL: overshift:
467462
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)