[ARM] Complete the Thumb1 shift+and->shift+shift transforms.
This saves materializing the immediate.  The additional forms are less
common (they don't usually show up for bitfield insert/extract), but
they're still relevant.

I had to add a new target hook to prevent DAGCombine from reversing the
transform. That isn't the only possible way to solve the conflict, but
it seems straightforward enough.

Differential Revision: https://reviews.llvm.org/D55630

llvm-svn: 349857
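(Editorial illustration, not part of the commit message.) The transform rewrites a shift-plus-mask into a pair of shifts. A minimal standalone C++ sketch of the equivalence, using the constants from the new test14 below; the function names are hypothetical:

    // Both forms compute the same value; on Thumb1 the shift pair is two
    // instructions (lsrs, lsls), while the shift-plus-mask form needs a
    // third instruction to materialize the mask, since Thumb1 has no
    // immediate AND.
    #include <cassert>
    #include <cstdint>

    uint32_t shiftAndForm(uint32_t X) { return (X << 4) & 0xFFFFFC00u; }
    uint32_t shiftPairForm(uint32_t X) { return (X >> 6) << 10; }

    int main() {
      for (uint32_t X : {0u, 1u, 0x3FFu, 0x12345678u, 0xFFFFFFFFu})
        assert(shiftAndForm(X) == shiftPairForm(X));
    }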
Eli Friedman committed Dec 20, 2018
1 parent 4c7f5d5 commit b1bbd5d
Showing 5 changed files with 125 additions and 17 deletions.
9 changes: 9 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3090,6 +3090,15 @@ class TargetLowering : public TargetLoweringBase {
return true;
}

/// Return true if it is profitable to fold a pair of shifts into a mask.
/// This is usually true on most targets. But some targets, like Thumb1,
/// have immediate shift instructions, but no immediate "and" instruction;
/// this makes the fold unprofitable.
virtual bool shouldFoldShiftPairToMask(const SDNode *N,
CombineLevel Level) const {
return true;
}

// Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
// to a shuffle and a truncate.
// Example of such a combine:
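As an aside on the cost asymmetry the new hook's comment describes (the Thumb1 sequences below are taken from the before/after CHECK lines of test9 in this commit, not from its prose):

    #include <cassert>
    #include <cstdint>

    // Equivalent functions; on Thumb1 the masked form costs one extra
    // instruction because the mask must be built in a register:
    //
    //   maskForm:  lsrs r0, r0, #2        shiftForm:  lsrs r0, r0, #3
    //              movs r1, #1                        lsls r0, r0, #1
    //              bics r0, r1
    uint32_t maskForm(uint32_t X) { return (X >> 2) & ~1u; }
    uint32_t shiftForm(uint32_t X) { return (X >> 3) << 1; }

    int main() {
      for (uint32_t X : {0u, 5u, 0xFFFFFFFFu})
        assert(maskForm(X) == shiftForm(X));
    }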
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6518,7 +6518,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+     TLI.shouldFoldShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
if (c1 < OpSizeInBits) {
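This fold is the exact inverse of the ARM combine below, which is why the new hook is needed: without it, each combine would undo the other's work. A standalone sketch of the identity behind the fold, for c1 >= c2 (hypothetical names, not DAGCombiner code):

    #include <cassert>
    #include <cstdint>

    // (shl (srl x, c1), c2) equals (and (srl x, c1 - c2), MASK): both keep
    // bits c1..31 of x, repositioned to start at bit c2.
    int main() {
      for (uint32_t X : {0u, 0xDEADBEEFu, ~0u})
        for (unsigned C1 = 1; C1 < 32; ++C1)
          for (unsigned C2 = 0; C2 <= C1; ++C2) {
            uint32_t Mask = (~0u << C1) >> (C1 - C2);
            assert(((X >> C1) << C2) == ((X >> (C1 - C2)) & Mask));
          }
    }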
53 changes: 46 additions & 7 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -10432,6 +10432,18 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
return false;
}

bool
ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
CombineLevel Level) const {
if (!Subtarget->isThumb1Only())
return true;

if (Level == BeforeLegalizeTypes)
return true;

return false;
}

static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -10735,16 +10747,20 @@ static SDValue CombineANDShift(SDNode *N,
if (!C2 || C2 >= 32)
return SDValue();

// Clear irrelevant bits in the mask.
if (LeftShift)
C1 &= (-1U << C2);
else
C1 &= (-1U >> C2);

SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);

// We have a pattern of the form "(and (shl x, c2) c1)" or
// "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
// transform to a pair of shifts, to save materializing c1.

- // First pattern: right shift, and c1+1 is a power of two.
- // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
- // of two).
+ // First pattern: right shift, then mask off leading bits.
+ // FIXME: Use demanded bits?
if (!LeftShift && isMask_32(C1)) {
uint32_t C3 = countLeadingZeros(C1);
@@ -10756,20 +10772,43 @@
}
}

- // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
- // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
- // is a power of two).
+ // First pattern, reversed: left shift, then mask off trailing bits.
if (LeftShift && isMask_32(~C1)) {
uint32_t C3 = countTrailingZeros(C1);
if (C2 < C3) {
SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C3 - C2, DL, MVT::i32));
return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
DAG.getConstant(C3, DL, MVT::i32));
}
}

// Second pattern: left shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Trailing = countTrailingZeros(C1);
uint32_t C3 = countLeadingZeros(C1);
- if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+ if (Trailing == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
DAG.getConstant(C3, DL, MVT::i32));
}
}

// Second pattern, reversed: right shift, then mask off trailing bits.
// FIXME: Handle other patterns of known/demanded bits.
if (!LeftShift && isShiftedMask_32(C1)) {
uint32_t Leading = countLeadingZeros(C1);
uint32_t C3 = countTrailingZeros(C1);
if (Leading == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
DAG.getConstant(C3, DL, MVT::i32));
}
}

// FIXME: Transform "(and (shl x, c2) c1)" ->
// "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
// c1.
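A sanity sketch for one of the newly added forms, the "second pattern, reversed": when C1 is a shifted mask whose count of leading zeros equals the shift amount C2 and whose count of trailing zeros is C3, masking after a right shift equals an srl/shl pair. The constants here are illustrative, not from the commit:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C2 = 6, C3 = 2;                 // example shift amounts
      const uint32_t C1 = ((~0u >> C2) >> C3) << C3; // C2 leading, C3 trailing zeros
      for (uint64_t I = 0; I <= 0xFFFFFFFFull; I += 0x10001ull) { // sampled inputs
        uint32_t X = static_cast<uint32_t>(I);
        // (and (srl x, C2), C1)  ==  (shl (srl x, C2 + C3), C3)
        assert(((X >> C2) & C1) == ((X >> (C2 + C3)) << C3));
      }
    }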
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.h
@@ -593,6 +593,8 @@ class VectorType;
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;

bool shouldFoldShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
75 changes: 66 additions & 9 deletions llvm/test/CodeGen/Thumb/shift-and.ll
@@ -45,9 +45,8 @@ entry:
define i32 @test4(i32 %x) {
; CHECK-LABEL: test4:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: lsls r0, r0, #4
- ; CHECK-NEXT: movs r1, #112
- ; CHECK-NEXT: bics r0, r1
+ ; CHECK-NEXT: lsrs r0, r0, #3
+ ; CHECK-NEXT: lsls r0, r0, #7
; CHECK-NEXT: bx lr
entry:
%0 = shl i32 %x, 4
@@ -84,9 +83,8 @@ entry:
define i32 @test7(i32 %x) {
; CHECK-LABEL: test7:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: lsrs r1, r0, #29
- ; CHECK-NEXT: movs r0, #4
- ; CHECK-NEXT: ands r0, r1
+ ; CHECK-NEXT: lsrs r0, r0, #31
+ ; CHECK-NEXT: lsls r0, r0, #2
; CHECK-NEXT: bx lr
entry:
%0 = lshr i32 %x, 29
@@ -110,9 +108,8 @@ entry:
define i32 @test9(i32 %x) {
; CHECK-LABEL: test9:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: lsrs r0, r0, #2
- ; CHECK-NEXT: movs r1, #1
- ; CHECK-NEXT: bics r0, r1
+ ; CHECK-NEXT: lsrs r0, r0, #3
+ ; CHECK-NEXT: lsls r0, r0, #1
; CHECK-NEXT: bx lr
entry:
%and = lshr i32 %x, 2
@@ -131,3 +128,63 @@ entry:
%shr = and i32 %0, 255
ret i32 %shr
}

define i32 @test11(i32 %x) {
; CHECK-LABEL: test11:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsrs r0, r0, #24
; CHECK-NEXT: lsls r0, r0, #2
; CHECK-NEXT: bx lr
entry:
%shl = lshr i32 %x, 22
%and = and i32 %shl, 1020
ret i32 %and
}

define i32 @test12(i32 %x) {
; CHECK-LABEL: test12:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsrs r0, r0, #3
; CHECK-NEXT: lsls r0, r0, #4
; CHECK-NEXT: bx lr
entry:
%0 = shl i32 %x, 1
%shr = and i32 %0, -16
ret i32 %shr
}

define i32 @test13(i32 %x) {
; CHECK-LABEL: test13:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsrs r0, r0, #3
; CHECK-NEXT: lsls r0, r0, #4
; CHECK-NEXT: bx lr
entry:
%shr = lshr i32 %x, 3
%shl = shl i32 %shr, 4
ret i32 %shl
}

define i32 @test14(i32 %x) {
; CHECK-LABEL: test14:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsrs r0, r0, #6
; CHECK-NEXT: lsls r0, r0, #10
; CHECK-NEXT: bx lr
entry:
%shl = shl i32 %x, 4
%and = and i32 %shl, -1024
ret i32 %and
}

define i32 @test15(i32 %x) {
; CHECK-LABEL: test15:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsrs r0, r0, #4
; CHECK-NEXT: lsls r0, r0, #3
; CHECK-NEXT: bx lr
entry:
%shr = lshr i32 %x, 4
%shl = shl i32 %shr, 3
ret i32 %shl
}
