Skip to content

Commit

Permalink
[ARM] Recognize SSAT and USAT from SMIN/SMAX
Browse files Browse the repository at this point in the history
We have some recognition of SSAT and USAT from SELECT_CC at the moment.
This extends the matching to SMIN/SMAX which can help catch more cases,
either from min/max being the canonical form in instcombine or from some
expanded nodes like fp_to_si_sat.

Differential Revision: https://reviews.llvm.org/D119819
  • Loading branch information
davemgreen committed Feb 23, 2022
1 parent 65dc78d commit a10789d
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 349 deletions.
51 changes: 51 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -1564,6 +1564,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::SHL);
// Attempt to lower smin/smax to ssat/usat
if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
Subtarget->isThumb2()) {
setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);
}

setStackPointerRegisterToSaveRestore(ARM::SP);

Expand Down Expand Up @@ -17557,12 +17563,57 @@ static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

// Fold an i32 signed clamp built from a constant SMIN and a constant SMAX
// (nested in either order) into a single ARMISD::SSAT or ARMISD::USAT node
// when the two constants form a saturating range. Returns an empty SDValue
// when the pattern does not match.
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
                                         const ARMSubtarget *Subtarget) {
  // Only proceed for ARM mode with v6 ops, or for Thumb2.
  const bool IsArmModeV6 = !Subtarget->isThumb() && Subtarget->hasV6Ops();
  if (!IsArmModeV6 && !Subtarget->isThumb2())
    return SDValue();

  EVT VT = Op.getValueType();
  SDValue Inner = Op.getOperand(0);

  if (VT != MVT::i32)
    return SDValue();

  // The first operand must itself be a signed min/max ...
  const bool InnerIsMinMax =
      Inner.getOpcode() == ISD::SMIN || Inner.getOpcode() == ISD::SMAX;
  if (!InnerIsMinMax)
    return SDValue();

  // ... and both nodes must compare against a constant.
  if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
      !isa<ConstantSDNode>(Inner.getOperand(1)))
    return SDValue();

  // Work out which node is the SMIN (upper clamp) and which is the SMAX
  // (lower clamp); the pair may be nested either way round.
  const bool OuterIsMax = Op.getOpcode() == ISD::SMAX;
  SDValue UpperClamp = OuterIsMax ? Inner : Op;
  SDValue LowerClamp = OuterIsMax ? Op : Inner;
  SDValue Input = Inner.getOperand(0);

  APInt UpperBound = UpperClamp.getConstantOperandAPInt(1);
  APInt LowerBound = LowerClamp.getConstantOperandAPInt(1);

  // Reject smin-of-smin / smax-of-smax, and any node that is not SMIN/SMAX.
  if (UpperClamp.getOpcode() != ISD::SMIN ||
      LowerClamp.getOpcode() != ISD::SMAX)
    return SDValue();

  // The upper bound has to be of the form 2^k - 1.
  if (!(UpperBound + 1).isPowerOf2())
    return SDValue();

  // A lower bound equal to the bitwise complement of the upper bound gives a
  // signed saturate; a lower bound of zero gives an unsigned saturate. The
  // signed form is tried first.
  const bool IsSignedSat = UpperBound == ~LowerBound;
  if (!IsSignedSat && !(LowerBound == 0))
    return SDValue();

  SDLoc DL(Op);
  const unsigned SatOpcode = IsSignedSat ? ARMISD::SSAT : ARMISD::USAT;
  // The second operand is the number of trailing one bits in the upper bound.
  SDValue SatWidth = DAG.getConstant(UpperBound.countTrailingOnes(), DL, VT);
  return DAG.getNode(SatOpcode, DL, VT, Input, SatWidth);
}

/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
/// saturates.
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);

if (VT == MVT::i32)
return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);

if (!ST->hasMVEIntegerOps())
return SDValue();

Expand Down
117 changes: 20 additions & 97 deletions llvm/test/CodeGen/ARM/fpclamptosat.ll
Expand Up @@ -2718,28 +2718,14 @@ define i16 @stest_f64i16_mm(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2iz
; VFP2-NEXT: movw r1, #32767
; VFP2-NEXT: cmp r0, r1
; VFP2-NEXT: it ge
; VFP2-NEXT: movge r0, r1
; VFP2-NEXT: movw r1, #32768
; VFP2-NEXT: movt r1, #65535
; VFP2-NEXT: cmn.w r0, #32768
; VFP2-NEXT: it le
; VFP2-NEXT: movle r0, r1
; VFP2-NEXT: ssat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: stest_f64i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f64 s0, d0
; FULL-NEXT: movw r1, #32767
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: cmp r0, r1
; FULL-NEXT: csel r0, r0, r1, lt
; FULL-NEXT: movw r1, #32768
; FULL-NEXT: movt r1, #65535
; FULL-NEXT: cmn.w r0, #32768
; FULL-NEXT: csel r0, r0, r1, gt
; FULL-NEXT: ssat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i32
Expand Down Expand Up @@ -2820,21 +2806,14 @@ define i16 @ustest_f64i16_mm(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2iz
; VFP2-NEXT: movw r1, #65535
; VFP2-NEXT: cmp r0, r1
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r1, r0
; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: usat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f64 s0, d0
; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: cmp r0, r1
; FULL-NEXT: csel r0, r0, r1, lt
; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: usat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i32
Expand Down Expand Up @@ -2870,33 +2849,12 @@ define i16 @stest_f32i16_mm(float %x) {
; SOFT-NEXT: .LCPI39_1:
; SOFT-NEXT: .long 4294934528 @ 0xffff8000
;
; VFP2-LABEL: stest_f32i16_mm:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: movw r1, #32767
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: cmp r0, r1
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r1, r0
; VFP2-NEXT: movw r0, #32768
; VFP2-NEXT: cmn.w r1, #32768
; VFP2-NEXT: movt r0, #65535
; VFP2-NEXT: it gt
; VFP2-NEXT: movgt r0, r1
; VFP2-NEXT: bx lr
;
; FULL-LABEL: stest_f32i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f32 s0, s0
; FULL-NEXT: movw r1, #32767
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: cmp r0, r1
; FULL-NEXT: csel r0, r0, r1, lt
; FULL-NEXT: movw r1, #32768
; FULL-NEXT: movt r1, #65535
; FULL-NEXT: cmn.w r0, #32768
; FULL-NEXT: csel r0, r0, r1, gt
; FULL-NEXT: bx lr
; VFP-LABEL: stest_f32i16_mm:
; VFP: @ %bb.0: @ %entry
; VFP-NEXT: vcvt.s32.f32 s0, s0
; VFP-NEXT: vmov r0, s0
; VFP-NEXT: ssat r0, #16, r0
; VFP-NEXT: bx lr
entry:
%conv = fptosi float %x to i32
%spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
Expand Down Expand Up @@ -2968,26 +2926,12 @@ define i16 @ustest_f32i16_mm(float %x) {
; SOFT-NEXT: .LCPI41_0:
; SOFT-NEXT: .long 65535 @ 0xffff
;
; VFP2-LABEL: ustest_f32i16_mm:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: movw r1, #65535
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: cmp r0, r1
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r1, r0
; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: bx lr
;
; FULL-LABEL: ustest_f32i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f32 s0, s0
; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: cmp r0, r1
; FULL-NEXT: csel r0, r0, r1, lt
; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: bx lr
; VFP-LABEL: ustest_f32i16_mm:
; VFP: @ %bb.0: @ %entry
; VFP-NEXT: vcvt.s32.f32 s0, s0
; VFP-NEXT: vmov r0, s0
; VFP-NEXT: usat r0, #16, r0
; VFP-NEXT: bx lr
entry:
%conv = fptosi float %x to i32
%spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
Expand Down Expand Up @@ -3031,30 +2975,16 @@ define i16 @stest_f16i16_mm(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: movw r1, #32767
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: cmp r0, r1
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r1, r0
; VFP2-NEXT: movw r0, #32768
; VFP2-NEXT: cmn.w r1, #32768
; VFP2-NEXT: movt r0, #65535
; VFP2-NEXT: it gt
; VFP2-NEXT: movgt r0, r1
; VFP2-NEXT: ssat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: stest_f16i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f16 s0, s0
; FULL-NEXT: movw r1, #32767
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: cmp r0, r1
; FULL-NEXT: csel r0, r0, r1, lt
; FULL-NEXT: movw r1, #32768
; FULL-NEXT: movt r1, #65535
; FULL-NEXT: cmn.w r0, #32768
; FULL-NEXT: csel r0, r0, r1, gt
; FULL-NEXT: ssat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i32
Expand Down Expand Up @@ -3143,23 +3073,16 @@ define i16 @ustest_f16i16_mm(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: movw r1, #65535
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: cmp r0, r1
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r1, r0
; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: usat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f16 s0, s0
; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: cmp r0, r1
; FULL-NEXT: csel r0, r0, r1, lt
; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: usat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i32
Expand Down
24 changes: 3 additions & 21 deletions llvm/test/CodeGen/ARM/sadd_sat.ll
Expand Up @@ -148,15 +148,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func16:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: add r0, r1
; CHECK-T2NODSP-NEXT: movw r1, #32767
; CHECK-T2NODSP-NEXT: cmp r0, r1
; CHECK-T2NODSP-NEXT: it lt
; CHECK-T2NODSP-NEXT: movlt r1, r0
; CHECK-T2NODSP-NEXT: movw r0, #32768
; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
; CHECK-T2NODSP-NEXT: movt r0, #65535
; CHECK-T2NODSP-NEXT: it gt
; CHECK-T2NODSP-NEXT: movgt r0, r1
; CHECK-T2NODSP-NEXT: ssat r0, #16, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func16:
Expand Down Expand Up @@ -219,12 +211,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func8:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: add r0, r1
; CHECK-T2NODSP-NEXT: cmp r0, #127
; CHECK-T2NODSP-NEXT: it ge
; CHECK-T2NODSP-NEXT: movge r0, #127
; CHECK-T2NODSP-NEXT: cmn.w r0, #128
; CHECK-T2NODSP-NEXT: it le
; CHECK-T2NODSP-NEXT: mvnle r0, #127
; CHECK-T2NODSP-NEXT: ssat r0, #8, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func8:
Expand Down Expand Up @@ -280,12 +267,7 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func3:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: add r0, r1
; CHECK-T2NODSP-NEXT: cmp r0, #7
; CHECK-T2NODSP-NEXT: it ge
; CHECK-T2NODSP-NEXT: movge r0, #7
; CHECK-T2NODSP-NEXT: cmn.w r0, #8
; CHECK-T2NODSP-NEXT: it le
; CHECK-T2NODSP-NEXT: mvnle r0, #7
; CHECK-T2NODSP-NEXT: ssat r0, #4, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func3:
Expand Down
24 changes: 3 additions & 21 deletions llvm/test/CodeGen/ARM/sadd_sat_plus.ll
Expand Up @@ -151,15 +151,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: sxth r1, r1
; CHECK-T2NODSP-NEXT: add r0, r1
; CHECK-T2NODSP-NEXT: movw r1, #32767
; CHECK-T2NODSP-NEXT: cmp r0, r1
; CHECK-T2NODSP-NEXT: it lt
; CHECK-T2NODSP-NEXT: movlt r1, r0
; CHECK-T2NODSP-NEXT: movw r0, #32768
; CHECK-T2NODSP-NEXT: movt r0, #65535
; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
; CHECK-T2NODSP-NEXT: it gt
; CHECK-T2NODSP-NEXT: movgt r0, r1
; CHECK-T2NODSP-NEXT: ssat r0, #16, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func16:
Expand Down Expand Up @@ -205,12 +197,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: sxtb r1, r1
; CHECK-T2NODSP-NEXT: add r0, r1
; CHECK-T2NODSP-NEXT: cmp r0, #127
; CHECK-T2NODSP-NEXT: it ge
; CHECK-T2NODSP-NEXT: movge r0, #127
; CHECK-T2NODSP-NEXT: cmn.w r0, #128
; CHECK-T2NODSP-NEXT: it le
; CHECK-T2NODSP-NEXT: mvnle r0, #127
; CHECK-T2NODSP-NEXT: ssat r0, #8, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func8:
Expand Down Expand Up @@ -257,12 +244,7 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: lsls r1, r1, #28
; CHECK-T2NODSP-NEXT: add.w r0, r0, r1, asr #28
; CHECK-T2NODSP-NEXT: cmp r0, #7
; CHECK-T2NODSP-NEXT: it ge
; CHECK-T2NODSP-NEXT: movge r0, #7
; CHECK-T2NODSP-NEXT: cmn.w r0, #8
; CHECK-T2NODSP-NEXT: it le
; CHECK-T2NODSP-NEXT: mvnle r0, #7
; CHECK-T2NODSP-NEXT: ssat r0, #4, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func4:
Expand Down

0 comments on commit a10789d

Please sign in to comment.