diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7269ed9124180..9e419a5d1239c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1115,6 +1115,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); setOperationAction(ISD::SADDSAT, MVT::i16, Custom); setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); + setOperationAction(ISD::UADDSAT, MVT::i8, Custom); + setOperationAction(ISD::USUBSAT, MVT::i8, Custom); + setOperationAction(ISD::UADDSAT, MVT::i16, Custom); + setOperationAction(ISD::USUBSAT, MVT::i16, Custom); } if (Subtarget->hasBaseDSP()) { setOperationAction(ISD::SADDSAT, MVT::i32, Legal); @@ -1776,6 +1780,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(ARMISD::QSUB16b) MAKE_CASE(ARMISD::QADD8b) MAKE_CASE(ARMISD::QSUB8b) + MAKE_CASE(ARMISD::UQADD16b) + MAKE_CASE(ARMISD::UQSUB16b) + MAKE_CASE(ARMISD::UQADD8b) + MAKE_CASE(ARMISD::UQSUB8b) MAKE_CASE(ARMISD::BUILD_VECTOR) MAKE_CASE(ARMISD::BFI) MAKE_CASE(ARMISD::VORRIMM) @@ -4948,8 +4956,8 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } -static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { +static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { EVT VT = Op.getValueType(); if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) return SDValue(); @@ -4957,15 +4965,40 @@ static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, return SDValue(); unsigned NewOpcode; - bool IsAdd = Op->getOpcode() == ISD::SADDSAT; switch (VT.getSimpleVT().SimpleTy) { default: return SDValue(); case MVT::i8: - NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b; + switch (Op->getOpcode()) { + case ISD::UADDSAT: + NewOpcode = ARMISD::UQADD8b; + break; + case ISD::SADDSAT: + NewOpcode = ARMISD::QADD8b; + break; + case ISD::USUBSAT: + NewOpcode = ARMISD::UQSUB8b; + break; + case ISD::SSUBSAT: + NewOpcode = ARMISD::QSUB8b; + break; + } break; case MVT::i16: - NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b; + switch (Op->getOpcode()) { + case ISD::UADDSAT: + NewOpcode = ARMISD::UQADD16b; + break; + case ISD::SADDSAT: + NewOpcode = ARMISD::QADD16b; + break; + case ISD::USUBSAT: + NewOpcode = ARMISD::UQSUB16b; + break; + case ISD::SSUBSAT: + NewOpcode = ARMISD::QSUB16b; + break; + } break; } @@ -10129,7 +10162,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerUnsignedALUO(Op, DAG); case ISD::SADDSAT: case ISD::SSUBSAT: - return LowerSADDSUBSAT(Op, DAG, Subtarget); + case ISD::UADDSAT: + case ISD::USUBSAT: + return LowerADDSUBSAT(Op, DAG, Subtarget); case ISD::LOAD: return LowerPredicateLoad(Op, DAG); case ISD::STORE: @@ -10229,7 +10264,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::SADDSAT: case ISD::SSUBSAT: - Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget); + case ISD::UADDSAT: + case ISD::USUBSAT: + Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget); break; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); @@ -17455,7 +17492,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, } case ARMISD::SMLALBB: case ARMISD::QADD16b: - case ARMISD::QSUB16b: { + case ARMISD::QSUB16b: + case ARMISD::UQADD16b: + case ARMISD::UQSUB16b: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || @@ -17492,7 +17531,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, break; } case ARMISD::QADD8b: - case ARMISD::QSUB8b: { + case ARMISD::QSUB8b: + case ARMISD::UQADD8b: + case ARMISD::UQSUB8b: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 5a6dc047cf350..f91e7854f1992 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -279,6 +279,10 @@ class VectorType; QSUB8b, QADD16b, QSUB16b, + UQADD8b, + UQSUB8b, + UQADD16b, + UQSUB16b, // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f6f38e7978a26..7466cecb9b33b 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -240,6 +240,11 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; +def ARMuqadd8b : SDNode<"ARMISD::UQADD8b", SDT_ARMAnd, []>; +def ARMuqsub8b : SDNode<"ARMISD::UQSUB8b", SDT_ARMAnd, []>; +def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>; +def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>; + def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -3945,6 +3950,7 @@ def : ARMV5TEPat<(saddsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), (QDADD rGPR:$Rm, rGPR:$Rn)>; def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), (QDSUB rGPR:$Rm, rGPR:$Rn)>; + def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), (QADD8 rGPR:$Rm, rGPR:$Rn)>; def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), @@ -3963,6 +3969,16 @@ def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; +def : ARMV6Pat<(ARMuqadd8b rGPR:$Rm, rGPR:$Rn), + (UQADD8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMuqsub8b rGPR:$Rm, rGPR:$Rn), + (UQSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMuqadd16b rGPR:$Rm, rGPR:$Rn), + (UQADD16 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMuqsub16b rGPR:$Rm, rGPR:$Rn), + (UQSUB16 rGPR:$Rm, rGPR:$Rn)>; + + // Signed/Unsigned add/subtract def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 1258c70b81f6f..e7eed2a0bbb1a 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2521,6 +2521,7 @@ def : Thumb2DSPPat<(saddsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), (t2QDADD rGPR:$Rm, rGPR:$Rn)>; def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; + def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), (t2QADD8 rGPR:$Rm, rGPR:$Rn)>; def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), @@ -2530,6 +2531,15 @@ def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMuqadd8b rGPR:$Rm, rGPR:$Rn), + (t2UQADD8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMuqsub8b rGPR:$Rm, rGPR:$Rn), + (t2UQSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMuqadd16b rGPR:$Rm, rGPR:$Rn), + (t2UQADD16 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMuqsub16b rGPR:$Rm, rGPR:$Rn), + (t2UQSUB16 rGPR:$Rm, rGPR:$Rn)>; + // Signed/Unsigned add/subtract def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll index 5036168b33d4e..39c79f4104e6e 100644 --- a/llvm/test/CodeGen/ARM/uadd_sat.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 -; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 +; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP +; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM declare i4 @llvm.uadd.sat.i4(i4, i4) @@ -106,21 +106,25 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind { ; CHECK-T1-NEXT: .LCPI2_0: ; CHECK-T1-NEXT: .long 65535 @ 0xffff ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r1, r0 -; CHECK-T2-NEXT: movw r0, #65535 -; CHECK-T2-NEXT: cmp r1, r0 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #65535 +; CHECK-T2NODSP-NEXT: cmp r1, r0 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, r1 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r1, r0, r1 -; CHECK-ARM-NEXT: movw r0, #65535 -; CHECK-ARM-NEXT: cmp r1, r0 -; CHECK-ARM-NEXT: movlo r0, r1 +; CHECK-ARM-NEXT: uqadd16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -137,19 +141,24 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: cmp r0, #255 -; CHECK-T2-NEXT: it hs -; CHECK-T2-NEXT: movhs r0, #255 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #255 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r0, #255 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #255 -; CHECK-ARM-NEXT: movhs r0, #255 +; CHECK-ARM-NEXT: uqadd8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y) ret i8 %tmp diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll index 3ebdafa49d97e..451b32f730424 100644 --- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll @@ -130,20 +130,15 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw ; CHECK-T2DSP-LABEL: func16: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: uxtah r1, r0, r1 -; CHECK-T2DSP-NEXT: movw r0, #65535 -; CHECK-T2DSP-NEXT: cmp r1, r0 -; CHECK-T2DSP-NEXT: it lo -; CHECK-T2DSP-NEXT: movlo r0, r1 +; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: mul r1, r1, r2 -; CHECK-ARM-NEXT: uxtah r1, r0, r1 -; CHECK-ARM-NEXT: movw r0, #65535 -; CHECK-ARM-NEXT: cmp r1, r0 -; CHECK-ARM-NEXT: movlo r0, r1 +; CHECK-ARM-NEXT: uqadd16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a) @@ -176,18 +171,15 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { ; CHECK-T2DSP-LABEL: func8: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: uxtab r0, r0, r1 -; CHECK-T2DSP-NEXT: cmp r0, #255 -; CHECK-T2DSP-NEXT: it hs -; CHECK-T2DSP-NEXT: movhs r0, #255 +; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: uxtab r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #255 -; CHECK-ARM-NEXT: movhs r0, #255 +; CHECK-ARM-NEXT: uqadd8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a) diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll index 4bf42dbb59eff..c16869f4b4ddb 100644 --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 -; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 +; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP +; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM declare i4 @llvm.usub.sat.i4(i4, i4) @@ -100,17 +100,23 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind { ; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -126,17 +132,23 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y) ret i8 %tmp diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll index c0fcd5e8b1a63..04494a2e40599 100644 --- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 -; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 +; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP +; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM declare i4 @llvm.usub.sat.i4(i4, i4) @@ -112,21 +112,27 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw ; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: uxth r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: uxth r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: mul r1, r1, r2 -; CHECK-ARM-NEXT: uxth r1, r1 -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a) @@ -145,21 +151,27 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: uxtb r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: uxtb r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: uxtb r1, r1 -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)