diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 35e1127000b8a..35adff3f23ba2 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -601,10 +601,20 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); - if (!Subtarget->hasVFP2Base()) + if (!Subtarget->hasVFP2Base()) { setAllExpand(MVT::f32); - if (!Subtarget->hasFP64()) + } else { + for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) + setOperationAction(Op, MVT::f32, Legal); + } + if (!Subtarget->hasFP64()) { setAllExpand(MVT::f64); + } else { + for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) + setOperationAction(Op, MVT::f64, Legal); + } } if (Subtarget->hasFullFP16()) { @@ -1337,30 +1347,42 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, } // FP16 often need to be promoted to call lib functions + // clang-format off if (Subtarget->hasFullFP16()) { - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FTAN, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FPOWI, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FEXP10, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::LRINT, MVT::f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); + + for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, + ISD::FCOS, ISD::FSIN, ISD::FSINCOS, + ISD::FSINCOSPI, ISD::FMODF, ISD::FACOS, + ISD::FASIN, ISD::FATAN, ISD::FATAN2, + ISD::FCOSH, ISD::FSINH, ISD::FTANH, + ISD::FTAN, ISD::FEXP, ISD::FEXP2, + ISD::FEXP10, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW, + ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN, + ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN, + ISD::STRICT_FATAN2, ISD::STRICT_FCOSH, ISD::STRICT_FSINH, + ISD::STRICT_FTANH, ISD::STRICT_FEXP, ISD::STRICT_FEXP2, + ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, + ISD::STRICT_FTAN}) { + setOperationAction(Op, MVT::f16, Promote); + } - setOperationAction(ISD::FROUND, MVT::f16, Legal); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); - setOperationAction(ISD::FTRUNC, MVT::f16, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); - setOperationAction(ISD::FRINT, MVT::f16, Legal); - setOperationAction(ISD::FFLOOR, MVT::f16, Legal); - setOperationAction(ISD::FCEIL, MVT::f16, Legal); + // Round-to-integer need custom lowering for fp16, as Promote doesn't work + // because the result type is integer. 
+ for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LLRINT, ISD::STRICT_LROUND, + ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) + setOperationAction(Op, MVT::f16, Custom); + + for (auto Op : {ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC, + ISD::FNEARBYINT, ISD::FRINT, ISD::FFLOOR, + ISD::FCEIL, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL}) { + setOperationAction(Op, MVT::f16, Legal); + } + // clang-format on } if (Subtarget->hasNEON()) { @@ -10721,6 +10743,30 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerCMP(Op, DAG); case ISD::ABS: return LowerABS(Op, DAG); + case ISD::LRINT: + case ISD::LLRINT: + case ISD::LROUND: + case ISD::LLROUND: { + assert((Op.getOperand(0).getValueType() == MVT::f16 || + Op.getOperand(0).getValueType() == MVT::bf16) && + "Expected custom lowering of rounding operations only for f16"); + SDLoc DL(Op); + SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext); + } + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: { + assert((Op.getOperand(1).getValueType() == MVT::f16 || + Op.getOperand(1).getValueType() == MVT::bf16) && + "Expected custom lowering of rounding operations only for f16"); + SDLoc DL(Op); + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } } } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 282ff534fc112..85c2fb1a914dd 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -473,15 +473,15 @@ def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs)>; // An 'fmul' node with a single use. let HasOneUse = 1 in -def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs)>; +def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (any_fmul node:$lhs, node:$rhs)>; // An 'fadd' node which checks for single non-hazardous use. -def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{ +def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(any_fadd node:$lhs, node:$rhs),[{ return hasNoVMLxHazardUse(N); }]>; // An 'fsub' node which checks for single non-hazardous use.
-def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ +def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(any_fsub node:$lhs, node:$rhs),[{ return hasNoVMLxHazardUse(N); }]>; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index 6771106ef2d89..e2cc97b7b4634 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -439,14 +439,14 @@ let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FP def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>, + [(set DPR:$Dd, (any_fadd DPR:$Dn, (f64 DPR:$Dm)))]>, Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>, + [(set SPR:$Sd, (any_fadd SPR:$Sn, SPR:$Sm))]>, Sched<[WriteFPALU32]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -457,21 +457,21 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP def VADDH : AHbI<0b11100, 0b11, 0, 0, (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm), IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm", - [(set (f16 HPR:$Sd), (fadd (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, + [(set (f16 HPR:$Sd), (any_fadd (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, Sched<[WriteFPALU32]>; let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>, + [(set DPR:$Dd, (any_fsub DPR:$Dn, (f64 DPR:$Dm)))]>, Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>, + [(set SPR:$Sd, (any_fsub SPR:$Sn, SPR:$Sm))]>, Sched<[WriteFPALU32]>{ // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
@@ -482,42 +482,42 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP def VSUBH : AHbI<0b11100, 0b11, 1, 0, (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm), IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm", - [(set (f16 HPR:$Sd), (fsub (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, + [(set (f16 HPR:$Sd), (any_fsub (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, Sched<[WriteFPALU32]>; let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>, + [(set DPR:$Dd, (any_fdiv DPR:$Dn, (f64 DPR:$Dm)))]>, Sched<[WriteFPDIV64]>; let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>, + [(set SPR:$Sd, (any_fdiv SPR:$Sn, SPR:$Sm))]>, Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VDIVH : AHbI<0b11101, 0b00, 0, 0, (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm), IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", - [(set (f16 HPR:$Sd), (fdiv (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, + [(set (f16 HPR:$Sd), (any_fdiv (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>, + [(set DPR:$Dd, (any_fmul DPR:$Dn, (f64 DPR:$Dm)))]>, Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>, + [(set SPR:$Sd, (any_fmul SPR:$Sn, SPR:$Sm))]>, Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -528,21 +528,21 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP def VMULH : AHbI<0b11100, 0b10, 0, 0, (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm), IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", - [(set (f16 HPR:$Sd), (fmul (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, + [(set (f16 HPR:$Sd), (any_fmul (f16 HPR:$Sn), (f16 HPR:$Sm)))]>, Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>, + [(set DPR:$Dd, (fneg (any_fmul DPR:$Dn, (f64 DPR:$Dm))))]>, Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>, + [(set SPR:$Sd, (fneg (any_fmul SPR:$Sn, SPR:$Sm)))]>, Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
@@ -553,7 +553,7 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP def VNMULH : AHbI<0b11100, 0b10, 1, 0, (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm), IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", - [(set (f16 HPR:$Sd), (fneg (fmul (f16 HPR:$Sn), (f16 HPR:$Sm))))]>, + [(set (f16 HPR:$Sd), (fneg (any_fmul (f16 HPR:$Sn), (f16 HPR:$Sm))))]>, Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; multiclass vsel_inst<string op, bits<2> opc, int CC> { @@ -587,7 +587,7 @@ defm VSELGE : vsel_inst<"ge", 0b10, 10>; defm VSELEQ : vsel_inst<"eq", 0b00, 0>; defm VSELVS : vsel_inst<"vs", 0b01, 6>; -multiclass vmaxmin_inst<string op, bit opc, SDNode node> { +multiclass vmaxmin_inst<string op, bit opc, SDPatternOperator node> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", isUnpredicable = 1, mayRaiseFPException = 1 in { def H : AHbInp<0b11101, 0b00, opc, @@ -610,8 +610,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode node> { } } -defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>; -defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>; +defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, any_fmaxnum>; +defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, any_fminnum>; // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), @@ -746,7 +746,7 @@ let mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "", - [(set DPR:$Dd, (fpextend SPR:$Sm))]>, + [(set DPR:$Dd, (any_fpextend SPR:$Sm))]>, Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Dd; @@ -766,7 +766,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "", - [(set SPR:$Sd, (fpround DPR:$Dm))]>, + [(set SPR:$Sd, (any_fpround DPR:$Dm))]>, Sched<[WriteFPCVT]> { // Instruction operands.
bits<5> Sd; @@ -796,7 +796,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), Requires<[HasFP16]>, Sched<[WriteFPCVT]>; -def : FP16Pat<(f32 (fpextend (f16 HPR:$Sm))), +def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))), (VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>; def : FP16Pat<(f16_to_fp GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; @@ -808,16 +808,16 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; -def : FP16Pat<(f16 (fpround SPR:$Sm)), +def : FP16Pat<(f16 (any_fpround SPR:$Sm)), (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>; def : FP16Pat<(fp_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>; -def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), +def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane), (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; -def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), +def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane), (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), SPR:$src2), @@ -830,9 +830,9 @@ def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), Requires<[HasFP16]>, Sched<[WriteFPCVT]>; -def : FP16Pat<(f32 (fpextend (extractelt (v8f16 MQPR:$src), imm_odd:$lane))), +def : FP16Pat<(f32 (any_fpextend (extractelt (v8f16 MQPR:$src), imm_odd:$lane))), (VCVTTHS (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane)))>; -def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))), +def : FP16Pat<(f32 (any_fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))), (VCVTTHS (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)), (SSubReg_f16_reg imm_odd:$lane)))>; @@ -844,12 +844,12 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; -def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), +def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_odd:$lane), (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; -def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), +def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_odd:$lane), (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), SPR:$src2), @@ -872,7 +872,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, let hasSideEffects = 0; } -def : FullFP16Pat<(f64 (fpextend (f16 HPR:$Sm))), +def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))), (VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>, Requires<[HasFPARMv8, HasDPVFP]>; def : FP16Pat<(f64 (f16_to_fp GPR:$a)), @@ -898,7 +898,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, let hasSideEffects = 0; } -def : FullFP16Pat<(f16 (fpround DPR:$Dm)), +def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)), (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>, Requires<[HasFPARMv8, HasDPVFP]>; def : FP16Pat<(fp_to_f16 
(f64 DPR:$a)), @@ -1007,41 +1007,41 @@ multiclass vcvt_inst<string opc, bits<2> rm, let Predicates = [HasFPARMv8] in { let Predicates = [HasFullFP16] in { - def : Pat<(i32 (fp_to_sint (node (f16 HPR:$a)))), + def : Pat<(i32 (any_fp_to_sint (node (f16 HPR:$a)))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME#"SH") (f16 HPR:$a)), GPR)>; - def : Pat<(i32 (fp_to_uint (node (f16 HPR:$a)))), + def : Pat<(i32 (any_fp_to_uint (node (f16 HPR:$a)))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME#"UH") (f16 HPR:$a)), GPR)>; } - def : Pat<(i32 (fp_to_sint (node SPR:$a))), + def : Pat<(i32 (any_fp_to_sint (node SPR:$a))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME#"SS") SPR:$a), GPR)>; - def : Pat<(i32 (fp_to_uint (node SPR:$a))), + def : Pat<(i32 (any_fp_to_uint (node SPR:$a))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME#"US") SPR:$a), GPR)>; } let Predicates = [HasFPARMv8, HasDPVFP] in { - def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))), + def : Pat<(i32 (any_fp_to_sint (node (f64 DPR:$a)))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME#"SD") DPR:$a), GPR)>; - def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))), + def : Pat<(i32 (any_fp_to_uint (node (f64 DPR:$a)))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME#"UD") DPR:$a), GPR)>; } } -defm VCVTA : vcvt_inst<"a", 0b00, fround>; defm VCVTN : vcvt_inst<"n", 0b01>; -defm VCVTP : vcvt_inst<"p", 0b10, fceil>; -defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; +defm VCVTA : vcvt_inst<"a", 0b00, any_fround>; defm VCVTN : vcvt_inst<"n", 0b01>; +defm VCVTP : vcvt_inst<"p", 0b10, any_fceil>; +defm VCVTM : vcvt_inst<"m", 0b11, any_ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -1103,9 +1103,9 @@ multiclass vrint_inst_zrx; } -defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc, [], 0>; -defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint, [FPSCR_RM], 0>; -defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint, [FPSCR_RM], 1>; +defm VRINTZ : vrint_inst_zrx<"z", 0, 1, any_ftrunc, [], 0>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0, any_fnearbyint, [FPSCR_RM], 0>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0, any_frint, [FPSCR_RM], 1>; multiclass vrint_inst_anpm<string opc, bits<2> rm, SDPatternOperator node = null_frag> { @@ -1145,30 +1145,31 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm, Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>; -defm VRINTN : vrint_inst_anpm<"n", 0b01, froundeven>; -defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>; -defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; +defm VRINTA : vrint_inst_anpm<"a", 0b00, any_fround>; +defm VRINTN : vrint_inst_anpm<"n", 0b01, any_froundeven>; +defm VRINTP : vrint_inst_anpm<"p", 0b10, any_fceil>; +defm VRINTM : vrint_inst_anpm<"m", 0b11, any_ffloor>; + let mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "", - [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, + [(set DPR:$Dd, (any_fsqrt (f64 DPR:$Dm)))]>, Sched<[WriteFPSQRT64]>; let mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "", - [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, + [(set SPR:$Sd, (any_fsqrt SPR:$Sm))]>, Sched<[WriteFPSQRT32]>; let mayRaiseFPException = 1, Uses = [FPSCR_RM] in def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs HPR:$Sd), (ins HPR:$Sm), IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm", - [(set (f16 HPR:$Sd), (fsqrt (f16 HPR:$Sm)))]>; + [(set (f16 HPR:$Sd), (any_fsqrt (f16 HPR:$Sm)))]>; let hasSideEffects = 0 in { let isMoveReg = 1 in { @@ -1509,10 +1510,10 @@ def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, } let
Predicates=[HasVFP2, HasDPVFP] in { - def : VFPPat<(f64 (sint_to_fp GPR:$a)), + def : VFPPat<(f64 (any_sint_to_fp GPR:$a)), (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; - def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), + def : VFPPat<(f64 (any_sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOD (VLDRS addrmode5:$a))>; } @@ -1529,10 +1530,10 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } -def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), +def : VFPNoNEONPat<(f32 (any_sint_to_fp GPR:$a)), (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), +def : VFPNoNEONPat<(f32 (any_sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOS (VLDRS addrmode5:$a))>; let mayRaiseFPException = 1 in @@ -1545,7 +1546,7 @@ def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, let isUnpredicable = 1; } -def : VFPNoNEONPat<(f16 (sint_to_fp GPR:$a)), +def : VFPNoNEONPat<(f16 (any_sint_to_fp GPR:$a)), (VSITOH (COPY_TO_REGCLASS GPR:$a, SPR))>; let mayRaiseFPException = 1 in @@ -1558,10 +1559,10 @@ def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, } let Predicates=[HasVFP2, HasDPVFP] in { - def : VFPPat<(f64 (uint_to_fp GPR:$a)), + def : VFPPat<(f64 (any_uint_to_fp GPR:$a)), (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; - def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), + def : VFPPat<(f64 (any_uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOD (VLDRS addrmode5:$a))>; } @@ -1578,10 +1579,10 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } -def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), +def : VFPNoNEONPat<(f32 (any_uint_to_fp GPR:$a)), (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), +def : VFPNoNEONPat<(f32 (any_uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOS (VLDRS addrmode5:$a))>; let mayRaiseFPException = 1 in @@ -1594,7 +1595,7 @@ def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, let isUnpredicable = 1; } -def : VFPNoNEONPat<(f16 (uint_to_fp GPR:$a)), +def : VFPNoNEONPat<(f16 (any_uint_to_fp GPR:$a)), (VUITOH (COPY_TO_REGCLASS GPR:$a, SPR))>; // FP -> Int: @@ -1669,12 +1670,12 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, } let Predicates=[HasVFP2, HasDPVFP] in { - def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), + def : VFPPat<(i32 (any_fp_to_sint (f64 DPR:$a))), (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; def : VFPPat<(i32 (fp_to_sint_sat (f64 DPR:$a), i32)), (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; - def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + def : VFPPat<(alignedstore32 (i32 (any_fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr), (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; @@ -1693,12 +1694,12 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let D = VFPNeonA8Domain; } -def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), +def : VFPNoNEONPat<(i32 (any_fp_to_sint SPR:$a)), (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; def : VFPPat<(i32 (fp_to_sint_sat SPR:$a, i32)), (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; -def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), +def : VFPNoNEONPat<(alignedstore32 (i32 (any_fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; def : 
VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)), @@ -1715,7 +1716,7 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, let isUnpredicable = 1; } -def : VFPNoNEONPat<(i32 (fp_to_sint (f16 HPR:$a))), +def : VFPNoNEONPat<(i32 (any_fp_to_sint (f16 HPR:$a))), (COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>; def : VFPPat<(i32 (fp_to_sint_sat (f16 HPR:$a), i32)), (COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>; @@ -1730,12 +1731,12 @@ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, } let Predicates=[HasVFP2, HasDPVFP] in { - def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), + def : VFPPat<(i32 (any_fp_to_uint (f64 DPR:$a))), (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; def : VFPPat<(i32 (fp_to_uint_sat (f64 DPR:$a), i32)), (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; - def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + def : VFPPat<(alignedstore32 (i32 (any_fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr), (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; @@ -1754,12 +1755,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, let D = VFPNeonA8Domain; } -def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), +def : VFPNoNEONPat<(i32 (any_fp_to_uint SPR:$a)), (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; def : VFPPat<(i32 (fp_to_uint_sat SPR:$a, i32)), (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; -def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), +def : VFPNoNEONPat<(alignedstore32 (i32 (any_fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)), @@ -1776,7 +1777,7 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, let isUnpredicable = 1; } -def : VFPNoNEONPat<(i32 (fp_to_uint (f16 HPR:$a))), +def : VFPNoNEONPat<(i32 (any_fp_to_uint (f16 HPR:$a))), (COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>; def : VFPPat<(i32 (fp_to_uint_sat (f16 HPR:$a), i32)), (COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>; @@ -2320,13 +2321,13 @@ def : Pat<(fadd_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)), // Match @llvm.fma.* intrinsics // (fma x, y, z) -> (vfms z, x, y) -def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)), +def : Pat<(f64 (any_fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4,HasDPVFP]>; -def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)), +def : Pat<(f32 (any_fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (f16 HPR:$Sdin))), +def : Pat<(f16 (any_fma HPR:$Sn, HPR:$Sm, (f16 HPR:$Sdin))), (VFMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>, Requires<[HasFullFP16]>; @@ -2375,13 +2376,13 @@ def : Pat<(fsub_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)), // Match @llvm.fma.* intrinsics // (fma (fneg x), y, z) -> (vfms z, x, y) -def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)), +def : Pat<(f64 (any_fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4,HasDPVFP]>; -def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), +def : Pat<(f32 (any_fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))), +def : Pat<(f16 (any_fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))), (VFMSH (f16 
HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>, Requires<[HasFullFP16]>; @@ -2427,23 +2428,23 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), // Match @llvm.fma.* intrinsics // (fneg (fma x, y, z)) -> (vfnma z, x, y) -def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))), +def : Pat<(fneg (any_fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4,HasDPVFP]>; -def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), +def : Pat<(fneg (any_fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 (f16 HPR:$Sdin)))), +def : Pat<(fneg (any_fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 (f16 HPR:$Sdin)))), (VFNMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>, Requires<[HasFullFP16]>; // (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y) -def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), +def : Pat<(f64 (any_fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4,HasDPVFP]>; -def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), +def : Pat<(f32 (any_fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))), +def : Pat<(f16 (any_fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))), (VFNMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>, Requires<[HasFullFP16]>; @@ -2488,23 +2489,23 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // Match @llvm.fma.* intrinsics // (fma x, y, (fneg z)) -> (vfnms z, x, y)) -def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), +def : Pat<(f64 (any_fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4,HasDPVFP]>; -def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), +def : Pat<(f32 (any_fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -def : Pat<(f16 (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))), +def : Pat<(f16 (any_fma (f16 HPR:$Sn), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))), (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>, Requires<[HasFullFP16]>; // (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) -def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), +def : Pat<(fneg (f64 (any_fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4,HasDPVFP]>; -def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), +def : Pat<(fneg (f32 (any_fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))), +def : Pat<(fneg (f16 (any_fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))), (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>, Requires<[HasFullFP16]>; diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index 93b6a58a22b6c..cb87508d53342 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -76,7 +76,6 @@ define i32 @fptosi_f32(float %x) #0 { ; CHECK-NOSP: bl __aeabi_f2iz ; CHECK-NOSP: bl __aeabi_f2iz ; CHECK-SP: vcvt.s32.f32 -; FIXME-CHECK-SP: vcvt.s32.f32 define void @fptosi_f32_twice(float %arg, ptr %ptr) #0 { entry: %conv = call 
i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0 @@ -146,6 +145,80 @@ define float @tan_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: acos_f32: +; CHECK: bl acosf +define float @acos_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: asin_f32: +; CHECK: bl asinf +define float @asin_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: atan_f32: +; CHECK: bl atanf +define float @atan_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: cosh_f32: +; CHECK: bl coshf +define float @cosh_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: sinh_f32: +; CHECK: bl sinhf +define float @sinh_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: tanh_f32: +; CHECK: bl tanhf +define float @tanh_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: fmuladd_f32: +; CHECK-SP: vfma.f32 +; CHECK-NOSP: bl __aeabi_fmul +; CHECK-NOSP: bl __aeabi_fadd +define float @fmuladd_f32(float %x, float %y, float %z) #0 { + %val = call float @llvm.experimental.constrained.fmuladd.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: ldexp_f32: +; CHECK: bl ldexpf +define float @ldexp_f32(float %x, i32 %y) #0 { + %val = call float @llvm.experimental.constrained.ldexp.f32.i32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: roundeven_f32: +; CHECK-SP-V8: vrintn.f32 +; CHECK-NOSP: bl roundevenf +define float @roundeven_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: uitofp_f32_i32: +; CHECK-NOSP: bl __aeabi_ui2f +; FIXME-CHECK-SP: vcvt.f32.f64 +define float @uitofp_f32_i32(i32 %x) #0 { + %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: atan2_f32: ; CHECK: bl atan2f define float @atan2_f32(float %x, float %y) #0 { @@ -617,6 +690,80 @@ define double @tan_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: acos_f64: +; CHECK: bl acos +define double @acos_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.acos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: asin_f64: +; CHECK: bl asin +define double @asin_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.asin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: atan_f64: +; CHECK: bl atan +define double 
@atan_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.atan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: cosh_f64: +; CHECK: bl cosh +define double @cosh_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.cosh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: sinh_f64: +; CHECK: bl sinh +define double @sinh_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.sinh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: tanh_f64: +; CHECK: bl tanh +define double @tanh_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.tanh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: fmuladd_f64: +; CHECK-DP: vfma.f64 +; CHECK-NODP: bl __aeabi_dmul +; CHECK-NODP: bl __aeabi_dadd +define double @fmuladd_f64(double %x, double %y, double %z) #0 { + %val = call double @llvm.experimental.constrained.fmuladd.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: ldexp_f64: +; CHECK: bl ldexp +define double @ldexp_f64(double %x, i32 %y) #0 { + %val = call double @llvm.experimental.constrained.ldexp.f64.i32(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: roundeven_f64: +; CHECK-DP-V8: vrintn.f64 +; CHECK-NODP: bl roundeven +define double @roundeven_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: uitofp_f64_i32: +; CHECK-NOSP: bl __aeabi_ui2d +; FIXME-CHECK-SP: vsub.f64 +define double @uitofp_f64_i32(i32 %x) #0 { + %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: atan2_f64: ; CHECK: bl atan2 define double @atan2_f64(double %x, double %y) #0 { @@ -1052,6 +1199,16 @@ declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, meta declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata) +declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) +declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata) declare float 
@llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) @@ -1087,6 +1244,16 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata) +declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 200b14bae56ed..b4060d5fdb574 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -98,12 +98,18 @@ define i32 @test_fptosi_i32(ptr %p) { ret i32 %r } -; FIXME -;define i64 @test_fptosi_i64(ptr %p) { -; %a = load half, ptr %p, align 2 -; %r = fptosi half %a to i64 -; ret i64 %r -;} +define i64 @test_fptosi_i64(ptr %p) { +; CHECK-LABEL: test_fptosi_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixhfdi +; CHECK-NEXT: pop {r11, pc} + %a = load half, ptr %p, align 2 + %r = fptosi half %a to i64 + ret i64 %r +} define i32 @test_fptoui_i32(ptr %p) { ; CHECK-LABEL: test_fptoui_i32: @@ -116,12 +122,18 @@ define i32 @test_fptoui_i32(ptr %p) { ret i32 %r } -; FIXME -;define i64 @test_fptoui_i64(ptr %p) { -; %a = load half, ptr %p, align 2 -; %r = fptoui half %a to i64 -; ret i64 %r -;} +define i64 @test_fptoui_i64(ptr %p) { +; CHECK-LABEL: test_fptoui_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixunshfdi +; CHECK-NEXT: pop {r11, pc} + %a = load half, ptr %p, align 2 + %r = fptoui half %a to i64 + ret i64 %r +} define void @test_sitofp_i32(i32 %a, ptr %p) { ; CHECK-LABEL: test_sitofp_i32: @@ -145,19 +157,31 @@ define void @test_uitofp_i32(i32 %a, ptr %p) { ret void } -; FIXME -;define void @test_sitofp_i64(i64 %a, ptr %p) { -; %r = sitofp i64 %a to half -; store half %r, ptr %p -; ret void -;} +define void @test_sitofp_i64(i64 %a, ptr %p) { +; CHECK-LABEL: test_sitofp_i64: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: bl __floatdihf +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %r = sitofp i64 %a to half + 
store half %r, ptr %p + ret void +} -; FIXME -;define void @test_uitofp_i64(i64 %a, ptr %p) { -; %r = uitofp i64 %a to half -; store half %r, ptr %p -; ret void -;} +define void @test_uitofp_i64(i64 %a, ptr %p) { +; CHECK-LABEL: test_uitofp_i64: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: bl __floatundihf +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %r = uitofp i64 %a to half + store half %r, ptr %p + ret void +} define void @test_fptrunc_float(float %f, ptr %p) { ; CHECK-LABEL: test_fptrunc_float: @@ -613,6 +637,902 @@ define void @test_fmuladd(ptr %p, ptr %q, ptr %r) { ret void } +; Half-precision intrinsics + +define half @add_f16(half %x, half %y) #0 { +; CHECK-LABEL: add_f16: +; CHECK: vadd.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sub_f16(half %x, half %y) #0 { +; CHECK-LABEL: sub_f16: +; CHECK: vsub.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @mul_f16(half %x, half %y) #0 { +; CHECK-LABEL: mul_f16: +; CHECK: vmul.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @div_f16(half %x, half %y) #0 { +; CHECK-LABEL: div_f16: +; CHECK: vdiv.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fdiv.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @frem_f16(half %x, half %y) #0 { +; CHECK-LABEL: frem_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: bl fmodf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @fma_f16(half %x, half %y, half %z) #0 { +; CHECK-LABEL: fma_f16: +; CHECK: vfma.f16 s2, s0, s1 +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @fmuladd_f16(half %x, half %y, half %z) #0 { +; CHECK-LABEL: fmuladd_f16: +; CHECK: vfma.f16 s2, s0, s1 +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fmuladd.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @fptosi_i32_f16(half %x) #0 { +; CHECK-LABEL: fptosi_i32_f16: +; CHECK: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i32 @fptoui_i32_f16(half %x) #0 { +; CHECK-LABEL: fptoui_i32_f16: +; CHECK: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i64 @fptosi_i64_f16(half %x) #0 { +; CHECK-LABEL: fptosi_i64_f16: +; 
CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixhfdi +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define i64 @fptoui_i64_f16(half %x) #0 { +; CHECK-LABEL: fptoui_i64_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixunshfdi +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define half @sitofp_f16_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f16_i32: +; CHECK: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: eor r0, r0, #-2147483648 +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: str r0, [sp] +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: vldr d16, .LCPI57_0 +; CHECK-NEXT: vldr d17, [sp] +; CHECK-NEXT: vsub.f64 d16, d17, d16 +; CHECK-NEXT: vcvtb.f16.f64 s0, d16 +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: .LCPI57_0: +; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .long 1127219200 + %val = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @uitofp_f16_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f16_i32: +; CHECK: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: str r0, [sp] +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: vldr d16, .LCPI58_0 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: vldr d17, [sp] +; CHECK-NEXT: vsub.f64 d16, d17, d16 +; CHECK-NEXT: vcvtb.f16.f64 s0, d16 +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: .LCPI58_0: +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 1127219200 + %val = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sitofp_f16_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f16_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floatdihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @uitofp_f16_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f16_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floatundihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sitofp_f16_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f16_i128: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floattihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @uitofp_f16_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f16_i128: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floatuntihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sqrt_f16(half %x) #0 { +; CHECK-LABEL: sqrt_f16: 
+; CHECK: vsqrt.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.sqrt.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @powi_f16(half %x, i32 %y) #0 { +; CHECK-LABEL: powi_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl __powisf2 +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.powi.f16(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sin_f16(half %x) #0 { +; CHECK-LABEL: sin_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl sinf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @cos_f16(half %x) #0 { +; CHECK-LABEL: cos_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl cosf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.cos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @tan_f16(half %x) #0 { +; CHECK-LABEL: tan_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.tan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @asin_f16(half %x) #0 { +; CHECK-LABEL: asin_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl asinf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.asin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @acos_f16(half %x) #0 { +; CHECK-LABEL: acos_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl acosf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.acos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @atan_f16(half %x) #0 { +; CHECK-LABEL: atan_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl atanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.atan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @atan2_f16(half %x, half %y) #0 { +; CHECK-LABEL: atan2_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: bl atan2f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.atan2.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sinh_f16(half %x) #0 { +; CHECK-LABEL: sinh_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: 
vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl sinhf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sinh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @cosh_f16(half %x) #0 { +; CHECK-LABEL: cosh_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl coshf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.cosh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @tanh_f16(half %x) #0 { +; CHECK-LABEL: tanh_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanhf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.tanh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @pow_f16(half %x, half %y) #0 { +; CHECK-LABEL: pow_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: bl powf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.pow.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @log_f16(half %x) #0 { +; CHECK-LABEL: log_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl logf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.log.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @log10_f16(half %x) #0 { +; CHECK-LABEL: log10_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl log10f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.log10.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @log2_f16(half %x) #0 { +; CHECK-LABEL: log2_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl log2f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.log2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @exp_f16(half %x) #0 { +; CHECK-LABEL: exp_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl expf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.exp.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @exp2_f16(half %x) #0 { +; CHECK-LABEL: exp2_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl exp2f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.exp2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @rint_f16(half %x) #0 { +; CHECK-LABEL: rint_f16: +; CHECK: vrintx.f16 s0, s0 +; 
CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.rint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @nearbyint_f16(half %x) #0 { +; CHECK-LABEL: nearbyint_f16: +; CHECK: vrintr.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.nearbyint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @lrint_f16(half %x) #0 { +; CHECK-LABEL: lrint_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: pop {r11, pc} + %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i64 @llrint_f16(half %x) #0 { +; CHECK-LABEL: llrint_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define half @maxnum_f16(half %x, half %y) #0 { +; CHECK-LABEL: maxnum_f16: +; CHECK: vmaxnm.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.maxnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @minnum_f16(half %x, half %y) #0 { +; CHECK-LABEL: minnum_f16: +; CHECK: vminnm.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.minnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @ceil_f16(half %x) #0 { +; CHECK-LABEL: ceil_f16: +; CHECK: vrintp.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.ceil.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @floor_f16(half %x) #0 { +; CHECK-LABEL: floor_f16: +; CHECK: vrintm.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.floor.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @lround_f16(half %x) #0 { +; CHECK-LABEL: lround_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl lroundf +; CHECK-NEXT: pop {r11, pc} + %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i64 @llround_f16(half %x) #0 { +; CHECK-LABEL: llround_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl llroundf +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define half @round_f16(half %x) #0 { +; CHECK-LABEL: round_f16: +; CHECK: vrinta.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.round.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @roundeven_f16(half %x) #0 { +; CHECK-LABEL: roundeven_f16: +; CHECK: vrintn.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.roundeven.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @trunc_f16(half %x) #0 { +; CHECK-LABEL: trunc_f16: +; CHECK: vrintz.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.trunc.f16(half %x, metadata !"fpexcept.strict") #0 + ret 
half %val +} + +define half @ldexp_f16(half %x, i32 %y) #0 { +; CHECK-LABEL: ldexp_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.ldexp.f16.i32(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @fcmp_olt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_olt_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ole_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ole_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwls r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ogt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ogt_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_oge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_oge_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwge r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_oeq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_oeq_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_one_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_one_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ult_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ult_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ule_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ule_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwle r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0 
+ %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ugt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ugt_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_uge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_uge_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwpl r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ueq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ueq_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: movwvs r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_une_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_une_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_olt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_olt_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ole_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ole_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwls r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ogt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ogt_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_oge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_oge_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwge r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_oeq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_oeq_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", 
metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_one_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_one_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ult_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ult_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ule_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ule_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwle r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ugt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ugt_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_uge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_uge_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwpl r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ueq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ueq_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: movwvs r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_une_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_une_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + +; Intrinsics to convert between floating-point types + +define half @fptrunc_f16_f32(float %x) #0 { +; CHECK-LABEL: fptrunc_f16_f32: +; CHECK: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define float @fpext_f32_f16(half %x) #0 { +; CHECK-LABEL: fpext_f32_f16: +; CHECK: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0 + ret float %val +} + + +attributes #0 = { strictfp } + 
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fmuladd.f16(half, half, half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.tan.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f16(half, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
+declare half @llvm.experimental.constrained.floor.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata)
+declare half @llvm.experimental.constrained.round.f16(half, metadata)
+declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
+declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
+
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float,
metadata, metadata) +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) + + declare half @llvm.sqrt.f16(half %a) declare half @llvm.powi.f16.i32(half %a, i32 %b) declare half @llvm.sin.f16(half %a) diff --git a/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll b/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll new file mode 100644 index 0000000000000..6e5b58974fc50 --- /dev/null +++ b/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple armv7-- -mattr=+vfp4 -O0 -o - %s | FileCheck %s +; RUN: llc -mtriple armv7-- -mattr=+vfp4 -O3 -o - %s | FileCheck %s --check-prefix=CHECK-O3 + +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata) + +define i32 @test(i32 %a, i16 %b) #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: sxth r0, r1 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: eor r2, r2, #-2147483648 +; CHECK-NEXT: str r2, [sp] +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vldr d17, .LCPI0_0 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-NEXT: str r1, [sp, #12] +; CHECK-NEXT: eor r0, r0, #-2147483648 +; CHECK-NEXT: str r0, [sp, #8] +; CHECK-NEXT: vldr d16, [sp, #8] +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-NEXT: vcmp.f32 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-NEXT: .long 1127219200 +; +; CHECK-O3-LABEL: test: +; CHECK-O3: @ %bb.0: @ %entry +; CHECK-O3-NEXT: sub sp, sp, #16 +; CHECK-O3-NEXT: sxth r1, r1 +; CHECK-O3-NEXT: movw r2, #0 +; CHECK-O3-NEXT: movt r2, #17200 +; CHECK-O3-NEXT: str r2, [sp, #4] +; CHECK-O3-NEXT: eor r0, r0, #-2147483648 +; CHECK-O3-NEXT: str r0, [sp] +; CHECK-O3-NEXT: vldr d16, [sp] +; CHECK-O3-NEXT: vldr d17, .LCPI0_0 +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-O3-NEXT: str r2, [sp, #12] +; CHECK-O3-NEXT: eor r0, r1, #-2147483648 +; CHECK-O3-NEXT: str r0, [sp, #8] +; CHECK-O3-NEXT: vldr d16, [sp, #8] +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-O3-NEXT: vcmp.f32 s0, s2 +; CHECK-O3-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-O3-NEXT: mov r0, #0 +; CHECK-O3-NEXT: movweq r0, #1 +; CHECK-O3-NEXT: add sp, sp, #16 +; CHECK-O3-NEXT: bx lr +; CHECK-O3-NEXT: .p2align 3 +; CHECK-O3-NEXT: @ %bb.1: +; CHECK-O3-NEXT: .LCPI0_0: +; CHECK-O3-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-O3-NEXT: .long 1127219200 +entry: + %conv = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %conv1 = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %conv, float %conv1, metadata !"oeq", metadata 
!"fpexcept.strict") #1 + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define i32 @test2(i32 %a, i16 %b) #0 { +; CHECK-LABEL: test2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: uxth r0, r1 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: eor r2, r2, #-2147483648 +; CHECK-NEXT: str r2, [sp] +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vldr d17, .LCPI1_0 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-NEXT: str r1, [sp, #12] +; CHECK-NEXT: str r0, [sp, #8] +; CHECK-NEXT: vldr d16, [sp, #8] +; CHECK-NEXT: vldr d17, .LCPI1_1 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-NEXT: vcmp.f32 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-NEXT: .long 1127219200 +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .long 0 @ double 4503599627370496 +; CHECK-NEXT: .long 1127219200 +; +; CHECK-O3-LABEL: test2: +; CHECK-O3: @ %bb.0: @ %entry +; CHECK-O3-NEXT: sub sp, sp, #16 +; CHECK-O3-NEXT: uxth r1, r1 +; CHECK-O3-NEXT: movw r2, #0 +; CHECK-O3-NEXT: movt r2, #17200 +; CHECK-O3-NEXT: str r2, [sp, #4] +; CHECK-O3-NEXT: eor r0, r0, #-2147483648 +; CHECK-O3-NEXT: str r0, [sp] +; CHECK-O3-NEXT: vldr d16, [sp] +; CHECK-O3-NEXT: vldr d17, .LCPI1_0 +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-O3-NEXT: str r2, [sp, #12] +; CHECK-O3-NEXT: str r1, [sp, #8] +; CHECK-O3-NEXT: vldr d16, [sp, #8] +; CHECK-O3-NEXT: vldr d17, .LCPI1_1 +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-O3-NEXT: vcmp.f32 s0, s2 +; CHECK-O3-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-O3-NEXT: mov r0, #0 +; CHECK-O3-NEXT: movweq r0, #1 +; CHECK-O3-NEXT: add sp, sp, #16 +; CHECK-O3-NEXT: bx lr +; CHECK-O3-NEXT: .p2align 3 +; CHECK-O3-NEXT: @ %bb.1: +; CHECK-O3-NEXT: .LCPI1_0: +; CHECK-O3-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-O3-NEXT: .long 1127219200 +; CHECK-O3-NEXT: .LCPI1_1: +; CHECK-O3-NEXT: .long 0 @ double 4503599627370496 +; CHECK-O3-NEXT: .long 1127219200 +entry: + %conv = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %conv1 = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %conv, float %conv1, metadata !"oeq", metadata !"fpexcept.strict") #1 + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +attributes #0 = { strictfp noinline optnone } +attributes #1 = { strictfp } diff --git a/llvm/test/CodeGen/ARM/strict-fp-ops.ll b/llvm/test/CodeGen/ARM/strict-fp-ops.ll new file mode 100644 index 0000000000000..8e51e11e7a150 --- /dev/null +++ b/llvm/test/CodeGen/ARM/strict-fp-ops.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple arm-- -mattr=+vfp4 %s -o - | FileCheck %s + + +; Div whose result is unused should be removed unless we have strict exceptions + +define void @unused_div(float %x, float %y) { +; CHECK-LABEL: unused_div: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mov pc, lr +entry: + %add = fdiv float %x, %y + ret 
void +} + +define void @unused_div_fpexcept_strict(float %x, float %y) #0 { +; CHECK-LABEL: unused_div_fpexcept_strict: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vdiv.f32 s0, s2, s0 +; CHECK-NEXT: mov pc, lr +entry: + %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret void +} + +define void @unused_div_round_dynamic(float %x, float %y) #0 { +; CHECK-LABEL: unused_div_round_dynamic: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mov pc, lr +entry: + %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + ret void +} + + +; Machine CSE should eliminate the second add unless we have strict exceptions + +define float @add_twice(float %x, float %y, i32 %n) { +; CHECK-LABEL: add_twice: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vmulne.f32 s0, s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov pc, lr +entry: + %add = fadd float %x, %y + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + %add1 = fadd float %x, %y + %mul = fmul float %add, %add1 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + +define float @add_twice_fpexcept_strict(float %x, float %y, i32 %n) #0 { +; CHECK-LABEL: add_twice_fpexcept_strict: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vmov s4, r0 +; CHECK-NEXT: vadd.f32 s0, s4, s2 +; CHECK-NEXT: vaddne.f32 s2, s4, s2 +; CHECK-NEXT: vmulne.f32 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov pc, lr +entry: + %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + +define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 { +; CHECK-LABEL: add_twice_round_dynamic: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vmulne.f32 s0, s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov pc, lr +entry: + %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + +; Two adds separated by llvm.set.rounding should be preserved when rounding is +; dynamic 
(as they may give different results) or when we have strict exceptions +; (the llvm.set.rounding is irrelevant, but both could trap). + +define float @set_rounding(float %x, float %y) { +; CHECK-LABEL: set_rounding: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r2, fpscr +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vsub.f32 s0, s0, s0 +; CHECK-NEXT: orr r0, r2, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmrs r1, fpscr +; CHECK-NEXT: bic r1, r1, #12582912 +; CHECK-NEXT: vmsr fpscr, r1 +; CHECK-NEXT: mov pc, lr +entry: + %add1 = fadd float %x, %y + call void @llvm.set.rounding(i32 0) + %add2 = fadd float %x, %y + call void @llvm.set.rounding(i32 1) + %sub = fsub float %add1, %add2 + ret float %sub +} + +define float @set_rounding_fpexcept_strict(float %x, float %y) #0 { +; CHECK-LABEL: set_rounding_fpexcept_strict: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vadd.f32 s4, s2, s0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: orr r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: bic r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vsub.f32 s0, s4, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov pc, lr +entry: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + call void @llvm.set.rounding(i32 0) #0 + %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + call void @llvm.set.rounding(i32 1) #0 + %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %sub +} + +define float @set_rounding_round_dynamic(float %x, float %y) #0 { +; CHECK-LABEL: set_rounding_round_dynamic: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vadd.f32 s4, s2, s0 +; CHECK-NEXT: orr r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: bic r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vsub.f32 s0, s4, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov pc, lr +entry: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @llvm.set.rounding(i32 0) #0 + %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @llvm.set.rounding(i32 1) #0 + %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + ret float %sub +} + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare i32 @llvm.get.rounding() +declare void @llvm.set.rounding(i32) + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll 
b/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll new file mode 100644 index 0000000000000..c51b0378d4591 --- /dev/null +++ b/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=arm-- < %s | FileCheck -check-prefix=NOFP16 %s + +declare void @f16_user(half) +declare half @f16_result() + +declare void @v2f16_user(<2 x half>) +declare <2 x half> @v2f16_result() + +declare void @v4f16_user(<4 x half>) +declare <4 x half> @v4f16_result() + +declare void @v8f16_user(<8 x half>) +declare <8 x half> @v8f16_result() + +define void @f16_arg(half %arg, ptr %ptr) #0 { +; NOFP16-LABEL: f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, lr} +; NOFP16-NEXT: mov r4, r1 +; NOFP16-NEXT: mov r1, #255 +; NOFP16-NEXT: orr r1, r1, #65280 +; NOFP16-NEXT: and r0, r0, r1 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4] +; NOFP16-NEXT: pop {r4, lr} +; NOFP16-NEXT: mov pc, lr + %fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict") + store float %fpext, ptr %ptr + ret void +} + +define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 { +; NOFP16-LABEL: v2f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, lr} +; NOFP16-NEXT: mov r6, #255 +; NOFP16-NEXT: mov r5, r0 +; NOFP16-NEXT: orr r6, r6, #65280 +; NOFP16-NEXT: mov r4, r2 +; NOFP16-NEXT: and r0, r1, r6 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4, #4] +; NOFP16-NEXT: and r0, r5, r6 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4] +; NOFP16-NEXT: pop {r4, r5, r6, lr} +; NOFP16-NEXT: mov pc, lr + %fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict") + store <2 x float> %fpext, ptr %ptr + ret void +} + +define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 { +; NOFP16-LABEL: v3f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, r7, r11, lr} +; NOFP16-NEXT: mov r7, #255 +; NOFP16-NEXT: mov r6, r0 +; NOFP16-NEXT: orr r7, r7, #65280 +; NOFP16-NEXT: mov r4, r3 +; NOFP16-NEXT: and r0, r2, r7 +; NOFP16-NEXT: mov r5, r1 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4, #8] +; NOFP16-NEXT: and r0, r5, r7 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4, #4] +; NOFP16-NEXT: and r0, r6, r7 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4] +; NOFP16-NEXT: pop {r4, r5, r6, r7, r11, lr} +; NOFP16-NEXT: mov pc, lr + %fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict") + store <3 x float> %fpext, ptr %ptr + ret void +} + +define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 { +; NOFP16-LABEL: v4f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, r7, r8, lr} +; NOFP16-NEXT: mov r7, #255 +; NOFP16-NEXT: mov r8, r0 +; NOFP16-NEXT: orr r7, r7, #65280 +; NOFP16-NEXT: mov r4, r2 +; NOFP16-NEXT: and r0, r3, r7 +; NOFP16-NEXT: mov r5, r1 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: ldr r6, [sp, #24] +; NOFP16-NEXT: str r0, [r6, #12] +; NOFP16-NEXT: and r0, r4, r7 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r6, #8] +; NOFP16-NEXT: and r0, r5, r7 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r6, #4] +; NOFP16-NEXT: and r0, r8, r7 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r6] +; NOFP16-NEXT: pop {r4, r5, r6, r7, r8, lr} +; NOFP16-NEXT: mov pc, lr + %fpext = call <4 x float> 
@llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict") + store <4 x float> %fpext, ptr %ptr + ret void +} + + define half @f16_return(float %arg) #0 { +; NOFP16-LABEL: f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r11, lr} +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: pop {r11, lr} +; NOFP16-NEXT: mov pc, lr + %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret half %fptrunc + } + + define <2 x half> @v2f16_return(<2 x float> %arg) #0 { +; NOFP16-LABEL: v2f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r11, lr} +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: mov r0, r1 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r5, r0 +; NOFP16-NEXT: mov r0, r4 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r1, r5 +; NOFP16-NEXT: pop {r4, r5, r11, lr} +; NOFP16-NEXT: mov pc, lr + %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <2 x half> %fptrunc + } + + define <3 x half> @v3f16_return(<3 x float> %arg) #0 { +; NOFP16-LABEL: v3f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, lr} +; NOFP16-NEXT: mov r5, r0 +; NOFP16-NEXT: mov r0, r2 +; NOFP16-NEXT: mov r4, r1 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r6, r0 +; NOFP16-NEXT: mov r0, r4 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: mov r0, r5 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r1, r4 +; NOFP16-NEXT: mov r2, r6 +; NOFP16-NEXT: pop {r4, r5, r6, lr} +; NOFP16-NEXT: mov pc, lr + %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <3 x half> %fptrunc + } + + define <4 x half> @v4f16_return(<4 x float> %arg) #0 { +; NOFP16-LABEL: v4f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, r7, r11, lr} +; NOFP16-NEXT: mov r6, r0 +; NOFP16-NEXT: mov r0, r3 +; NOFP16-NEXT: mov r4, r2 +; NOFP16-NEXT: mov r5, r1 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r7, r0 +; NOFP16-NEXT: mov r0, r4 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: mov r0, r5 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r5, r0 +; NOFP16-NEXT: mov r0, r6 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r1, r5 +; NOFP16-NEXT: mov r2, r4 +; NOFP16-NEXT: mov r3, r7 +; NOFP16-NEXT: pop {r4, r5, r6, r7, r11, lr} +; NOFP16-NEXT: mov pc, lr + %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <4 x half> %fptrunc + } + +define void @outgoing_v4f16_return(ptr %ptr) #0 { +; NOFP16-LABEL: outgoing_v4f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, lr} +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: bl v4f16_result +; NOFP16-NEXT: strh r3, [r4, #6] +; NOFP16-NEXT: strh r2, [r4, #4] +; NOFP16-NEXT: strh r1, [r4, #2] +; NOFP16-NEXT: strh r0, [r4] +; NOFP16-NEXT: pop {r4, lr} +; NOFP16-NEXT: mov pc, lr + %val = call <4 x half> @v4f16_result() #0 + store <4 x half> %val, ptr %ptr + ret void +} + +define void @outgoing_v8f16_return(ptr %ptr) #0 { +; NOFP16-LABEL: outgoing_v8f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r10, r11, lr} +; NOFP16-NEXT: add r11, sp, #8 +; NOFP16-NEXT: sub sp, sp, #16 +; NOFP16-NEXT: bic sp, sp, #15 +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: mov 
r0, sp
+; NOFP16-NEXT: bl v8f16_result
+; NOFP16-NEXT: ldm sp, {r0, r1, r2, r3}
+; NOFP16-NEXT: stm r4, {r0, r1, r2, r3}
+; NOFP16-NEXT: sub sp, r11, #8
+; NOFP16-NEXT: pop {r4, r10, r11, lr}
+; NOFP16-NEXT: mov pc, lr
+ %val = call <8 x half> @v8f16_result() #0
+ store <8 x half> %val, ptr %ptr
+ ret void
+}
+
+define half @call_split_type_used_outside_block_v8f16() #0 {
+; NOFP16-LABEL: call_split_type_used_outside_block_v8f16:
+; NOFP16: @ %bb.0: @ %bb0
+; NOFP16-NEXT: push {r11, lr}
+; NOFP16-NEXT: mov r11, sp
+; NOFP16-NEXT: sub sp, sp, #24
+; NOFP16-NEXT: bic sp, sp, #15
+; NOFP16-NEXT: mov r0, sp
+; NOFP16-NEXT: bl v8f16_result
+; NOFP16-NEXT: ldrh r0, [sp]
+; NOFP16-NEXT: mov sp, r11
+; NOFP16-NEXT: pop {r11, lr}
+; NOFP16-NEXT: mov pc, lr
+bb0:
+ %split.ret.type = call <8 x half> @v8f16_result() #0
+ br label %bb1
+
+bb1:
+ %extract = extractelement <8 x half> %split.ret.type, i32 0
+ ret half %extract
+}
+
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
+declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
+declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0
+
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0
+
+attributes #0 = { strictfp }