diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index c45975431d833..690dd73014e57 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -17,6 +17,8 @@ def NotBoolXor : PatFrags<(ops node:$val), // LoongArch specific DAG Nodes. //===----------------------------------------------------------------------===// +def SDT_LoongArchMOVGR2FR_W + : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; def SDT_LoongArchMOVGR2FR_W_LA64 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; def SDT_LoongArchMOVFR2GR_S_LA64 @@ -28,6 +30,8 @@ def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; // ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point // comparisons to prevent recursive lowering. def loongarch_brcond : SDNode<"LoongArchISD::BRCOND", SDTBrcond, [SDNPHasChain]>; +def loongarch_movgr2fr_w + : SDNode<"LoongArchISD::MOVGR2FR_W", SDT_LoongArchMOVGR2FR_W>; def loongarch_movgr2fr_w_la64 : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>; def loongarch_movfr2gr_s_la64 @@ -185,6 +189,14 @@ def : PatFpr; def : PatFpr; def : PatFpr; def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>; +let Predicates = [HasBasicF, IsLA64] in { +def : Pat<(fdiv (loongarch_movgr2fr_w_la64 (i64 1065353216)), (fsqrt FPR32:$fj)), + (FRSQRT_S FPR32:$fj)>; +} // Predicates = [HasBasicF, IsLA64] +let Predicates = [HasBasicF, IsLA32] in { +def : Pat<(fdiv (loongarch_movgr2fr_w (i32 1065353216)), (fsqrt FPR32:$fj)), + (FRSQRT_S FPR32:$fj)>; +} // Predicates = [HasBasicF, IsLA32] def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>; def : Pat<(is_fpclass FPR32:$fj, (i32 timm:$mask)), (SLTU R0, (ANDI (MOVFR2GR_S (FCLASS_S FPR32:$fj)), @@ -295,6 +307,14 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; // FP reciprocal operation def : 
Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>; +let Predicates = [HasBasicF, IsLA64] in { +def : Pat<(fdiv (loongarch_movgr2fr_w_la64 (i64 1065353216)), FPR32:$src), + (FRECIP_S $src)>; +} // Predicates = [HasBasicF, IsLA64] +let Predicates = [HasBasicF, IsLA32] in { +def : Pat<(fdiv (loongarch_movgr2fr_w (i32 1065353216)), FPR32:$src), + (FRECIP_S $src)>; +} // Predicates = [HasBasicF, IsLA32] let Predicates = [HasFrecipe] in { // FP approximate reciprocal operation @@ -350,6 +370,7 @@ def : PatFpr; let Predicates = [HasBasicF, IsLA32] in { // GPR -> FPR def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; +def : Pat<(loongarch_movgr2fr_w (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; // FPR -> GPR def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>; // int -> f32 diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 965ad8a0a35c6..daefbaa52d42a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -10,6 +10,21 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// LoongArch specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def SDT_LoongArchMOVGR2FR_D + : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisVT<1, i64>]>; +def SDT_LoongArchMOVGR2FR_D_LO_HI + : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def loongarch_movgr2fr_d + : SDNode<"LoongArchISD::MOVGR2FR_D", SDT_LoongArchMOVGR2FR_D>; +def loongarch_movgr2fr_d_lo_hi + : SDNode<"LoongArchISD::MOVGR2FR_D_LO_HI", SDT_LoongArchMOVGR2FR_D_LO_HI>; + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -147,6 +162,11 @@ def : PatFpr; def : PatFpr; def : PatFpr; def : Pat<(fdiv fpimm1, (fsqrt FPR64:$fj)), (FRSQRT_D FPR64:$fj)>; +let Predicates = [HasBasicD, IsLA32] in { +def : Pat<(fdiv (loongarch_movgr2fr_d_lo_hi (i32 0), (i32 1072693248)), + (fsqrt FPR64:$fj)), + (FRSQRT_D FPR64:$fj)>; +} // Predicates = [HasBasicD, IsLA32] def : Pat<(fcopysign FPR64:$fj, FPR32:$fk), (FCOPYSIGN_D FPR64:$fj, (FCVT_D_S FPR32:$fk))>; def : Pat<(fcopysign FPR32:$fj, FPR64:$fk), @@ -252,6 +272,10 @@ def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; // FP reciprocal operation def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>; +let Predicates = [HasBasicD, IsLA32] in { +def : Pat<(fdiv (loongarch_movgr2fr_d_lo_hi (i32 0), (i32 1072693248)), FPR64:$src), + (FRECIP_D FPR64:$src)>; +} // Predicates = [HasBasicD, IsLA32] let Predicates = [HasFrecipe] in { // FP approximate reciprocal operation @@ -307,9 +331,13 @@ def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; +def : Pat<(loongarch_movgr2fr_d GPR:$src), (MOVGR2FR_D GPR:$src)>; } // Predicates = [HasBasicD, IsLA64] let Predicates = [HasBasicD, IsLA32] in { def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; + +def : Pat<(f64 
(loongarch_movgr2fr_d_lo_hi (i32 GPR:$lo), (i32 GPR:$hi))), + (MOVGR2FRH_W (MOVGR2FR_W_64 GPR:$lo), GPR:$hi)>; } // Predicates = [HasBasicD, IsLA32] // Convert FP to int diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4b78b14a2a89d..32baa2d111270 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -18,6 +18,7 @@ #include "LoongArchSubtarget.h" #include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "MCTargetDesc/LoongArchMatInt.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -41,6 +42,34 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); +enum MaterializeFPImm { + NoMaterializeFPImm = 0, + MaterializeFPImm2Ins = 2, + MaterializeFPImm3Ins = 3, + MaterializeFPImm4Ins = 4, + MaterializeFPImm5Ins = 5, + MaterializeFPImm6Ins = 6 +}; + +static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum( + "loongarch-materialize-float-imm", cl::Hidden, + cl::desc("Maximum number of instructions used (including code sequence " + "to generate the value and moving the value to FPR) when " + "materializing floating-point immediates (default = 3)"), + cl::init(MaterializeFPImm3Ins), + cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), + clEnumValN(MaterializeFPImm2Ins, "2", + "Materialize FP immediate within 2 instructions"), + clEnumValN(MaterializeFPImm3Ins, "3", + "Materialize FP immediate within 3 instructions"), + clEnumValN(MaterializeFPImm4Ins, "4", + "Materialize FP immediate within 4 instructions"), + clEnumValN(MaterializeFPImm5Ins, "5", + "Materialize FP immediate within 5 instructions"), + clEnumValN(MaterializeFPImm6Ins, "6", + "Materialize FP immediate within 6 instructions " + "(behaves same as 5 on loongarch64)"))); + static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
cl::desc("Trap on integer division by zero."), cl::init(false)); @@ -190,6 +219,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::bf16, Expand); setCondCodeAction(FPCCToExpand, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Legal); @@ -237,6 +267,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::f64, MVT::f32, Expand); setCondCodeAction(FPCCToExpand, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); @@ -557,10 +588,67 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: return lowerVECREDUCE(Op, DAG); + case ISD::ConstantFP: + return lowerConstantFP(Op, DAG); } return SDValue(); } +SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + const APFloat &FPVal = CFP->getValueAPF(); + SDLoc DL(CFP); + + assert((VT == MVT::f32 && Subtarget.hasBasicF()) || + (VT == MVT::f64 && Subtarget.hasBasicD())); + + // Zero (0.0 or -0.0) is not custom-lowered here; return an empty SDValue. + if (FPVal.isZero()) + return SDValue(); + + // If LSX is enabled, prefer the cheaper 'vldi' instruction when it is legal. + if (isFPImmVLDILegal(FPVal, VT)) + return SDValue(); + + // Materialize the bit pattern as an integer and move it to an FP register. + APInt INTVal = FPVal.bitcastToAPInt(); + + // If generating INTVal and moving it to a floating-point register would + // take more than MaterializeFPImmInsNum instructions, fall back to a + // floating-point load from the constant pool (+1.0 is exempt from the limit). 
+ auto Seq = LoongArchMatInt::generateInstSeq(INTVal.getSExtValue()); + int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1); + if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0)) + return SDValue(); + + switch (VT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("Unexpected floating point type!"); + break; + case MVT::f32: { + SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32); + if (Subtarget.is64Bit()) + NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal); + return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64 + : LoongArchISD::MOVGR2FR_W, + DL, VT, NewVal); + } + case MVT::f64: { + if (Subtarget.is64Bit()) { + SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64); + return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal); + } + SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32); + SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32); + return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi); + } + } + + return SDValue(); +} + // Lower vecreduce_add using vhaddw instructions. 
// For Example: // call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) @@ -7152,7 +7240,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SRL_W) NODE_NAME_CASE(BSTRINS) NODE_NAME_CASE(BSTRPICK) + NODE_NAME_CASE(MOVGR2FR_W) NODE_NAME_CASE(MOVGR2FR_W_LA64) + NODE_NAME_CASE(MOVGR2FR_D) + NODE_NAME_CASE(MOVGR2FR_D_LO_HI) NODE_NAME_CASE(MOVFR2GR_S_LA64) NODE_NAME_CASE(FTINT) NODE_NAME_CASE(BUILD_PAIR_F64) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 8df3c13f26fea..19c85faa9f9cc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -57,7 +57,10 @@ enum NodeType : unsigned { MOD_WU, // FPR<->GPR transfer operations + MOVGR2FR_W, MOVGR2FR_W_LA64, + MOVGR2FR_D, + MOVGR2FR_D_LO_HI, MOVFR2GR_S_LA64, MOVFCSR2GR, MOVGR2FCSR, @@ -399,6 +402,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerBF16_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll index da8c3e93f6842..d111cf2fcfc07 100644 --- a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll +++ b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll @@ -226,8 +226,8 @@ define i32 @caller_half_in_fregs() nounwind { ; LA32F-ILP32D: # %bb.0: ; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16 ; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) +; LA32F-ILP32D-NEXT: lu12i.w $a0, -12 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a0 ; 
LA32F-ILP32D-NEXT: ori $a0, $zero, 1 ; LA32F-ILP32D-NEXT: ori $a1, $zero, 2 ; LA32F-ILP32D-NEXT: ori $a2, $zero, 3 @@ -264,8 +264,8 @@ define i32 @caller_half_in_fregs() nounwind { ; LA32D-ILP32D: # %bb.0: ; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16 ; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) +; LA32D-ILP32D-NEXT: lu12i.w $a0, -12 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-ILP32D-NEXT: ori $a0, $zero, 1 ; LA32D-ILP32D-NEXT: ori $a1, $zero, 2 ; LA32D-ILP32D-NEXT: ori $a2, $zero, 3 @@ -283,8 +283,9 @@ define i32 @caller_half_in_fregs() nounwind { ; LA64S: # %bb.0: ; LA64S-NEXT: addi.d $sp, $sp, -16 ; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) +; LA64S-NEXT: lu12i.w $a0, -12 +; LA64S-NEXT: lu32i.d $a0, 0 +; LA64S-NEXT: movgr2fr.w $fa0, $a0 ; LA64S-NEXT: ori $a0, $zero, 1 ; LA64S-NEXT: ori $a1, $zero, 2 ; LA64S-NEXT: ori $a2, $zero, 3 @@ -324,8 +325,9 @@ define i32 @caller_half_in_fregs() nounwind { ; LA64F-LP64D: # %bb.0: ; LA64F-LP64D-NEXT: addi.d $sp, $sp, -16 ; LA64F-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) +; LA64F-LP64D-NEXT: lu12i.w $a0, -12 +; LA64F-LP64D-NEXT: lu32i.d $a0, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-LP64D-NEXT: ori $a0, $zero, 1 ; LA64F-LP64D-NEXT: ori $a1, $zero, 2 ; LA64F-LP64D-NEXT: ori $a2, $zero, 3 @@ -365,8 +367,9 @@ define i32 @caller_half_in_fregs() nounwind { ; LA64D-LP64D: # %bb.0: ; LA64D-LP64D-NEXT: addi.d $sp, $sp, -16 ; LA64D-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) +; LA64D-LP64D-NEXT: lu12i.w $a0, -12 +; LA64D-LP64D-NEXT: lu32i.d $a0, 0 +; 
LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-LP64D-NEXT: ori $a0, $zero, 1 ; LA64D-LP64D-NEXT: ori $a1, $zero, 2 ; LA64D-LP64D-NEXT: ori $a2, $zero, 3 @@ -606,24 +609,24 @@ define i32 @caller_half_in_gregs() nounwind { ; LA32F-ILP32D: # %bb.0: ; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16 ; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA32F-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; LA32F-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_2) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; LA32F-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_3) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; LA32F-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_4) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; LA32F-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_5) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; LA32F-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_6) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7) -; LA32F-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_7) -; LA32F-ILP32D-NEXT: lu12i.w $a0, -12 -; LA32F-ILP32D-NEXT: ori $a0, $a0, 2176 +; LA32F-ILP32D-NEXT: lu12i.w $a1, -12 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa1, $a1 +; LA32F-ILP32D-NEXT: ori $a0, $a1, 2176 +; LA32F-ILP32D-NEXT: lu12i.w $a2, -13 +; LA32F-ILP32D-NEXT: ori $a2, $a2, 3072 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a2 +; LA32F-ILP32D-NEXT: ori $a2, $a1, 512 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa2, $a2 +; LA32F-ILP32D-NEXT: ori $a2, $a1, 1024 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa3, $a2 +; LA32F-ILP32D-NEXT: ori $a2, $a1, 1280 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa4, $a2 +; LA32F-ILP32D-NEXT: ori $a2, $a1, 1536 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa5, $a2 +; LA32F-ILP32D-NEXT: ori $a2, $a1, 1792 +; LA32F-ILP32D-NEXT: movgr2fr.w 
$fa6, $a2 +; LA32F-ILP32D-NEXT: ori $a1, $a1, 2048 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa7, $a1 ; LA32F-ILP32D-NEXT: ori $a1, $zero, 10 ; LA32F-ILP32D-NEXT: bl callee_half_in_gregs ; LA32F-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload @@ -656,24 +659,24 @@ define i32 @caller_half_in_gregs() nounwind { ; LA32D-ILP32D: # %bb.0: ; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16 ; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA32D-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; LA32D-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_2) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; LA32D-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_3) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; LA32D-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_4) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; LA32D-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_5) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; LA32D-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_6) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7) -; LA32D-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_7) -; LA32D-ILP32D-NEXT: lu12i.w $a0, -12 -; LA32D-ILP32D-NEXT: ori $a0, $a0, 2176 +; LA32D-ILP32D-NEXT: lu12i.w $a1, -12 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa1, $a1 +; LA32D-ILP32D-NEXT: ori $a0, $a1, 2176 +; LA32D-ILP32D-NEXT: lu12i.w $a2, -13 +; LA32D-ILP32D-NEXT: ori $a2, $a2, 3072 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a2 +; LA32D-ILP32D-NEXT: ori $a2, $a1, 512 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa2, $a2 +; LA32D-ILP32D-NEXT: ori $a2, $a1, 1024 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa3, $a2 +; LA32D-ILP32D-NEXT: ori $a2, $a1, 1280 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa4, $a2 +; LA32D-ILP32D-NEXT: ori $a2, $a1, 1536 +; LA32D-ILP32D-NEXT: movgr2fr.w 
$fa5, $a2 +; LA32D-ILP32D-NEXT: ori $a2, $a1, 1792 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa6, $a2 +; LA32D-ILP32D-NEXT: ori $a1, $a1, 2048 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa7, $a1 ; LA32D-ILP32D-NEXT: ori $a1, $zero, 10 ; LA32D-ILP32D-NEXT: bl callee_half_in_gregs ; LA32D-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload @@ -684,25 +687,33 @@ define i32 @caller_half_in_gregs() nounwind { ; LA64S: # %bb.0: ; LA64S-NEXT: addi.d $sp, $sp, -16 ; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA64S-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; LA64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; LA64S-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; LA64S-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; LA64S-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; LA64S-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7) -; LA64S-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8) -; LA64S-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8) -; LA64S-NEXT: movfr2gr.s $a0, $ft0 +; LA64S-NEXT: lu12i.w $a1, -12 +; LA64S-NEXT: ori $a0, $a1, 2176 +; LA64S-NEXT: ori $a2, $a1, 512 +; LA64S-NEXT: ori $a3, $a1, 1024 +; LA64S-NEXT: ori $a4, $a1, 1280 +; LA64S-NEXT: ori $a5, $a1, 1536 +; LA64S-NEXT: ori $a6, $a1, 1792 +; LA64S-NEXT: ori $a7, $a1, 2048 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa1, $a1 +; LA64S-NEXT: lu12i.w $a1, -13 +; LA64S-NEXT: ori $a1, $a1, 3072 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa0, $a1 +; LA64S-NEXT: lu32i.d $a2, 0 +; LA64S-NEXT: movgr2fr.w $fa2, $a2 +; LA64S-NEXT: lu32i.d $a3, 0 
+; LA64S-NEXT: movgr2fr.w $fa3, $a3 +; LA64S-NEXT: lu32i.d $a4, 0 +; LA64S-NEXT: movgr2fr.w $fa4, $a4 +; LA64S-NEXT: lu32i.d $a5, 0 +; LA64S-NEXT: movgr2fr.w $fa5, $a5 +; LA64S-NEXT: lu32i.d $a0, 0 +; LA64S-NEXT: lu32i.d $a6, 0 +; LA64S-NEXT: movgr2fr.w $fa6, $a6 +; LA64S-NEXT: lu32i.d $a7, 0 +; LA64S-NEXT: movgr2fr.w $fa7, $a7 ; LA64S-NEXT: ori $a1, $zero, 10 ; LA64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs) ; LA64S-NEXT: jirl $ra, $ra, 0 @@ -714,35 +725,27 @@ define i32 @caller_half_in_gregs() nounwind { ; LA64F-LP64S: # %bb.0: ; LA64F-LP64S-NEXT: addi.d $sp, $sp, -32 ; LA64F-LP64S-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA64F-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) -; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA64F-LP64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1) -; LA64F-LP64S-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-LP64S-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_2) -; LA64F-LP64S-NEXT: fld.s $fa0, $a1, %pc_lo12(.LCPI3_2) -; LA64F-LP64S-NEXT: movfr2gr.s $a1, $fa1 -; LA64F-LP64S-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_3) -; LA64F-LP64S-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_3) -; LA64F-LP64S-NEXT: movfr2gr.s $a2, $fa0 -; LA64F-LP64S-NEXT: pcalau12i $a3, %pc_hi20(.LCPI3_4) -; LA64F-LP64S-NEXT: fld.s $fa0, $a3, %pc_lo12(.LCPI3_4) -; LA64F-LP64S-NEXT: movfr2gr.s $a3, $fa1 -; LA64F-LP64S-NEXT: pcalau12i $a4, %pc_hi20(.LCPI3_5) -; LA64F-LP64S-NEXT: fld.s $fa1, $a4, %pc_lo12(.LCPI3_5) -; LA64F-LP64S-NEXT: movfr2gr.s $a4, $fa0 -; LA64F-LP64S-NEXT: pcalau12i $a5, %pc_hi20(.LCPI3_6) -; LA64F-LP64S-NEXT: fld.s $fa0, $a5, %pc_lo12(.LCPI3_6) -; LA64F-LP64S-NEXT: movfr2gr.s $a5, $fa1 -; LA64F-LP64S-NEXT: ori $a6, $zero, 10 -; LA64F-LP64S-NEXT: st.d $a6, $sp, 8 -; LA64F-LP64S-NEXT: movfr2gr.s $a6, $fa0 -; LA64F-LP64S-NEXT: pcalau12i $a7, %pc_hi20(.LCPI3_7) -; LA64F-LP64S-NEXT: fld.s $fa0, $a7, %pc_lo12(.LCPI3_7) -; LA64F-LP64S-NEXT: lu12i.w $a7, -12 -; LA64F-LP64S-NEXT: ori $t0, $a7, 2176 +; 
LA64F-LP64S-NEXT: ori $a0, $zero, 10 +; LA64F-LP64S-NEXT: st.d $a0, $sp, 8 +; LA64F-LP64S-NEXT: lu12i.w $a1, -12 +; LA64F-LP64S-NEXT: ori $t0, $a1, 2176 ; LA64F-LP64S-NEXT: lu32i.d $t0, 0 -; LA64F-LP64S-NEXT: movfr2gr.s $a7, $fa0 +; LA64F-LP64S-NEXT: ori $a2, $a1, 512 +; LA64F-LP64S-NEXT: ori $a3, $a1, 1024 +; LA64F-LP64S-NEXT: ori $a4, $a1, 1280 +; LA64F-LP64S-NEXT: ori $a5, $a1, 1536 +; LA64F-LP64S-NEXT: ori $a6, $a1, 1792 +; LA64F-LP64S-NEXT: ori $a7, $a1, 2048 +; LA64F-LP64S-NEXT: lu32i.d $a1, 0 +; LA64F-LP64S-NEXT: lu12i.w $a0, -13 +; LA64F-LP64S-NEXT: ori $a0, $a0, 3072 +; LA64F-LP64S-NEXT: lu32i.d $a0, 0 +; LA64F-LP64S-NEXT: lu32i.d $a2, 0 +; LA64F-LP64S-NEXT: lu32i.d $a3, 0 +; LA64F-LP64S-NEXT: lu32i.d $a4, 0 +; LA64F-LP64S-NEXT: lu32i.d $a5, 0 +; LA64F-LP64S-NEXT: lu32i.d $a6, 0 +; LA64F-LP64S-NEXT: lu32i.d $a7, 0 ; LA64F-LP64S-NEXT: st.w $t0, $sp, 0 ; LA64F-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs) ; LA64F-LP64S-NEXT: jirl $ra, $ra, 0 @@ -754,25 +757,33 @@ define i32 @caller_half_in_gregs() nounwind { ; LA64F-LP64D: # %bb.0: ; LA64F-LP64D-NEXT: addi.d $sp, $sp, -16 ; LA64F-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA64F-LP64D-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; LA64F-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; LA64F-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; LA64F-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; LA64F-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; LA64F-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6) -; LA64F-LP64D-NEXT: pcalau12i $a0, 
%pc_hi20(.LCPI3_7) -; LA64F-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8) -; LA64F-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8) -; LA64F-LP64D-NEXT: movfr2gr.s $a0, $ft0 +; LA64F-LP64D-NEXT: lu12i.w $a1, -12 +; LA64F-LP64D-NEXT: ori $a0, $a1, 2176 +; LA64F-LP64D-NEXT: ori $a2, $a1, 512 +; LA64F-LP64D-NEXT: ori $a3, $a1, 1024 +; LA64F-LP64D-NEXT: ori $a4, $a1, 1280 +; LA64F-LP64D-NEXT: ori $a5, $a1, 1536 +; LA64F-LP64D-NEXT: ori $a6, $a1, 1792 +; LA64F-LP64D-NEXT: ori $a7, $a1, 2048 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-LP64D-NEXT: lu12i.w $a1, -13 +; LA64F-LP64D-NEXT: ori $a1, $a1, 3072 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a1 +; LA64F-LP64D-NEXT: lu32i.d $a2, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa2, $a2 +; LA64F-LP64D-NEXT: lu32i.d $a3, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa3, $a3 +; LA64F-LP64D-NEXT: lu32i.d $a4, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa4, $a4 +; LA64F-LP64D-NEXT: lu32i.d $a5, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa5, $a5 +; LA64F-LP64D-NEXT: lu32i.d $a0, 0 +; LA64F-LP64D-NEXT: lu32i.d $a6, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa6, $a6 +; LA64F-LP64D-NEXT: lu32i.d $a7, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa7, $a7 ; LA64F-LP64D-NEXT: ori $a1, $zero, 10 ; LA64F-LP64D-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs) ; LA64F-LP64D-NEXT: jirl $ra, $ra, 0 @@ -784,35 +795,27 @@ define i32 @caller_half_in_gregs() nounwind { ; LA64D-LP64S: # %bb.0: ; LA64D-LP64S-NEXT: addi.d $sp, $sp, -32 ; LA64D-LP64S-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64D-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA64D-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) -; LA64D-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA64D-LP64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1) -; LA64D-LP64S-NEXT: movfr2gr.s $a0, $fa0 -; LA64D-LP64S-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_2) -; LA64D-LP64S-NEXT: fld.s $fa0, $a1, %pc_lo12(.LCPI3_2) -; 
LA64D-LP64S-NEXT: movfr2gr.s $a1, $fa1 -; LA64D-LP64S-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_3) -; LA64D-LP64S-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_3) -; LA64D-LP64S-NEXT: movfr2gr.s $a2, $fa0 -; LA64D-LP64S-NEXT: pcalau12i $a3, %pc_hi20(.LCPI3_4) -; LA64D-LP64S-NEXT: fld.s $fa0, $a3, %pc_lo12(.LCPI3_4) -; LA64D-LP64S-NEXT: movfr2gr.s $a3, $fa1 -; LA64D-LP64S-NEXT: pcalau12i $a4, %pc_hi20(.LCPI3_5) -; LA64D-LP64S-NEXT: fld.s $fa1, $a4, %pc_lo12(.LCPI3_5) -; LA64D-LP64S-NEXT: movfr2gr.s $a4, $fa0 -; LA64D-LP64S-NEXT: pcalau12i $a5, %pc_hi20(.LCPI3_6) -; LA64D-LP64S-NEXT: fld.s $fa0, $a5, %pc_lo12(.LCPI3_6) -; LA64D-LP64S-NEXT: movfr2gr.s $a5, $fa1 -; LA64D-LP64S-NEXT: ori $a6, $zero, 10 -; LA64D-LP64S-NEXT: st.d $a6, $sp, 8 -; LA64D-LP64S-NEXT: movfr2gr.s $a6, $fa0 -; LA64D-LP64S-NEXT: pcalau12i $a7, %pc_hi20(.LCPI3_7) -; LA64D-LP64S-NEXT: fld.s $fa0, $a7, %pc_lo12(.LCPI3_7) -; LA64D-LP64S-NEXT: lu12i.w $a7, -12 -; LA64D-LP64S-NEXT: ori $t0, $a7, 2176 +; LA64D-LP64S-NEXT: ori $a0, $zero, 10 +; LA64D-LP64S-NEXT: st.d $a0, $sp, 8 +; LA64D-LP64S-NEXT: lu12i.w $a1, -12 +; LA64D-LP64S-NEXT: ori $t0, $a1, 2176 ; LA64D-LP64S-NEXT: lu32i.d $t0, 0 -; LA64D-LP64S-NEXT: movfr2gr.s $a7, $fa0 +; LA64D-LP64S-NEXT: ori $a2, $a1, 512 +; LA64D-LP64S-NEXT: ori $a3, $a1, 1024 +; LA64D-LP64S-NEXT: ori $a4, $a1, 1280 +; LA64D-LP64S-NEXT: ori $a5, $a1, 1536 +; LA64D-LP64S-NEXT: ori $a6, $a1, 1792 +; LA64D-LP64S-NEXT: ori $a7, $a1, 2048 +; LA64D-LP64S-NEXT: lu32i.d $a1, 0 +; LA64D-LP64S-NEXT: lu12i.w $a0, -13 +; LA64D-LP64S-NEXT: ori $a0, $a0, 3072 +; LA64D-LP64S-NEXT: lu32i.d $a0, 0 +; LA64D-LP64S-NEXT: lu32i.d $a2, 0 +; LA64D-LP64S-NEXT: lu32i.d $a3, 0 +; LA64D-LP64S-NEXT: lu32i.d $a4, 0 +; LA64D-LP64S-NEXT: lu32i.d $a5, 0 +; LA64D-LP64S-NEXT: lu32i.d $a6, 0 +; LA64D-LP64S-NEXT: lu32i.d $a7, 0 ; LA64D-LP64S-NEXT: st.w $t0, $sp, 0 ; LA64D-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs) ; LA64D-LP64S-NEXT: jirl $ra, $ra, 0 @@ -824,25 +827,33 @@ define i32 @caller_half_in_gregs() 
nounwind { ; LA64D-LP64D: # %bb.0: ; LA64D-LP64D-NEXT: addi.d $sp, $sp, -16 ; LA64D-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; LA64D-LP64D-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; LA64D-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; LA64D-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; LA64D-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; LA64D-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; LA64D-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7) -; LA64D-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8) -; LA64D-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8) -; LA64D-LP64D-NEXT: movfr2gr.s $a0, $ft0 +; LA64D-LP64D-NEXT: lu12i.w $a1, -12 +; LA64D-LP64D-NEXT: ori $a0, $a1, 2176 +; LA64D-LP64D-NEXT: ori $a2, $a1, 512 +; LA64D-LP64D-NEXT: ori $a3, $a1, 1024 +; LA64D-LP64D-NEXT: ori $a4, $a1, 1280 +; LA64D-LP64D-NEXT: ori $a5, $a1, 1536 +; LA64D-LP64D-NEXT: ori $a6, $a1, 1792 +; LA64D-LP64D-NEXT: ori $a7, $a1, 2048 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-LP64D-NEXT: lu12i.w $a1, -13 +; LA64D-LP64D-NEXT: ori $a1, $a1, 3072 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a1 +; LA64D-LP64D-NEXT: lu32i.d $a2, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa2, $a2 +; LA64D-LP64D-NEXT: lu32i.d $a3, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa3, $a3 +; LA64D-LP64D-NEXT: lu32i.d $a4, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa4, $a4 +; LA64D-LP64D-NEXT: lu32i.d 
$a5, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa5, $a5 +; LA64D-LP64D-NEXT: lu32i.d $a0, 0 +; LA64D-LP64D-NEXT: lu32i.d $a6, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa6, $a6 +; LA64D-LP64D-NEXT: lu32i.d $a7, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa7, $a7 ; LA64D-LP64D-NEXT: ori $a1, $zero, 10 ; LA64D-LP64D-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs) ; LA64D-LP64D-NEXT: jirl $ra, $ra, 0 @@ -1110,22 +1121,22 @@ define i32 @caller_half_on_stack() nounwind { ; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32F-ILP32D-NEXT: lu12i.w $a0, -12 ; LA32F-ILP32D-NEXT: ori $t0, $a0, 3200 -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA32F-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA32F-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) -; LA32F-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) -; LA32F-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) -; LA32F-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) -; LA32F-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6) -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7) -; LA32F-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7) +; LA32F-ILP32D-NEXT: ori $a1, $a0, 2304 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a1 +; LA32F-ILP32D-NEXT: ori $a1, $a0, 2432 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa1, $a1 +; LA32F-ILP32D-NEXT: ori $a1, $a0, 2560 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa2, $a1 +; LA32F-ILP32D-NEXT: ori $a1, $a0, 2688 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa3, $a1 +; LA32F-ILP32D-NEXT: ori $a1, $a0, 2816 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa4, $a1 +; LA32F-ILP32D-NEXT: ori $a1, $a0, 2944 +; LA32F-ILP32D-NEXT: movgr2fr.w 
$fa5, $a1 +; LA32F-ILP32D-NEXT: ori $a1, $a0, 3072 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa6, $a1 +; LA32F-ILP32D-NEXT: ori $a0, $a0, 3136 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa7, $a0 ; LA32F-ILP32D-NEXT: ori $a0, $zero, 1 ; LA32F-ILP32D-NEXT: ori $a1, $zero, 2 ; LA32F-ILP32D-NEXT: ori $a2, $zero, 3 @@ -1182,22 +1193,22 @@ define i32 @caller_half_on_stack() nounwind { ; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32D-ILP32D-NEXT: lu12i.w $a0, -12 ; LA32D-ILP32D-NEXT: ori $t0, $a0, 3200 -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA32D-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA32D-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) -; LA32D-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) -; LA32D-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) -; LA32D-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) -; LA32D-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6) -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7) -; LA32D-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7) +; LA32D-ILP32D-NEXT: ori $a1, $a0, 2304 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a1 +; LA32D-ILP32D-NEXT: ori $a1, $a0, 2432 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa1, $a1 +; LA32D-ILP32D-NEXT: ori $a1, $a0, 2560 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa2, $a1 +; LA32D-ILP32D-NEXT: ori $a1, $a0, 2688 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa3, $a1 +; LA32D-ILP32D-NEXT: ori $a1, $a0, 2816 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa4, $a1 +; LA32D-ILP32D-NEXT: ori $a1, $a0, 2944 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa5, $a1 +; LA32D-ILP32D-NEXT: ori $a1, $a0, 3072 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa6, 
$a1 +; LA32D-ILP32D-NEXT: ori $a0, $a0, 3136 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa7, $a0 ; LA32D-ILP32D-NEXT: ori $a0, $zero, 1 ; LA32D-ILP32D-NEXT: ori $a1, $zero, 2 ; LA32D-ILP32D-NEXT: ori $a2, $zero, 3 @@ -1219,22 +1230,30 @@ define i32 @caller_half_on_stack() nounwind { ; LA64S-NEXT: lu12i.w $a0, -12 ; LA64S-NEXT: ori $t0, $a0, 3200 ; LA64S-NEXT: lu32i.d $t0, 0 -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA64S-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) -; LA64S-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) -; LA64S-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) -; LA64S-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) -; LA64S-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6) -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7) -; LA64S-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7) +; LA64S-NEXT: ori $a1, $a0, 2304 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa0, $a1 +; LA64S-NEXT: ori $a1, $a0, 2432 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa1, $a1 +; LA64S-NEXT: ori $a1, $a0, 2560 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa2, $a1 +; LA64S-NEXT: ori $a1, $a0, 2688 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa3, $a1 +; LA64S-NEXT: ori $a1, $a0, 2816 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa4, $a1 +; LA64S-NEXT: ori $a1, $a0, 2944 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa5, $a1 +; LA64S-NEXT: ori $a1, $a0, 3072 +; LA64S-NEXT: lu32i.d $a1, 0 +; LA64S-NEXT: movgr2fr.w $fa6, $a1 +; LA64S-NEXT: ori $a0, $a0, 3136 +; LA64S-NEXT: lu32i.d $a0, 0 +; LA64S-NEXT: movgr2fr.w $fa7, $a0 ; LA64S-NEXT: ori $a0, $zero, 1 
; LA64S-NEXT: ori $a1, $zero, 2 ; LA64S-NEXT: ori $a2, $zero, 3 @@ -1303,22 +1322,30 @@ define i32 @caller_half_on_stack() nounwind { ; LA64F-LP64D-NEXT: lu12i.w $a0, -12 ; LA64F-LP64D-NEXT: ori $t0, $a0, 3200 ; LA64F-LP64D-NEXT: lu32i.d $t0, 0 -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA64F-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA64F-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) -; LA64F-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) -; LA64F-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) -; LA64F-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) -; LA64F-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6) -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7) -; LA64F-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7) +; LA64F-LP64D-NEXT: ori $a1, $a0, 2304 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a1 +; LA64F-LP64D-NEXT: ori $a1, $a0, 2432 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-LP64D-NEXT: ori $a1, $a0, 2560 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa2, $a1 +; LA64F-LP64D-NEXT: ori $a1, $a0, 2688 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa3, $a1 +; LA64F-LP64D-NEXT: ori $a1, $a0, 2816 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa4, $a1 +; LA64F-LP64D-NEXT: ori $a1, $a0, 2944 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa5, $a1 +; LA64F-LP64D-NEXT: ori $a1, $a0, 3072 +; LA64F-LP64D-NEXT: lu32i.d $a1, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa6, $a1 +; LA64F-LP64D-NEXT: ori $a0, 
$a0, 3136 +; LA64F-LP64D-NEXT: lu32i.d $a0, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa7, $a0 ; LA64F-LP64D-NEXT: ori $a0, $zero, 1 ; LA64F-LP64D-NEXT: ori $a1, $zero, 2 ; LA64F-LP64D-NEXT: ori $a2, $zero, 3 @@ -1387,22 +1414,30 @@ define i32 @caller_half_on_stack() nounwind { ; LA64D-LP64D-NEXT: lu12i.w $a0, -12 ; LA64D-LP64D-NEXT: ori $t0, $a0, 3200 ; LA64D-LP64D-NEXT: lu32i.d $t0, 0 -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA64D-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA64D-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) -; LA64D-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) -; LA64D-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) -; LA64D-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) -; LA64D-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6) -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7) -; LA64D-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7) +; LA64D-LP64D-NEXT: ori $a1, $a0, 2304 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a1 +; LA64D-LP64D-NEXT: ori $a1, $a0, 2432 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-LP64D-NEXT: ori $a1, $a0, 2560 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa2, $a1 +; LA64D-LP64D-NEXT: ori $a1, $a0, 2688 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa3, $a1 +; LA64D-LP64D-NEXT: ori $a1, $a0, 2816 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa4, $a1 +; LA64D-LP64D-NEXT: ori $a1, $a0, 2944 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa5, $a1 +; 
LA64D-LP64D-NEXT: ori $a1, $a0, 3072 +; LA64D-LP64D-NEXT: lu32i.d $a1, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa6, $a1 +; LA64D-LP64D-NEXT: ori $a0, $a0, 3136 +; LA64D-LP64D-NEXT: lu32i.d $a0, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa7, $a0 ; LA64D-LP64D-NEXT: ori $a0, $zero, 1 ; LA64D-LP64D-NEXT: ori $a1, $zero, 2 ; LA64D-LP64D-NEXT: ori $a2, $zero, 3 @@ -1436,8 +1471,9 @@ define half @callee_half_ret() nounwind { ; ; LA32F-ILP32D-LABEL: callee_half_ret: ; LA32F-ILP32D: # %bb.0: -; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) +; LA32F-ILP32D-NEXT: lu12i.w $a0, -13 +; LA32F-ILP32D-NEXT: ori $a0, $a0, 3072 +; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32F-ILP32D-NEXT: ret ; ; LA32D-ILP32S-LABEL: callee_half_ret: @@ -1448,40 +1484,47 @@ define half @callee_half_ret() nounwind { ; ; LA32D-ILP32D-LABEL: callee_half_ret: ; LA32D-ILP32D: # %bb.0: -; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) +; LA32D-ILP32D-NEXT: lu12i.w $a0, -13 +; LA32D-ILP32D-NEXT: ori $a0, $a0, 3072 +; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-ILP32D-NEXT: ret ; ; LA64S-LABEL: callee_half_ret: ; LA64S: # %bb.0: -; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) +; LA64S-NEXT: lu12i.w $a0, -13 +; LA64S-NEXT: ori $a0, $a0, 3072 +; LA64S-NEXT: lu32i.d $a0, 0 +; LA64S-NEXT: movgr2fr.w $fa0, $a0 ; LA64S-NEXT: ret ; ; LA64F-LP64S-LABEL: callee_half_ret: ; LA64F-LP64S: # %bb.0: -; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA64F-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) -; LA64F-LP64S-NEXT: movfr2gr.s $a0, $fa0 +; LA64F-LP64S-NEXT: lu12i.w $a0, -13 +; LA64F-LP64S-NEXT: ori $a0, $a0, 3072 +; LA64F-LP64S-NEXT: lu32i.d $a0, 0 ; LA64F-LP64S-NEXT: ret ; ; LA64F-LP64D-LABEL: callee_half_ret: ; LA64F-LP64D: # %bb.0: -; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA64F-LP64D-NEXT: fld.s $fa0, $a0, 
%pc_lo12(.LCPI6_0) +; LA64F-LP64D-NEXT: lu12i.w $a0, -13 +; LA64F-LP64D-NEXT: ori $a0, $a0, 3072 +; LA64F-LP64D-NEXT: lu32i.d $a0, 0 +; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-LP64D-NEXT: ret ; ; LA64D-LP64S-LABEL: callee_half_ret: ; LA64D-LP64S: # %bb.0: -; LA64D-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA64D-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) -; LA64D-LP64S-NEXT: movfr2gr.s $a0, $fa0 +; LA64D-LP64S-NEXT: lu12i.w $a0, -13 +; LA64D-LP64S-NEXT: ori $a0, $a0, 3072 +; LA64D-LP64S-NEXT: lu32i.d $a0, 0 ; LA64D-LP64S-NEXT: ret ; ; LA64D-LP64D-LABEL: callee_half_ret: ; LA64D-LP64D: # %bb.0: -; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) +; LA64D-LP64D-NEXT: lu12i.w $a0, -13 +; LA64D-LP64D-NEXT: ori $a0, $a0, 3072 +; LA64D-LP64D-NEXT: lu32i.d $a0, 0 +; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-LP64D-NEXT: ret ret half 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll index 62c2cc999456c..95f9aa514b340 100644 --- a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll +++ b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll @@ -65,24 +65,26 @@ define i32 @caller_double_in_gpr_exhausted_fprs() nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: addi.w $sp, $sp, -16 ; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: movgr2fr.w $fa7, $zero +; CHECK-NEXT: lu12i.w $a0, 261888 +; CHECK-NEXT: fmov.d $fa0, $fa7 +; CHECK-NEXT: movgr2frh.w $fa0, $a0 +; CHECK-NEXT: lu12i.w $a0, 262144 +; CHECK-NEXT: fmov.d $fa1, $fa7 +; CHECK-NEXT: movgr2frh.w $fa1, $a0 +; CHECK-NEXT: lu12i.w $a0, 262400 +; CHECK-NEXT: fmov.d $fa3, $fa7 +; CHECK-NEXT: movgr2frh.w $fa3, $a0 ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_0) ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_1) +; 
CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_1) ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI3_2) +; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_2) ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_3) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_4) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_5) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI3_6) -; CHECK-NEXT: addi.w $a0, $zero, 1 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: ffint.s.w $fa0, $fa0 -; CHECK-NEXT: fcvt.d.s $fa0, $fa0 +; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_3) +; CHECK-NEXT: lu12i.w $a0, 262656 +; CHECK-NEXT: movgr2frh.w $fa7, $a0 ; CHECK-NEXT: lu12i.w $a1, 262688 ; CHECK-NEXT: move $a0, $zero ; CHECK-NEXT: bl callee_double_in_gpr_exhausted_fprs @@ -125,24 +127,26 @@ define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind { ; CHECK-NEXT: st.w $zero, $sp, 0 ; CHECK-NEXT: lu12i.w $a0, 262848 ; CHECK-NEXT: st.w $a0, $sp, 12 +; CHECK-NEXT: movgr2fr.w $fa7, $zero +; CHECK-NEXT: lu12i.w $a0, 261888 +; CHECK-NEXT: fmov.d $fa0, $fa7 +; CHECK-NEXT: movgr2frh.w $fa0, $a0 +; CHECK-NEXT: lu12i.w $a0, 262144 +; CHECK-NEXT: fmov.d $fa1, $fa7 +; CHECK-NEXT: movgr2frh.w $fa1, $a0 +; CHECK-NEXT: lu12i.w $a0, 262400 +; CHECK-NEXT: fmov.d $fa3, $fa7 +; CHECK-NEXT: movgr2frh.w $fa3, $a0 +; CHECK-NEXT: lu12i.w $a0, 262656 +; CHECK-NEXT: movgr2frh.w $fa7, $a0 ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI5_0) ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI5_1) +; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI5_1) ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI5_2) +; 
CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_2) ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) -; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI5_3) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) -; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_4) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) -; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_5) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) -; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_6) -; CHECK-NEXT: addi.w $a0, $zero, 1 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: ffint.s.w $fa0, $fa0 -; CHECK-NEXT: fcvt.d.s $fa0, $fa0 +; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_3) ; CHECK-NEXT: lu12i.w $a1, 262688 ; CHECK-NEXT: lu12i.w $a3, 262720 ; CHECK-NEXT: lu12i.w $a5, 262752 @@ -168,10 +172,9 @@ define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind { define double @callee_double_ret() nounwind { ; CHECK-LABEL: callee_double_ret: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.w $a0, $zero, 1 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: ffint.s.w $fa0, $fa0 -; CHECK-NEXT: fcvt.d.s $fa0, $fa0 +; CHECK-NEXT: movgr2fr.w $fa0, $zero +; CHECK-NEXT: lu12i.w $a0, 261888 +; CHECK-NEXT: movgr2frh.w $fa0, $a0 ; CHECK-NEXT: ret ret double 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll index fe403ec532d8e..0b715cb18f8ad 100644 --- a/llvm/test/CodeGen/LoongArch/double-imm.ll +++ b/llvm/test/CodeGen/LoongArch/double-imm.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=0 < %s | FileCheck %s --check-prefixes=LA32,LA32-0 +; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=2 < %s | FileCheck %s --check-prefixes=LA32,LA32-2 +; RUN: llc 
--mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=3 < %s | FileCheck %s --check-prefixes=LA32,LA32-3 +; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=4 < %s | FileCheck %s --check-prefixes=LA32,LA32-4 +; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=5 < %s | FileCheck %s --check-prefixes=LA32,LA32-5 +; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=6 < %s | FileCheck %s --check-prefixes=LA32,LA32-6 +; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=0 < %s | FileCheck %s --check-prefixes=LA64,LA64-0 +; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=2 < %s | FileCheck %s --check-prefixes=LA64,LA64-2 +; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=3 < %s | FileCheck %s --check-prefixes=LA64,LA64-3 +; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=4 < %s | FileCheck %s --check-prefixes=LA64,LA64-4 +; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=5 < %s | FileCheck %s --check-prefixes=LA64,LA64-5 +; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=6 < %s | FileCheck %s --check-prefixes=LA64,LA64-5 define double @f64_positive_zero() nounwind { ; LA32-LABEL: f64_positive_zero: @@ -32,28 +42,318 @@ define double @f64_negative_zero() nounwind { ret double -0.0 } +define double @f64_constant_ins1() nounwind { +; LA32-0-LABEL: f64_constant_ins1: +; LA32-0: # %bb.0: +; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) +; LA32-0-NEXT: ret +; +; LA32-2-LABEL: f64_constant_ins1: +; LA32-2: # %bb.0: +; LA32-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA32-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) +; LA32-2-NEXT: ret +; +; LA32-3-LABEL: f64_constant_ins1: +; LA32-3: # %bb.0: +; LA32-3-NEXT: movgr2fr.w $fa0, $zero +; LA32-3-NEXT: lu12i.w $a0, 263424 +; LA32-3-NEXT: 
movgr2frh.w $fa0, $a0 +; LA32-3-NEXT: ret +; +; LA32-4-LABEL: f64_constant_ins1: +; LA32-4: # %bb.0: +; LA32-4-NEXT: movgr2fr.w $fa0, $zero +; LA32-4-NEXT: lu12i.w $a0, 263424 +; LA32-4-NEXT: movgr2frh.w $fa0, $a0 +; LA32-4-NEXT: ret +; +; LA32-5-LABEL: f64_constant_ins1: +; LA32-5: # %bb.0: +; LA32-5-NEXT: movgr2fr.w $fa0, $zero +; LA32-5-NEXT: lu12i.w $a0, 263424 +; LA32-5-NEXT: movgr2frh.w $fa0, $a0 +; LA32-5-NEXT: ret +; +; LA32-6-LABEL: f64_constant_ins1: +; LA32-6: # %bb.0: +; LA32-6-NEXT: movgr2fr.w $fa0, $zero +; LA32-6-NEXT: lu12i.w $a0, 263424 +; LA32-6-NEXT: movgr2frh.w $fa0, $a0 +; LA32-6-NEXT: ret +; +; LA64-0-LABEL: f64_constant_ins1: +; LA64-0: # %bb.0: +; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) +; LA64-0-NEXT: ret +; +; LA64-2-LABEL: f64_constant_ins1: +; LA64-2: # %bb.0: +; LA64-2-NEXT: lu52i.d $a0, $zero, 1029 +; LA64-2-NEXT: movgr2fr.d $fa0, $a0 +; LA64-2-NEXT: ret +; +; LA64-3-LABEL: f64_constant_ins1: +; LA64-3: # %bb.0: +; LA64-3-NEXT: lu52i.d $a0, $zero, 1029 +; LA64-3-NEXT: movgr2fr.d $fa0, $a0 +; LA64-3-NEXT: ret +; +; LA64-4-LABEL: f64_constant_ins1: +; LA64-4: # %bb.0: +; LA64-4-NEXT: lu52i.d $a0, $zero, 1029 +; LA64-4-NEXT: movgr2fr.d $fa0, $a0 +; LA64-4-NEXT: ret +; +; LA64-5-LABEL: f64_constant_ins1: +; LA64-5: # %bb.0: +; LA64-5-NEXT: lu52i.d $a0, $zero, 1029 +; LA64-5-NEXT: movgr2fr.d $fa0, $a0 +; LA64-5-NEXT: ret + ret double 64.0 +} + +define double @f64_constant_ins2() nounwind { +; LA32-0-LABEL: f64_constant_ins2: +; LA32-0: # %bb.0: +; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA32-0-NEXT: ret +; +; LA32-2-LABEL: f64_constant_ins2: +; LA32-2: # %bb.0: +; LA32-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA32-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA32-2-NEXT: ret +; +; LA32-3-LABEL: f64_constant_ins2: +; LA32-3: # %bb.0: +; LA32-3-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA32-3-NEXT: fld.d $fa0, $a0, 
%pc_lo12(.LCPI3_0) +; LA32-3-NEXT: ret +; +; LA32-4-LABEL: f64_constant_ins2: +; LA32-4: # %bb.0: +; LA32-4-NEXT: ori $a0, $zero, 3 +; LA32-4-NEXT: movgr2fr.w $fa0, $a0 +; LA32-4-NEXT: lu12i.w $a0, 262144 +; LA32-4-NEXT: movgr2frh.w $fa0, $a0 +; LA32-4-NEXT: ret +; +; LA32-5-LABEL: f64_constant_ins2: +; LA32-5: # %bb.0: +; LA32-5-NEXT: ori $a0, $zero, 3 +; LA32-5-NEXT: movgr2fr.w $fa0, $a0 +; LA32-5-NEXT: lu12i.w $a0, 262144 +; LA32-5-NEXT: movgr2frh.w $fa0, $a0 +; LA32-5-NEXT: ret +; +; LA32-6-LABEL: f64_constant_ins2: +; LA32-6: # %bb.0: +; LA32-6-NEXT: ori $a0, $zero, 3 +; LA32-6-NEXT: movgr2fr.w $fa0, $a0 +; LA32-6-NEXT: lu12i.w $a0, 262144 +; LA32-6-NEXT: movgr2frh.w $fa0, $a0 +; LA32-6-NEXT: ret +; +; LA64-0-LABEL: f64_constant_ins2: +; LA64-0: # %bb.0: +; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA64-0-NEXT: ret +; +; LA64-2-LABEL: f64_constant_ins2: +; LA64-2: # %bb.0: +; LA64-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA64-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA64-2-NEXT: ret +; +; LA64-3-LABEL: f64_constant_ins2: +; LA64-3: # %bb.0: +; LA64-3-NEXT: ori $a0, $zero, 3 +; LA64-3-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-3-NEXT: movgr2fr.d $fa0, $a0 +; LA64-3-NEXT: ret +; +; LA64-4-LABEL: f64_constant_ins2: +; LA64-4: # %bb.0: +; LA64-4-NEXT: ori $a0, $zero, 3 +; LA64-4-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-4-NEXT: movgr2fr.d $fa0, $a0 +; LA64-4-NEXT: ret +; +; LA64-5-LABEL: f64_constant_ins2: +; LA64-5: # %bb.0: +; LA64-5-NEXT: ori $a0, $zero, 3 +; LA64-5-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-5-NEXT: movgr2fr.d $fa0, $a0 +; LA64-5-NEXT: ret + ret double 2.00000000000000137 +} + +define double @f64_constant_ins3() nounwind { +; LA32-0-LABEL: f64_constant_ins3: +; LA32-0: # %bb.0: +; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA32-0-NEXT: ret +; +; LA32-2-LABEL: f64_constant_ins3: +; LA32-2: # %bb.0: +; LA32-2-NEXT: pcalau12i $a0, 
%pc_hi20(.LCPI4_0) +; LA32-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA32-2-NEXT: ret +; +; LA32-3-LABEL: f64_constant_ins3: +; LA32-3: # %bb.0: +; LA32-3-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA32-3-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA32-3-NEXT: ret +; +; LA32-4-LABEL: f64_constant_ins3: +; LA32-4: # %bb.0: +; LA32-4-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA32-4-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA32-4-NEXT: ret +; +; LA32-5-LABEL: f64_constant_ins3: +; LA32-5: # %bb.0: +; LA32-5-NEXT: lu12i.w $a0, 268115 +; LA32-5-NEXT: ori $a0, $a0, 344 +; LA32-5-NEXT: lu12i.w $a1, -131072 +; LA32-5-NEXT: movgr2fr.w $fa0, $a1 +; LA32-5-NEXT: movgr2frh.w $fa0, $a0 +; LA32-5-NEXT: ret +; +; LA32-6-LABEL: f64_constant_ins3: +; LA32-6: # %bb.0: +; LA32-6-NEXT: lu12i.w $a0, 268115 +; LA32-6-NEXT: ori $a0, $a0, 344 +; LA32-6-NEXT: lu12i.w $a1, -131072 +; LA32-6-NEXT: movgr2fr.w $fa0, $a1 +; LA32-6-NEXT: movgr2frh.w $fa0, $a0 +; LA32-6-NEXT: ret +; +; LA64-0-LABEL: f64_constant_ins3: +; LA64-0: # %bb.0: +; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA64-0-NEXT: ret +; +; LA64-2-LABEL: f64_constant_ins3: +; LA64-2: # %bb.0: +; LA64-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA64-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA64-2-NEXT: ret +; +; LA64-3-LABEL: f64_constant_ins3: +; LA64-3: # %bb.0: +; LA64-3-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA64-3-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) +; LA64-3-NEXT: ret +; +; LA64-4-LABEL: f64_constant_ins3: +; LA64-4: # %bb.0: +; LA64-4-NEXT: lu12i.w $a0, -131072 +; LA64-4-NEXT: lu32i.d $a0, 340312 +; LA64-4-NEXT: lu52i.d $a0, $a0, 1047 +; LA64-4-NEXT: movgr2fr.d $fa0, $a0 +; LA64-4-NEXT: ret +; +; LA64-5-LABEL: f64_constant_ins3: +; LA64-5: # %bb.0: +; LA64-5-NEXT: lu12i.w $a0, -131072 +; LA64-5-NEXT: lu32i.d $a0, 340312 +; LA64-5-NEXT: lu52i.d $a0, $a0, 1047 +; LA64-5-NEXT: movgr2fr.d $fa0, $a0 +; LA64-5-NEXT: ret + ret double 22222222.0 
+} + define double @f64_constant_pi() nounwind { -; LA32-LABEL: f64_constant_pi: -; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA32-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) -; LA32-NEXT: ret +; LA32-0-LABEL: f64_constant_pi: +; LA32-0: # %bb.0: +; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA32-0-NEXT: ret ; -; LA64-LABEL: f64_constant_pi: -; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA64-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) -; LA64-NEXT: ret +; LA32-2-LABEL: f64_constant_pi: +; LA32-2: # %bb.0: +; LA32-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA32-2-NEXT: ret +; +; LA32-3-LABEL: f64_constant_pi: +; LA32-3: # %bb.0: +; LA32-3-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-3-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA32-3-NEXT: ret +; +; LA32-4-LABEL: f64_constant_pi: +; LA32-4: # %bb.0: +; LA32-4-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-4-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA32-4-NEXT: ret +; +; LA32-5-LABEL: f64_constant_pi: +; LA32-5: # %bb.0: +; LA32-5-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-5-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA32-5-NEXT: ret +; +; LA32-6-LABEL: f64_constant_pi: +; LA32-6: # %bb.0: +; LA32-6-NEXT: lu12i.w $a0, 262290 +; LA32-6-NEXT: ori $a0, $a0, 507 +; LA32-6-NEXT: lu12i.w $a1, 345154 +; LA32-6-NEXT: ori $a1, $a1, 3352 +; LA32-6-NEXT: movgr2fr.w $fa0, $a1 +; LA32-6-NEXT: movgr2frh.w $fa0, $a0 +; LA32-6-NEXT: ret +; +; LA64-0-LABEL: f64_constant_pi: +; LA64-0: # %bb.0: +; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA64-0-NEXT: ret +; +; LA64-2-LABEL: f64_constant_pi: +; LA64-2: # %bb.0: +; LA64-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA64-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA64-2-NEXT: ret +; +; LA64-3-LABEL: f64_constant_pi: +; LA64-3: # %bb.0: +; LA64-3-NEXT: 
pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA64-3-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA64-3-NEXT: ret +; +; LA64-4-LABEL: f64_constant_pi: +; LA64-4: # %bb.0: +; LA64-4-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA64-4-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) +; LA64-4-NEXT: ret +; +; LA64-5-LABEL: f64_constant_pi: +; LA64-5: # %bb.0: +; LA64-5-NEXT: lu12i.w $a0, 345154 +; LA64-5-NEXT: ori $a0, $a0, 3352 +; LA64-5-NEXT: lu32i.d $a0, -450053 +; LA64-5-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-5-NEXT: movgr2fr.d $fa0, $a0 +; LA64-5-NEXT: ret ret double 3.1415926535897931159979634685441851615905761718750 } define double @f64_add_fimm1(double %a) nounwind { ; LA32-LABEL: f64_add_fimm1: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $zero, 1 -; LA32-NEXT: movgr2fr.w $fa1, $a0 -; LA32-NEXT: ffint.s.w $fa1, $fa1 -; LA32-NEXT: fcvt.d.s $fa1, $fa1 +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: lu12i.w $a0, 261888 +; LA32-NEXT: movgr2frh.w $fa1, $a0 ; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 ; LA32-NEXT: ret ; @@ -69,10 +369,9 @@ define double @f64_add_fimm1(double %a) nounwind { define double @f64_positive_fimm1() nounwind { ; LA32-LABEL: f64_positive_fimm1: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $zero, 1 -; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.s.w $fa0, $fa0 -; LA32-NEXT: fcvt.d.s $fa0, $fa0 +; LA32-NEXT: movgr2fr.w $fa0, $zero +; LA32-NEXT: lu12i.w $a0, 261888 +; LA32-NEXT: movgr2frh.w $fa0, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: f64_positive_fimm1: diff --git a/llvm/test/CodeGen/LoongArch/float-imm.ll b/llvm/test/CodeGen/LoongArch/float-imm.ll index 006a9e64b190d..4611d8801cf2f 100644 --- a/llvm/test/CodeGen/LoongArch/float-imm.ll +++ b/llvm/test/CodeGen/LoongArch/float-imm.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc 
--mtriple=loongarch32 --mattr=+f,-d -loongarch-materialize-float-imm=0 < %s | FileCheck %s --check-prefixes=LA32,LA32-0 +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d -loongarch-materialize-float-imm=2 < %s | FileCheck %s --check-prefixes=LA32,LA32-2 +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d -loongarch-materialize-float-imm=3 < %s | FileCheck %s --check-prefixes=LA32,LA32-3 +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d -loongarch-materialize-float-imm=4 < %s | FileCheck %s --check-prefixes=LA32,LA32-3 +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d -loongarch-materialize-float-imm=5 < %s | FileCheck %s --check-prefixes=LA32,LA32-3 +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d -loongarch-materialize-float-imm=6 < %s | FileCheck %s --check-prefixes=LA32,LA32-3 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d -loongarch-materialize-float-imm=0 < %s | FileCheck %s --check-prefixes=LA64,LA64-0 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d -loongarch-materialize-float-imm=2 < %s | FileCheck %s --check-prefixes=LA64,LA64-2 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d -loongarch-materialize-float-imm=3 < %s | FileCheck %s --check-prefixes=LA64,LA64-3 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d -loongarch-materialize-float-imm=4 < %s | FileCheck %s --check-prefixes=LA64,LA64-3 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d -loongarch-materialize-float-imm=5 < %s | FileCheck %s --check-prefixes=LA64,LA64-3 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d -loongarch-materialize-float-imm=6 < %s | FileCheck %s --check-prefixes=LA64,LA64-3 define float @f32_positive_zero() nounwind { ; LA32-LABEL: f32_positive_zero: @@ -30,35 +40,98 @@ define float @f32_negative_zero() nounwind { ret float -0.0 } +define float @f32_constant_ins1() nounwind { +; LA32-0-LABEL: f32_constant_ins1: +; LA32-0: # %bb.0: +; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA32-0-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI2_0) +; LA32-0-NEXT: ret +; +; LA32-2-LABEL: f32_constant_ins1: +; 
LA32-2: # %bb.0: +; LA32-2-NEXT: lu12i.w $a0, 270336 +; LA32-2-NEXT: movgr2fr.w $fa0, $a0 +; LA32-2-NEXT: ret +; +; LA32-3-LABEL: f32_constant_ins1: +; LA32-3: # %bb.0: +; LA32-3-NEXT: lu12i.w $a0, 270336 +; LA32-3-NEXT: movgr2fr.w $fa0, $a0 +; LA32-3-NEXT: ret +; +; LA64-0-LABEL: f32_constant_ins1: +; LA64-0: # %bb.0: +; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA64-0-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI2_0) +; LA64-0-NEXT: ret +; +; LA64-2-LABEL: f32_constant_ins1: +; LA64-2: # %bb.0: +; LA64-2-NEXT: lu12i.w $a0, 270336 +; LA64-2-NEXT: movgr2fr.w $fa0, $a0 +; LA64-2-NEXT: ret +; +; LA64-3-LABEL: f32_constant_ins1: +; LA64-3: # %bb.0: +; LA64-3-NEXT: lu12i.w $a0, 270336 +; LA64-3-NEXT: movgr2fr.w $fa0, $a0 +; LA64-3-NEXT: ret + ret float 32.0 +} + define float @f32_constant_pi() nounwind { -; LA32-LABEL: f32_constant_pi: -; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA32-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI2_0) -; LA32-NEXT: ret +; LA32-0-LABEL: f32_constant_pi: +; LA32-0: # %bb.0: +; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA32-0-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA32-0-NEXT: ret ; -; LA64-LABEL: f32_constant_pi: -; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA64-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI2_0) -; LA64-NEXT: ret +; LA32-2-LABEL: f32_constant_pi: +; LA32-2: # %bb.0: +; LA32-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA32-2-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA32-2-NEXT: ret +; +; LA32-3-LABEL: f32_constant_pi: +; LA32-3: # %bb.0: +; LA32-3-NEXT: lu12i.w $a0, 263312 +; LA32-3-NEXT: ori $a0, $a0, 4059 +; LA32-3-NEXT: movgr2fr.w $fa0, $a0 +; LA32-3-NEXT: ret +; +; LA64-0-LABEL: f32_constant_pi: +; LA64-0: # %bb.0: +; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA64-0-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) +; LA64-0-NEXT: ret +; +; LA64-2-LABEL: f32_constant_pi: +; LA64-2: # %bb.0: +; LA64-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; LA64-2-NEXT: fld.s $fa0, 
$a0, %pc_lo12(.LCPI3_0) +; LA64-2-NEXT: ret +; +; LA64-3-LABEL: f32_constant_pi: +; LA64-3: # %bb.0: +; LA64-3-NEXT: lu12i.w $a0, 263312 +; LA64-3-NEXT: ori $a0, $a0, 4059 +; LA64-3-NEXT: movgr2fr.w $fa0, $a0 +; LA64-3-NEXT: ret ret float 3.14159274101257324218750 } define float @f32_add_fimm1(float %a) nounwind { ; LA32-LABEL: f32_add_fimm1: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $zero, 1 +; LA32-NEXT: lu12i.w $a0, 260096 ; LA32-NEXT: movgr2fr.w $fa1, $a0 -; LA32-NEXT: ffint.s.w $fa1, $fa1 ; LA32-NEXT: fadd.s $fa0, $fa0, $fa1 ; LA32-NEXT: ret ; ; LA64-LABEL: f32_add_fimm1: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a0, $zero, 1 +; LA64-NEXT: lu12i.w $a0, 260096 ; LA64-NEXT: movgr2fr.w $fa1, $a0 -; LA64-NEXT: ffint.s.w $fa1, $fa1 ; LA64-NEXT: fadd.s $fa0, $fa0, $fa1 ; LA64-NEXT: ret %1 = fadd float %a, 1.0 @@ -68,16 +141,14 @@ define float @f32_add_fimm1(float %a) nounwind { define float @f32_positive_fimm1() nounwind { ; LA32-LABEL: f32_positive_fimm1: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $zero, 1 +; LA32-NEXT: lu12i.w $a0, 260096 ; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.s.w $fa0, $fa0 ; LA32-NEXT: ret ; ; LA64-LABEL: f32_positive_fimm1: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a0, $zero, 1 +; LA64-NEXT: lu12i.w $a0, 260096 ; LA64-NEXT: movgr2fr.w $fa0, $a0 -; LA64-NEXT: ffint.s.w $fa0, $fa0 ; LA64-NEXT: ret ret float 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll index e5c848e0f1542..d875bb98e4593 100644 --- a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll @@ -16,16 +16,16 @@ define float @frsqrt_f32(float %a) nounwind { ; ; LA32F-FRECIPE-LABEL: frsqrt_f32: ; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 -; LA32F-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -; LA32F-FRECIPE-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI0_0) -; LA32F-FRECIPE-NEXT: pcalau12i $a0, 
%pc_hi20(.LCPI0_1) -; LA32F-FRECIPE-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI0_1) -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 -; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0 +; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: lu12i.w $a0, -261120 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a0 +; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: lu12i.w $a0, -266240 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a0 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0 ; LA32F-FRECIPE-NEXT: ret ; ; LA64D-LABEL: frsqrt_f32: @@ -53,30 +53,30 @@ define float @frsqrt_f32(float %a) nounwind { define double @frsqrt_f64(double %a) nounwind { ; LA32F-LABEL: frsqrt_f64: ; LA32F: # %bb.0: -; LA32F-NEXT: addi.w $sp, $sp, -16 -; LA32F-NEXT: st.w $ra, $sp, 12 -; LA32F-NEXT: bl sqrt -; LA32F-NEXT: move $a2, $a0 -; LA32F-NEXT: move $a3, $a1 -; LA32F-NEXT: lu12i.w $a1, 261888 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: ld.w $ra, $sp, 12 -; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: bl sqrt +; LA32F-NEXT: move $a2, $a0 +; LA32F-NEXT: move $a3, $a1 +; LA32F-NEXT: lu12i.w $a1, 261888 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 ; LA32F-NEXT: ret ; ; LA32F-FRECIPE-LABEL: frsqrt_f64: ; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16 -; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: bl sqrt -; LA32F-FRECIPE-NEXT: move $a2, $a0 -; LA32F-FRECIPE-NEXT: move $a3, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888 
-; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16 +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16 +; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: bl sqrt +; LA32F-FRECIPE-NEXT: move $a2, $a0 +; LA32F-FRECIPE-NEXT: move $a3, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16 ; LA32F-FRECIPE-NEXT: ret ; ; LA64D-LABEL: frsqrt_f64: @@ -105,102 +105,102 @@ define double @frsqrt_f64(double %a) nounwind { } define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2) nounwind { -; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f64: -; LA32F: # %bb.0: -; LA32F-NEXT: addi.w $sp, $sp, -32 -; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill -; LA32F-NEXT: move $fp, $a3 -; LA32F-NEXT: move $s0, $a2 -; LA32F-NEXT: bl sqrt -; LA32F-NEXT: move $s1, $a0 -; LA32F-NEXT: move $s2, $a1 -; LA32F-NEXT: lu12i.w $a1, 261888 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s1 -; LA32F-NEXT: move $a3, $s2 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: move $s3, $a0 -; LA32F-NEXT: move $s4, $a1 -; LA32F-NEXT: lu12i.w $a1, 263248 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s1 -; LA32F-NEXT: move $a3, $s2 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: st.w $s3, $s0, 0 -; LA32F-NEXT: st.w $s4, $s0, 4 -; LA32F-NEXT: st.w $a0, $fp, 0 -; LA32F-NEXT: st.w $a1, $fp, 4 -; LA32F-NEXT: move $a0, $s1 -; 
LA32F-NEXT: move $a1, $s2 -; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32F-NEXT: addi.w $sp, $sp, 32 +; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -32 +; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a3 +; LA32F-NEXT: move $s0, $a2 +; LA32F-NEXT: bl sqrt +; LA32F-NEXT: move $s1, $a0 +; LA32F-NEXT: move $s2, $a1 +; LA32F-NEXT: lu12i.w $a1, 261888 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s1 +; LA32F-NEXT: move $a3, $s2 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: move $s3, $a0 +; LA32F-NEXT: move $s4, $a1 +; LA32F-NEXT: lu12i.w $a1, 263248 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s1 +; LA32F-NEXT: move $a3, $s2 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: st.w $s3, $s0, 0 +; LA32F-NEXT: st.w $s4, $s0, 4 +; LA32F-NEXT: st.w $a0, $fp, 0 +; LA32F-NEXT: st.w $a1, $fp, 4 +; LA32F-NEXT: move $a0, $s1 +; LA32F-NEXT: move $a1, $s2 +; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 28 # 
4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 32 ; LA32F-NEXT: ret ; -; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64: -; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32 -; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: move $fp, $a3 -; LA32F-FRECIPE-NEXT: move $s0, $a2 -; LA32F-FRECIPE-NEXT: bl sqrt -; LA32F-FRECIPE-NEXT: move $s1, $a0 -; LA32F-FRECIPE-NEXT: move $s2, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s1 -; LA32F-FRECIPE-NEXT: move $a3, $s2 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: move $s3, $a0 -; LA32F-FRECIPE-NEXT: move $s4, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s1 -; LA32F-FRECIPE-NEXT: move $a3, $s2 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0 -; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4 -; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0 -; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4 -; LA32F-FRECIPE-NEXT: move $a0, $s1 -; LA32F-FRECIPE-NEXT: move $a1, $s2 -; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: addi.w $sp, 
$sp, 32 +; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64: +; LA32F-FRECIPE: # %bb.0: +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32 +; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: move $fp, $a3 +; LA32F-FRECIPE-NEXT: move $s0, $a2 +; LA32F-FRECIPE-NEXT: bl sqrt +; LA32F-FRECIPE-NEXT: move $s1, $a0 +; LA32F-FRECIPE-NEXT: move $s2, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s1 +; LA32F-FRECIPE-NEXT: move $a3, $s2 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: move $s3, $a0 +; LA32F-FRECIPE-NEXT: move $s4, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s1 +; LA32F-FRECIPE-NEXT: move $a3, $s2 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0 +; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4 +; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0 +; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4 +; LA32F-FRECIPE-NEXT: move $a0, $s1 +; LA32F-FRECIPE-NEXT: move $a1, $s2 +; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32 ; LA32F-FRECIPE-NEXT: ret ; ; LA64D-LABEL: 
sqrt_simplify_before_recip_3_uses_f64: ; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) -; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI2_0) -; LA64D-NEXT: fsqrt.d $fa1, $fa0 -; LA64D-NEXT: frsqrt.d $fa0, $fa0 -; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1 -; LA64D-NEXT: fst.d $fa0, $a0, 0 -; LA64D-NEXT: fst.d $fa2, $a1, 0 -; LA64D-NEXT: fmov.d $fa0, $fa1 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI2_0) +; LA64D-NEXT: fsqrt.d $fa1, $fa0 +; LA64D-NEXT: frsqrt.d $fa0, $fa0 +; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1 +; LA64D-NEXT: fst.d $fa0, $a0, 0 +; LA64D-NEXT: fst.d $fa2, $a1, 0 +; LA64D-NEXT: fmov.d $fa0, $fa1 ; LA64D-NEXT: ret ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64: @@ -235,103 +235,103 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2 define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, ptr %p2) nounwind { -; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: -; LA32F: # %bb.0: -; LA32F-NEXT: addi.w $sp, $sp, -32 -; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill -; LA32F-NEXT: move $fp, $a3 -; LA32F-NEXT: move $s0, $a2 -; LA32F-NEXT: bl sqrt -; LA32F-NEXT: move $s1, $a0 -; LA32F-NEXT: move $s2, $a1 -; LA32F-NEXT: lu12i.w $a1, 263248 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s1 -; LA32F-NEXT: move $a3, $s2 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: move $s3, $a0 -; LA32F-NEXT: move $s4, $a1 -; LA32F-NEXT: lu12i.w $a1, 263256 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s1 -; LA32F-NEXT: move $a3, $s2 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: st.w $s3, $s0, 0 -; 
LA32F-NEXT: st.w $s4, $s0, 4 -; LA32F-NEXT: st.w $a0, $fp, 0 -; LA32F-NEXT: st.w $a1, $fp, 4 -; LA32F-NEXT: move $a0, $s1 -; LA32F-NEXT: move $a1, $s2 -; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32F-NEXT: addi.w $sp, $sp, 32 +; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -32 +; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a3 +; LA32F-NEXT: move $s0, $a2 +; LA32F-NEXT: bl sqrt +; LA32F-NEXT: move $s1, $a0 +; LA32F-NEXT: move $s2, $a1 +; LA32F-NEXT: lu12i.w $a1, 263248 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s1 +; LA32F-NEXT: move $a3, $s2 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: move $s3, $a0 +; LA32F-NEXT: move $s4, $a1 +; LA32F-NEXT: lu12i.w $a1, 263256 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s1 +; LA32F-NEXT: move $a3, $s2 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: st.w $s3, $s0, 0 +; LA32F-NEXT: st.w $s4, $s0, 4 +; LA32F-NEXT: st.w $a0, $fp, 0 +; LA32F-NEXT: st.w $a1, $fp, 4 +; LA32F-NEXT: move $a0, $s1 +; LA32F-NEXT: move $a1, $s2 +; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload +; LA32F-NEXT: 
ld.w $s0, $sp, 20 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 32 ; LA32F-NEXT: ret ; -; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: -; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32 -; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: move $fp, $a3 -; LA32F-FRECIPE-NEXT: move $s0, $a2 -; LA32F-FRECIPE-NEXT: bl sqrt -; LA32F-FRECIPE-NEXT: move $s1, $a0 -; LA32F-FRECIPE-NEXT: move $s2, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s1 -; LA32F-FRECIPE-NEXT: move $a3, $s2 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: move $s3, $a0 -; LA32F-FRECIPE-NEXT: move $s4, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s1 -; LA32F-FRECIPE-NEXT: move $a3, $s2 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0 -; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4 -; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0 -; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4 -; LA32F-FRECIPE-NEXT: move $a0, $s1 -; LA32F-FRECIPE-NEXT: move $a1, $s2 -; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: 
ld.w $fp, $sp, 24 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32 +; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: +; LA32F-FRECIPE: # %bb.0: +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32 +; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: move $fp, $a3 +; LA32F-FRECIPE-NEXT: move $s0, $a2 +; LA32F-FRECIPE-NEXT: bl sqrt +; LA32F-FRECIPE-NEXT: move $s1, $a0 +; LA32F-FRECIPE-NEXT: move $s2, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s1 +; LA32F-FRECIPE-NEXT: move $a3, $s2 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: move $s3, $a0 +; LA32F-FRECIPE-NEXT: move $s4, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s1 +; LA32F-FRECIPE-NEXT: move $a3, $s2 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0 +; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4 +; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0 +; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4 +; LA32F-FRECIPE-NEXT: move $a0, $s1 +; LA32F-FRECIPE-NEXT: move $a1, $s2 +; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; 
LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32 ; LA32F-FRECIPE-NEXT: ret ; ; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: ; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) -; LA64D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI3_0) -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) -; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI3_1) -; LA64D-NEXT: fsqrt.d $fa0, $fa0 -; LA64D-NEXT: fdiv.d $fa1, $fa1, $fa0 -; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa0 -; LA64D-NEXT: fst.d $fa1, $a0, 0 -; LA64D-NEXT: fst.d $fa2, $a1, 0 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; LA64D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI3_0) +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) +; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI3_1) +; LA64D-NEXT: fsqrt.d $fa0, $fa0 +; LA64D-NEXT: fdiv.d $fa1, $fa1, $fa0 +; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa0 +; LA64D-NEXT: fst.d $fa1, $a0, 0 +; LA64D-NEXT: fst.d $fa2, $a1, 0 ; LA64D-NEXT: ret ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: @@ -368,138 +368,138 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p } define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2, ptr %p3) nounwind { -; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f64: -; LA32F: # %bb.0: -; LA32F-NEXT: addi.w $sp, $sp, -48 -; LA32F-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32F-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill -; LA32F-NEXT: move $fp, $a4 -; LA32F-NEXT: move $s0, $a3 -; LA32F-NEXT: 
move $s1, $a2 -; LA32F-NEXT: bl sqrt -; LA32F-NEXT: move $s2, $a0 -; LA32F-NEXT: move $s3, $a1 -; LA32F-NEXT: lu12i.w $a1, 261888 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s2 -; LA32F-NEXT: move $a3, $s3 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: move $s4, $a0 -; LA32F-NEXT: move $s5, $a1 -; LA32F-NEXT: lu12i.w $a1, 263248 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s2 -; LA32F-NEXT: move $a3, $s3 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: move $s6, $a0 -; LA32F-NEXT: move $s7, $a1 -; LA32F-NEXT: lu12i.w $a1, 263256 -; LA32F-NEXT: move $a0, $zero -; LA32F-NEXT: move $a2, $s2 -; LA32F-NEXT: move $a3, $s3 -; LA32F-NEXT: bl __divdf3 -; LA32F-NEXT: st.w $s4, $s1, 0 -; LA32F-NEXT: st.w $s5, $s1, 4 -; LA32F-NEXT: st.w $s6, $s0, 0 -; LA32F-NEXT: st.w $s7, $s0, 4 -; LA32F-NEXT: st.w $a0, $fp, 0 -; LA32F-NEXT: st.w $a1, $fp, 4 -; LA32F-NEXT: move $a0, $s2 -; LA32F-NEXT: move $a1, $s3 -; LA32F-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32F-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32F-NEXT: addi.w $sp, $sp, 48 +; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -48 +; LA32F-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s4, $sp, 20 # 4-byte Folded 
Spill +; LA32F-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a4 +; LA32F-NEXT: move $s0, $a3 +; LA32F-NEXT: move $s1, $a2 +; LA32F-NEXT: bl sqrt +; LA32F-NEXT: move $s2, $a0 +; LA32F-NEXT: move $s3, $a1 +; LA32F-NEXT: lu12i.w $a1, 261888 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s2 +; LA32F-NEXT: move $a3, $s3 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: move $s4, $a0 +; LA32F-NEXT: move $s5, $a1 +; LA32F-NEXT: lu12i.w $a1, 263248 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s2 +; LA32F-NEXT: move $a3, $s3 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: move $s6, $a0 +; LA32F-NEXT: move $s7, $a1 +; LA32F-NEXT: lu12i.w $a1, 263256 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: move $a2, $s2 +; LA32F-NEXT: move $a3, $s3 +; LA32F-NEXT: bl __divdf3 +; LA32F-NEXT: st.w $s4, $s1, 0 +; LA32F-NEXT: st.w $s5, $s1, 4 +; LA32F-NEXT: st.w $s6, $s0, 0 +; LA32F-NEXT: st.w $s7, $s0, 4 +; LA32F-NEXT: st.w $a0, $fp, 0 +; LA32F-NEXT: st.w $a1, $fp, 4 +; LA32F-NEXT: move $a0, $s2 +; LA32F-NEXT: move $a1, $s3 +; LA32F-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 48 ; LA32F-NEXT: ret ; -; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64: -; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -48 -; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w 
$fp, $sp, 40 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill -; LA32F-FRECIPE-NEXT: move $fp, $a4 -; LA32F-FRECIPE-NEXT: move $s0, $a3 -; LA32F-FRECIPE-NEXT: move $s1, $a2 -; LA32F-FRECIPE-NEXT: bl sqrt -; LA32F-FRECIPE-NEXT: move $s2, $a0 -; LA32F-FRECIPE-NEXT: move $s3, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s2 -; LA32F-FRECIPE-NEXT: move $a3, $s3 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: move $s4, $a0 -; LA32F-FRECIPE-NEXT: move $s5, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s2 -; LA32F-FRECIPE-NEXT: move $a3, $s3 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: move $s6, $a0 -; LA32F-FRECIPE-NEXT: move $s7, $a1 -; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256 -; LA32F-FRECIPE-NEXT: move $a0, $zero -; LA32F-FRECIPE-NEXT: move $a2, $s2 -; LA32F-FRECIPE-NEXT: move $a3, $s3 -; LA32F-FRECIPE-NEXT: bl __divdf3 -; LA32F-FRECIPE-NEXT: st.w $s4, $s1, 0 -; LA32F-FRECIPE-NEXT: st.w $s5, $s1, 4 -; LA32F-FRECIPE-NEXT: st.w $s6, $s0, 0 -; LA32F-FRECIPE-NEXT: st.w $s7, $s0, 4 -; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0 -; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4 -; LA32F-FRECIPE-NEXT: move $a0, $s2 -; LA32F-FRECIPE-NEXT: move $a1, $s3 -; LA32F-FRECIPE-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload -; 
LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 48 +; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64: +; LA32F-FRECIPE: # %bb.0: +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -48 +; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill +; LA32F-FRECIPE-NEXT: move $fp, $a4 +; LA32F-FRECIPE-NEXT: move $s0, $a3 +; LA32F-FRECIPE-NEXT: move $s1, $a2 +; LA32F-FRECIPE-NEXT: bl sqrt +; LA32F-FRECIPE-NEXT: move $s2, $a0 +; LA32F-FRECIPE-NEXT: move $s3, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s2 +; LA32F-FRECIPE-NEXT: move $a3, $s3 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: move $s4, $a0 +; LA32F-FRECIPE-NEXT: move $s5, $a1 +; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s2 +; LA32F-FRECIPE-NEXT: move $a3, $s3 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: move $s6, $a0 +; LA32F-FRECIPE-NEXT: move $s7, $a1 +; 
LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256 +; LA32F-FRECIPE-NEXT: move $a0, $zero +; LA32F-FRECIPE-NEXT: move $a2, $s2 +; LA32F-FRECIPE-NEXT: move $a3, $s3 +; LA32F-FRECIPE-NEXT: bl __divdf3 +; LA32F-FRECIPE-NEXT: st.w $s4, $s1, 0 +; LA32F-FRECIPE-NEXT: st.w $s5, $s1, 4 +; LA32F-FRECIPE-NEXT: st.w $s6, $s0, 0 +; LA32F-FRECIPE-NEXT: st.w $s7, $s0, 4 +; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0 +; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4 +; LA32F-FRECIPE-NEXT: move $a0, $s2 +; LA32F-FRECIPE-NEXT: move $a1, $s3 +; LA32F-FRECIPE-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 48 ; LA32F-FRECIPE-NEXT: ret ; ; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f64: ; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) -; LA64D-NEXT: fld.d $fa2, $a3, %pc_lo12(.LCPI4_0) -; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1) -; LA64D-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_1) -; LA64D-NEXT: fsqrt.d $fa1, $fa0 -; LA64D-NEXT: frsqrt.d $fa0, $fa0 -; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1 -; LA64D-NEXT: fdiv.d $fa3, $fa3, $fa1 -; LA64D-NEXT: fst.d $fa0, $a0, 0 -; LA64D-NEXT: fst.d $fa2, $a1, 0 -; LA64D-NEXT: fst.d $fa3, $a2, 0 -; LA64D-NEXT: fmov.d $fa0, $fa1 +; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) +; LA64D-NEXT: fld.d $fa2, $a3, %pc_lo12(.LCPI4_0) +; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1) +; LA64D-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_1) +; LA64D-NEXT: fsqrt.d $fa1, $fa0 
+; LA64D-NEXT: frsqrt.d $fa0, $fa0 +; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1 +; LA64D-NEXT: fdiv.d $fa3, $fa3, $fa1 +; LA64D-NEXT: fst.d $fa0, $a0, 0 +; LA64D-NEXT: fst.d $fa2, $a1, 0 +; LA64D-NEXT: fst.d $fa3, $a2, 0 +; LA64D-NEXT: fmov.d $fa0, $fa1 ; LA64D-NEXT: ret ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64: @@ -541,66 +541,65 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2 define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2) nounwind { ; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f32: ; LA32F: # %bb.0: -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) -; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI5_0) -; LA32F-NEXT: fsqrt.s $fa1, $fa0 -; LA32F-NEXT: frsqrt.s $fa0, $fa0 -; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1 -; LA32F-NEXT: fst.s $fa0, $a0, 0 -; LA32F-NEXT: fst.s $fa2, $a1, 0 -; LA32F-NEXT: fmov.s $fa0, $fa1 +; LA32F-NEXT: fsqrt.s $fa1, $fa0 +; LA32F-NEXT: frsqrt.s $fa0, $fa0 +; LA32F-NEXT: lu12i.w $a2, 270976 +; LA32F-NEXT: movgr2fr.w $fa2, $a2 +; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1 +; LA32F-NEXT: fst.s $fa0, $a0, 0 +; LA32F-NEXT: fst.s $fa2, $a1, 0 +; LA32F-NEXT: fmov.s $fa0, $fa1 ; LA32F-NEXT: ret ; ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32: ; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) -; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0) -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_1) -; LA32F-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI5_1) -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_2) -; LA32F-FRECIPE-NEXT: fld.s $fa5, $a2, %pc_lo12(.LCPI5_2) -; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 -; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa5 -; LA32F-FRECIPE-NEXT: 
fmul.s $fa0, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0 -; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0 +; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, -261120 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2 +; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, -266240 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, 270976 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a2 +; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0 +; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0 ; LA32F-FRECIPE-NEXT: ret ; -; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f32: -; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) -; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI5_0) -; LA64D-NEXT: fsqrt.s $fa1, $fa0 -; LA64D-NEXT: frsqrt.s $fa0, $fa0 -; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1 -; LA64D-NEXT: fst.s $fa0, $a0, 0 -; LA64D-NEXT: fst.s $fa2, $a1, 0 -; LA64D-NEXT: fmov.s $fa0, $fa1 +; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: fsqrt.s $fa1, $fa0 +; LA64D-NEXT: frsqrt.s $fa0, $fa0 +; LA64D-NEXT: lu12i.w $a2, 270976 +; LA64D-NEXT: movgr2fr.w $fa2, $a2 +; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1 +; LA64D-NEXT: fst.s $fa0, $a0, 0 +; LA64D-NEXT: fst.s $fa2, $a1, 0 +; LA64D-NEXT: fmov.s $fa0, $fa1 ; LA64D-NEXT: ret ; -; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32: -; LA64D-FRECIPE: # %bb.0: +; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32: +; LA64D-FRECIPE: # %bb.0: ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1144 ; LA64D-FRECIPE-NEXT: 
fmadd.s $fa2, $fa2, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0) -; LA64D-FRECIPE-NEXT: vldi $vr4, -1056 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: vldi $vr3, -1056 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: lu12i.w $a2, 270976 +; LA64D-FRECIPE-NEXT: movgr2fr.w $fa2, $a2 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0 ; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0 ; LA64D-FRECIPE-NEXT: ret -; %sqrt = tail call fast float @llvm.sqrt.f32(float %x) %rsqrt = fdiv fast float 1.0, %sqrt %r = fdiv fast float 42.0, %sqrt @@ -613,82 +612,81 @@ define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2) define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2, ptr %p3) nounwind { ; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f32: ; LA32F: # %bb.0: -; LA32F-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0) -; LA32F-NEXT: fld.s $fa2, $a3, %pc_lo12(.LCPI6_0) -; LA32F-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1) -; LA32F-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1) -; LA32F-NEXT: fsqrt.s $fa1, $fa0 -; LA32F-NEXT: frsqrt.s $fa0, $fa0 -; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1 -; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa1 -; LA32F-NEXT: fst.s $fa0, $a0, 0 -; LA32F-NEXT: fst.s $fa2, $a1, 0 -; LA32F-NEXT: fst.s $fa3, $a2, 0 -; LA32F-NEXT: fmov.s $fa0, $fa1 +; LA32F-NEXT: fsqrt.s $fa1, $fa0 +; LA32F-NEXT: frsqrt.s $fa0, $fa0 +; LA32F-NEXT: lu12i.w $a3, 270976 +; LA32F-NEXT: movgr2fr.w $fa2, $a3 +; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1 +; LA32F-NEXT: lu12i.w $a3, 271040 +; LA32F-NEXT: movgr2fr.w $fa3, $a3 +; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa1 +; LA32F-NEXT: fst.s $fa0, $a0, 0 +; LA32F-NEXT: fst.s $fa2, $a1, 0 +; LA32F-NEXT: fst.s $fa3, $a2, 0 +; LA32F-NEXT: fmov.s 
$fa0, $fa1 ; LA32F-NEXT: ret ; ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32: ; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0) -; LA32F-FRECIPE-NEXT: fld.s $fa1, $a3, %pc_lo12(.LCPI6_0) -; LA32F-FRECIPE-NEXT: frsqrte.s $fa2, $fa0 -; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa2 -; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2 -; LA32F-FRECIPE-NEXT: fmadd.s $fa1, $fa3, $fa2, $fa1 -; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1) -; LA32F-FRECIPE-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1) -; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_2) -; LA32F-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_2) -; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_3) -; LA32F-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_3) -; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa2, $fa3 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa2, $fa1 -; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4 -; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5 -; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0 -; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0 -; LA32F-FRECIPE-NEXT: fst.s $fa3, $a2, 0 +; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: lu12i.w $a3, -261120 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3 +; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: lu12i.w $a3, -266240 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: lu12i.w $a3, 270976 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a3 +; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: lu12i.w $a3, 271040 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3 +; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0 +; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0 +; 
LA32F-FRECIPE-NEXT: fst.s $fa3, $a2, 0 ; LA32F-FRECIPE-NEXT: ret ; -; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f32: -; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0) -; LA64D-NEXT: fld.s $fa2, $a3, %pc_lo12(.LCPI6_0) -; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1) -; LA64D-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1) -; LA64D-NEXT: fsqrt.s $fa1, $fa0 -; LA64D-NEXT: frsqrt.s $fa0, $fa0 -; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1 -; LA64D-NEXT: fdiv.s $fa3, $fa3, $fa1 -; LA64D-NEXT: fst.s $fa0, $a0, 0 -; LA64D-NEXT: fst.s $fa2, $a1, 0 -; LA64D-NEXT: fst.s $fa3, $a2, 0 -; LA64D-NEXT: fmov.s $fa0, $fa1 +; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: fsqrt.s $fa1, $fa0 +; LA64D-NEXT: frsqrt.s $fa0, $fa0 +; LA64D-NEXT: lu12i.w $a3, 270976 +; LA64D-NEXT: movgr2fr.w $fa2, $a3 +; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1 +; LA64D-NEXT: lu12i.w $a3, 271040 +; LA64D-NEXT: movgr2fr.w $fa3, $a3 +; LA64D-NEXT: fdiv.s $fa3, $fa3, $fa1 +; LA64D-NEXT: fst.s $fa0, $a0, 0 +; LA64D-NEXT: fst.s $fa2, $a1, 0 +; LA64D-NEXT: fst.s $fa3, $a2, 0 +; LA64D-NEXT: fmov.s $fa0, $fa1 ; LA64D-NEXT: ret ; -; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32: -; LA64D-FRECIPE: # %bb.0: +; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32: +; LA64D-FRECIPE: # %bb.0: ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1144 ; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1056 -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0) -; LA64D-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1) -; LA64D-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_1) ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, 
$fa5 +; LA64D-FRECIPE-NEXT: lu12i.w $a3, 270976 +; LA64D-FRECIPE-NEXT: movgr2fr.w $fa2, $a3 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: lu12i.w $a3, 271040 +; LA64D-FRECIPE-NEXT: movgr2fr.w $fa3, $a3 +; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa3 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0 ; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0 ; LA64D-FRECIPE-NEXT: fst.s $fa3, $a2, 0 ; LA64D-FRECIPE-NEXT: ret -; %sqrt = tail call fast float @llvm.sqrt.f32(float %x) %rsqrt = fdiv fast float 1.0, %sqrt %r1 = fdiv fast float 42.0, %sqrt @@ -703,55 +701,55 @@ define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2, define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr %p2) nounwind { ; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: ; LA32F: # %bb.0: -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0) -; LA32F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI7_0) -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1) -; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_1) -; LA32F-NEXT: fsqrt.s $fa0, $fa0 -; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0 -; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0 -; LA32F-NEXT: fst.s $fa1, $a0, 0 -; LA32F-NEXT: fst.s $fa2, $a1, 0 +; LA32F-NEXT: fsqrt.s $fa0, $fa0 +; LA32F-NEXT: lu12i.w $a2, 270976 +; LA32F-NEXT: movgr2fr.w $fa1, $a2 +; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0 +; LA32F-NEXT: lu12i.w $a2, 271040 +; LA32F-NEXT: movgr2fr.w $fa2, $a2 +; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0 +; LA32F-NEXT: fst.s $fa1, $a0, 0 +; LA32F-NEXT: fst.s $fa2, $a1, 0 ; LA32F-NEXT: ret ; ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: ; LA32F-FRECIPE: # %bb.0: -; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0) -; LA32F-FRECIPE-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_0) -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1) -; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_1) -; LA32F-FRECIPE-NEXT: fmul.s $fa1, 
$fa0, $fa1 -; LA32F-FRECIPE-NEXT: fmul.s $fa4, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa4, $fa1, $fa2 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_2) -; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_2) -; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_3) -; LA32F-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_3) -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 -; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3 -; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 -; LA32F-FRECIPE-NEXT: fst.s $fa2, $a0, 0 -; LA32F-FRECIPE-NEXT: fst.s $fa1, $a1, 0 +; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, -261120 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2 +; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, -266240 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, 270976 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a2 +; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2 +; LA32F-FRECIPE-NEXT: lu12i.w $a2, 271040 +; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2 +; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 +; LA32F-FRECIPE-NEXT: fst.s $fa2, $a0, 0 +; LA32F-FRECIPE-NEXT: fst.s $fa1, $a1, 0 ; LA32F-FRECIPE-NEXT: ret ; -; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: -; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0) -; LA64D-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI7_0) -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1) -; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_1) -; LA64D-NEXT: fsqrt.s $fa0, $fa0 -; LA64D-NEXT: fdiv.s $fa1, $fa1, $fa0 -; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa0 -; LA64D-NEXT: fst.s $fa1, $a0, 0 -; LA64D-NEXT: fst.s 
$fa2, $a1, 0 +; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: fsqrt.s $fa0, $fa0 +; LA64D-NEXT: lu12i.w $a2, 270976 +; LA64D-NEXT: movgr2fr.w $fa1, $a2 +; LA64D-NEXT: fdiv.s $fa1, $fa1, $fa0 +; LA64D-NEXT: lu12i.w $a2, 271040 +; LA64D-NEXT: movgr2fr.w $fa2, $a2 +; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa0 +; LA64D-NEXT: fst.s $fa1, $a0, 0 +; LA64D-NEXT: fst.s $fa2, $a1, 0 ; LA64D-NEXT: ret ; -; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: -; LA64D-FRECIPE: # %bb.0: +; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: +; LA64D-FRECIPE: # %bb.0: ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 @@ -759,18 +757,17 @@ define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr ; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1056 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1) -; LA64D-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_1) ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: lu12i.w $a2, 270976 +; LA64D-FRECIPE-NEXT: movgr2fr.w $fa2, $a2 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: lu12i.w $a2, 271040 +; LA64D-FRECIPE-NEXT: movgr2fr.w $fa3, $a2 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 ; LA64D-FRECIPE-NEXT: fst.s $fa2, $a0, 0 ; LA64D-FRECIPE-NEXT: fst.s $fa1, $a1, 0 ; LA64D-FRECIPE-NEXT: ret -; %sqrt = tail call fast float @llvm.sqrt.f32(float %x) %sqrt_fast = fdiv fast float %x, %sqrt %r1 = fdiv fast float 42.0, %sqrt diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll index 4990e7002562d..a6e3f790943aa 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -6,9 +6,8 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-LABEL: float_fadd_acquire: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB0_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -76,8 +75,9 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-LABEL: float_fsub_acquire: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) -; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI1_0) +; LA64F-NEXT: lu12i.w $a1, -264192 +; LA64F-NEXT: lu32i.d $a1, 0 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB1_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -145,9 +145,8 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-LABEL: float_fmin_acquire: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB2_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -215,9 +214,8 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-LABEL: float_fmax_acquire: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB3_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -573,9 +571,8 @@ define float 
@float_fadd_release(ptr %p) nounwind { ; LA64F-LABEL: float_fadd_release: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB8_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -643,8 +640,9 @@ define float @float_fsub_release(ptr %p) nounwind { ; LA64F-LABEL: float_fsub_release: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) -; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI9_0) +; LA64F-NEXT: lu12i.w $a1, -264192 +; LA64F-NEXT: lu32i.d $a1, 0 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB9_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -712,9 +710,8 @@ define float @float_fmin_release(ptr %p) nounwind { ; LA64F-LABEL: float_fmin_release: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB10_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -782,9 +779,8 @@ define float @float_fmax_release(ptr %p) nounwind { ; LA64F-LABEL: float_fmax_release: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB11_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1140,9 +1136,8 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { ; LA64F-LABEL: float_fadd_acq_rel: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 
; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB16_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1210,8 +1205,9 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { ; LA64F-LABEL: float_fsub_acq_rel: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI17_0) +; LA64F-NEXT: lu12i.w $a1, -264192 +; LA64F-NEXT: lu32i.d $a1, 0 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB17_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1279,9 +1275,8 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { ; LA64F-LABEL: float_fmin_acq_rel: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB18_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1349,9 +1344,8 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { ; LA64F-LABEL: float_fmax_acq_rel: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB19_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1707,9 +1701,8 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { ; LA64F-LABEL: float_fadd_seq_cst: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB24_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1777,8 +1770,9 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { ; LA64F-LABEL: float_fsub_seq_cst: ; LA64F: # %bb.0: ; 
LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) -; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI25_0) +; LA64F-NEXT: lu12i.w $a1, -264192 +; LA64F-NEXT: lu32i.d $a1, 0 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB25_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1846,9 +1840,8 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { ; LA64F-LABEL: float_fmin_seq_cst: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB26_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -1916,9 +1909,8 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { ; LA64F-LABEL: float_fmax_seq_cst: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB27_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -2274,9 +2266,8 @@ define float @float_fadd_monotonic(ptr %p) nounwind { ; LA64F-LABEL: float_fadd_monotonic: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB32_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -2344,8 +2335,9 @@ define float @float_fsub_monotonic(ptr %p) nounwind { ; LA64F-LABEL: float_fsub_monotonic: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) -; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI33_0) +; LA64F-NEXT: lu12i.w $a1, -264192 +; LA64F-NEXT: lu32i.d $a1, 0 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: 
.p2align 4, , 16 ; LA64F-NEXT: .LBB33_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -2413,9 +2405,8 @@ define float @float_fmin_monotonic(ptr %p) nounwind { ; LA64F-LABEL: float_fmin_monotonic: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB34_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 @@ -2483,9 +2474,8 @@ define float @float_fmax_monotonic(ptr %p) nounwind { ; LA64F-LABEL: float_fmax_monotonic: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 0 -; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: lu12i.w $a1, 260096 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB35_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll index 0b82ea220d7fb..ef211139afdf5 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll @@ -116,8 +116,9 @@ define i32 @convert_double_to_i32(double %a) nounwind { define i32 @convert_double_to_u32(double %a) nounwind { ; LA32-LABEL: convert_double_to_u32: ; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) -; LA32-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI7_0) +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: lu12i.w $a0, 269824 +; LA32-NEXT: movgr2frh.w $fa1, $a0 ; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 ; LA32-NEXT: bne $a0, $zero, .LBB7_2 @@ -173,8 +174,8 @@ define i64 @convert_double_to_u64(double %a) nounwind { ; ; LA64-LABEL: convert_double_to_u64: ; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0) -; LA64-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI9_0) 
+; LA64-NEXT: lu52i.d $a0, $zero, 1086 +; LA64-NEXT: movgr2fr.d $fa1, $a0 ; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsub.d $fa1, $fa0, $fa1 ; LA64-NEXT: ftintrz.l.d $fa1, $fa1 @@ -232,8 +233,8 @@ define double @convert_u32_to_double(i32 %a) nounwind { ; LA32-NEXT: st.w $a1, $sp, 12 ; LA32-NEXT: st.w $a0, $sp, 8 ; LA32-NEXT: fld.d $fa0, $sp, 8 -; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0) -; LA32-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI12_0) +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $a1 ; LA32-NEXT: fsub.d $fa0, $fa0, $fa1 ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret @@ -261,12 +262,13 @@ define double @convert_u64_to_double(i64 %a) nounwind { ; LA64-LABEL: convert_u64_to_double: ; LA64: # %bb.0: ; LA64-NEXT: srli.d $a1, $a0, 32 -; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI13_0) -; LA64-NEXT: fld.d $fa0, $a2, %pc_lo12(.LCPI13_0) ; LA64-NEXT: lu52i.d $a2, $zero, 1107 ; LA64-NEXT: or $a1, $a1, $a2 +; LA64-NEXT: movgr2fr.d $fa0, $a1 +; LA64-NEXT: lu12i.w $a1, 256 +; LA64-NEXT: lu52i.d $a1, $a1, 1107 ; LA64-NEXT: movgr2fr.d $fa1, $a1 -; LA64-NEXT: fsub.d $fa0, $fa1, $fa0 +; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 ; LA64-NEXT: lu12i.w $a1, 275200 ; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 ; LA64-NEXT: movgr2fr.d $fa1, $a0 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll index 413702b006b1b..8328bb02cf8b5 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll @@ -181,8 +181,8 @@ define zeroext i16 @convert_float_to_u16(float %a) nounwind { define i32 @convert_float_to_u32(float %a) nounwind { ; LA32F-LABEL: convert_float_to_u32: ; LA32F: # %bb.0: -; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA32F-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI6_0) +; LA32F-NEXT: lu12i.w $a0, 323584 +; LA32F-NEXT: movgr2fr.w $fa1, $a0 ; LA32F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA32F-NEXT: 
movcf2gr $a0, $fcc0 ; LA32F-NEXT: bne $a0, $zero, .LBB6_2 @@ -200,8 +200,8 @@ define i32 @convert_float_to_u32(float %a) nounwind { ; ; LA32D-LABEL: convert_float_to_u32: ; LA32D: # %bb.0: -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI6_0) +; LA32D-NEXT: lu12i.w $a0, 323584 +; LA32D-NEXT: movgr2fr.w $fa1, $a0 ; LA32D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA32D-NEXT: movcf2gr $a0, $fcc0 ; LA32D-NEXT: bne $a0, $zero, .LBB6_2 @@ -219,8 +219,8 @@ define i32 @convert_float_to_u32(float %a) nounwind { ; ; LA64F-LABEL: convert_float_to_u32: ; LA64F: # %bb.0: -; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; LA64F-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI6_0) +; LA64F-NEXT: lu12i.w $a0, 323584 +; LA64F-NEXT: movgr2fr.w $fa1, $a0 ; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA64F-NEXT: fsub.s $fa1, $fa0, $fa1 ; LA64F-NEXT: ftintrz.w.s $fa1, $fa1 @@ -265,8 +265,8 @@ define i64 @convert_float_to_u64(float %a) nounwind { ; ; LA64F-LABEL: convert_float_to_u64: ; LA64F: # %bb.0: -; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) -; LA64F-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI7_0) +; LA64F-NEXT: lu12i.w $a0, 389120 +; LA64F-NEXT: movgr2fr.w $fa1, $a0 ; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA64F-NEXT: fsub.s $fa1, $fa0, $fa1 ; LA64F-NEXT: ftintrz.w.s $fa1, $fa1 @@ -283,8 +283,8 @@ define i64 @convert_float_to_u64(float %a) nounwind { ; ; LA64D-LABEL: convert_float_to_u64: ; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) -; LA64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI7_0) +; LA64D-NEXT: lu12i.w $a0, 389120 +; LA64D-NEXT: movgr2fr.w $fa1, $a0 ; LA64D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA64D-NEXT: fsub.s $fa1, $fa0, $fa1 ; LA64D-NEXT: ftintrz.l.s $fa1, $fa1 @@ -503,8 +503,8 @@ define float @convert_u32_to_float(i32 %a) nounwind { ; LA32D-NEXT: st.w $a1, $sp, 12 ; LA32D-NEXT: st.w $a0, $sp, 8 ; LA32D-NEXT: fld.d $fa0, $sp, 8 -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0) -; LA32D-NEXT: fld.d $fa1, $a0, 
%pc_lo12(.LCPI14_0) +; LA32D-NEXT: movgr2fr.w $fa1, $zero +; LA32D-NEXT: movgr2frh.w $fa1, $a1 ; LA32D-NEXT: fsub.d $fa0, $fa0, $fa1 ; LA32D-NEXT: fcvt.s.d $fa0, $fa0 ; LA32D-NEXT: addi.w $sp, $sp, 16 diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll index 6e9d26ab362d6..d92de139ce672 100644 --- a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll +++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll @@ -52,10 +52,9 @@ define float @f(float %a) { ; ILP32D-LABEL: f: ; ILP32D: # %bb.0: -; ILP32D-NEXT: addi.w $a0, $zero, 1 -; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; ILP32D-NEXT: lu12i.w $a0, 260096 +; ILP32D-NEXT: movgr2fr.w $fa1, $a0 +; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 ; ILP32D-NEXT: ret ; ; LP64D-LABEL: f: @@ -71,11 +70,10 @@ define float @f(float %a) { ; LP64S-LP64D-NOD-LABEL: f: ; LP64S-LP64D-NOD: # %bb.0: ; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa0, $a0 -; LP64S-LP64D-NOD-NEXT: addi.w $a0, $zero, 1 -; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa1, $a0 -; LP64S-LP64D-NOD-NEXT: ffint.s.w $fa1, $fa1 -; LP64S-LP64D-NOD-NEXT: fadd.s $fa0, $fa0, $fa1 -; LP64S-LP64D-NOD-NEXT: movfr2gr.s $a0, $fa0 +; LP64S-LP64D-NOD-NEXT: lu12i.w $a0, 260096 +; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa1, $a0 +; LP64S-LP64D-NOD-NEXT: fadd.s $fa0, $fa0, $fa1 +; LP64S-LP64D-NOD-NEXT: movfr2gr.s $a0, $fa0 ; LP64S-LP64D-NOD-NEXT: ret ; ; LP64D-LP64F-NOF-LABEL: f: @@ -92,10 +90,9 @@ define float @f(float %a) { define double @g(double %a) { ; ILP32D-LABEL: g: ; ILP32D: # %bb.0: -; ILP32D-NEXT: addi.w $a0, $zero, 1 -; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 +; ILP32D-NEXT: movgr2fr.w $fa1, $zero +; ILP32D-NEXT: lu12i.w $a0, 261888 +; ILP32D-NEXT: movgr2frh.w $fa1, $a0 ; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 ; ILP32D-NEXT: ret ; diff --git 
a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll index c8a33725267a2..b093f6f6010bc 100644 --- a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll +++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll @@ -10,9 +10,8 @@ define float @f(float %a) { ; ILP32D-LABEL: f: ; ILP32D: # %bb.0: -; ILP32D-NEXT: addi.w $a0, $zero, 1 +; ILP32D-NEXT: lu12i.w $a0, 260096 ; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 ; ILP32D-NEXT: ret ; @@ -28,10 +27,9 @@ define float @f(float %a) { define double @g(double %a) { ; ILP32D-LABEL: g: ; ILP32D: # %bb.0: -; ILP32D-NEXT: addi.w $a0, $zero, 1 -; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 +; ILP32D-NEXT: movgr2fr.w $fa1, $zero +; ILP32D-NEXT: lu12i.w $a0, 261888 +; ILP32D-NEXT: movgr2frh.w $fa1, $a0 ; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 ; ILP32D-NEXT: ret ; diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll index 285527fca11cf..16c9e754fb94d 100644 --- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll +++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll @@ -81,47 +81,44 @@ define void @test_zero(ptr %P, ptr %S) nounwind { define void @test_f2(ptr %P, ptr %S) nounwind { ; LA32F-LABEL: test_f2: ; LA32F: # %bb.0: -; LA32F-NEXT: fld.s $fa0, $a0, 4 -; LA32F-NEXT: fld.s $fa1, $a0, 0 -; LA32F-NEXT: addi.w $a0, $zero, 1 -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) -; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0) -; LA32F-NEXT: movgr2fr.w $fa3, $a0 -; LA32F-NEXT: ffint.s.w $fa3, $fa3 -; LA32F-NEXT: fadd.s $fa1, $fa1, $fa3 +; LA32F-NEXT: fld.s $fa0, $a0, 0 +; LA32F-NEXT: fld.s $fa1, $a0, 4 +; LA32F-NEXT: lu12i.w $a0, 260096 +; LA32F-NEXT: movgr2fr.w $fa2, $a0 ; LA32F-NEXT: fadd.s $fa0, $fa0, $fa2 -; LA32F-NEXT: fst.s $fa0, $a1, 4 -; LA32F-NEXT: fst.s $fa1, $a1, 0 +; LA32F-NEXT: lu12i.w $a0, 
262144 +; LA32F-NEXT: movgr2fr.w $fa2, $a0 +; LA32F-NEXT: fadd.s $fa1, $fa1, $fa2 +; LA32F-NEXT: fst.s $fa1, $a1, 4 +; LA32F-NEXT: fst.s $fa0, $a1, 0 ; LA32F-NEXT: ret ; ; LA32D-LABEL: test_f2: ; LA32D: # %bb.0: -; LA32D-NEXT: fld.s $fa0, $a0, 4 -; LA32D-NEXT: fld.s $fa1, $a0, 0 -; LA32D-NEXT: addi.w $a0, $zero, 1 -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) -; LA32D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0) -; LA32D-NEXT: movgr2fr.w $fa3, $a0 -; LA32D-NEXT: ffint.s.w $fa3, $fa3 -; LA32D-NEXT: fadd.s $fa1, $fa1, $fa3 +; LA32D-NEXT: fld.s $fa0, $a0, 0 +; LA32D-NEXT: fld.s $fa1, $a0, 4 +; LA32D-NEXT: lu12i.w $a0, 260096 +; LA32D-NEXT: movgr2fr.w $fa2, $a0 ; LA32D-NEXT: fadd.s $fa0, $fa0, $fa2 -; LA32D-NEXT: fst.s $fa0, $a1, 4 -; LA32D-NEXT: fst.s $fa1, $a1, 0 +; LA32D-NEXT: lu12i.w $a0, 262144 +; LA32D-NEXT: movgr2fr.w $fa2, $a0 +; LA32D-NEXT: fadd.s $fa1, $fa1, $fa2 +; LA32D-NEXT: fst.s $fa1, $a1, 4 +; LA32D-NEXT: fst.s $fa0, $a1, 0 ; LA32D-NEXT: ret ; ; LA64F-LABEL: test_f2: ; LA64F: # %bb.0: -; LA64F-NEXT: fld.s $fa0, $a0, 4 -; LA64F-NEXT: fld.s $fa1, $a0, 0 -; LA64F-NEXT: addi.w $a0, $zero, 1 -; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) -; LA64F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0) -; LA64F-NEXT: movgr2fr.w $fa3, $a0 -; LA64F-NEXT: ffint.s.w $fa3, $fa3 -; LA64F-NEXT: fadd.s $fa1, $fa1, $fa3 +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: fld.s $fa1, $a0, 4 +; LA64F-NEXT: lu12i.w $a0, 260096 +; LA64F-NEXT: movgr2fr.w $fa2, $a0 ; LA64F-NEXT: fadd.s $fa0, $fa0, $fa2 -; LA64F-NEXT: fst.s $fa0, $a1, 4 -; LA64F-NEXT: fst.s $fa1, $a1, 0 +; LA64F-NEXT: lu12i.w $a0, 262144 +; LA64F-NEXT: movgr2fr.w $fa2, $a0 +; LA64F-NEXT: fadd.s $fa1, $fa1, $fa2 +; LA64F-NEXT: fst.s $fa1, $a1, 4 +; LA64F-NEXT: fst.s $fa0, $a1, 0 ; LA64F-NEXT: ret ; ; LA64D-LABEL: test_f2: @@ -145,75 +142,72 @@ define void @test_f4(ptr %P, ptr %S) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: fld.s $fa0, $a0, 12 ; LA32F-NEXT: fld.s $fa1, $a0, 8 -; LA32F-NEXT: fld.s $fa2, $a0, 4 -; LA32F-NEXT: 
fld.s $fa3, $a0, 0 -; LA32F-NEXT: addi.w $a0, $zero, 1 +; LA32F-NEXT: fld.s $fa2, $a0, 0 +; LA32F-NEXT: fld.s $fa3, $a0, 4 +; LA32F-NEXT: lu12i.w $a0, 260096 +; LA32F-NEXT: movgr2fr.w $fa4, $a0 +; LA32F-NEXT: fadd.s $fa2, $fa2, $fa4 +; LA32F-NEXT: lu12i.w $a0, 262144 ; LA32F-NEXT: movgr2fr.w $fa4, $a0 -; LA32F-NEXT: ffint.s.w $fa4, $fa4 -; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA32F-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0) -; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1) -; LA32F-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1) -; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2) -; LA32F-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI2_2) ; LA32F-NEXT: fadd.s $fa3, $fa3, $fa4 -; LA32F-NEXT: fadd.s $fa2, $fa2, $fa5 -; LA32F-NEXT: fadd.s $fa1, $fa1, $fa6 -; LA32F-NEXT: fadd.s $fa0, $fa0, $fa7 +; LA32F-NEXT: lu12i.w $a0, 263168 +; LA32F-NEXT: movgr2fr.w $fa4, $a0 +; LA32F-NEXT: fadd.s $fa1, $fa1, $fa4 +; LA32F-NEXT: lu12i.w $a0, 264192 +; LA32F-NEXT: movgr2fr.w $fa4, $a0 +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa4 ; LA32F-NEXT: fst.s $fa0, $a1, 12 ; LA32F-NEXT: fst.s $fa1, $a1, 8 -; LA32F-NEXT: fst.s $fa2, $a1, 4 -; LA32F-NEXT: fst.s $fa3, $a1, 0 +; LA32F-NEXT: fst.s $fa3, $a1, 4 +; LA32F-NEXT: fst.s $fa2, $a1, 0 ; LA32F-NEXT: ret ; ; LA32D-LABEL: test_f4: ; LA32D: # %bb.0: ; LA32D-NEXT: fld.s $fa0, $a0, 12 ; LA32D-NEXT: fld.s $fa1, $a0, 8 -; LA32D-NEXT: fld.s $fa2, $a0, 4 -; LA32D-NEXT: fld.s $fa3, $a0, 0 -; LA32D-NEXT: addi.w $a0, $zero, 1 +; LA32D-NEXT: fld.s $fa2, $a0, 0 +; LA32D-NEXT: fld.s $fa3, $a0, 4 +; LA32D-NEXT: lu12i.w $a0, 260096 +; LA32D-NEXT: movgr2fr.w $fa4, $a0 +; LA32D-NEXT: fadd.s $fa2, $fa2, $fa4 +; LA32D-NEXT: lu12i.w $a0, 262144 ; LA32D-NEXT: movgr2fr.w $fa4, $a0 -; LA32D-NEXT: ffint.s.w $fa4, $fa4 -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0) -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1) -; LA32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1) -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2) -; LA32D-NEXT: 
fld.s $fa7, $a0, %pc_lo12(.LCPI2_2) ; LA32D-NEXT: fadd.s $fa3, $fa3, $fa4 -; LA32D-NEXT: fadd.s $fa2, $fa2, $fa5 -; LA32D-NEXT: fadd.s $fa1, $fa1, $fa6 -; LA32D-NEXT: fadd.s $fa0, $fa0, $fa7 +; LA32D-NEXT: lu12i.w $a0, 263168 +; LA32D-NEXT: movgr2fr.w $fa4, $a0 +; LA32D-NEXT: fadd.s $fa1, $fa1, $fa4 +; LA32D-NEXT: lu12i.w $a0, 264192 +; LA32D-NEXT: movgr2fr.w $fa4, $a0 +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa4 ; LA32D-NEXT: fst.s $fa0, $a1, 12 ; LA32D-NEXT: fst.s $fa1, $a1, 8 -; LA32D-NEXT: fst.s $fa2, $a1, 4 -; LA32D-NEXT: fst.s $fa3, $a1, 0 +; LA32D-NEXT: fst.s $fa3, $a1, 4 +; LA32D-NEXT: fst.s $fa2, $a1, 0 ; LA32D-NEXT: ret ; ; LA64F-LABEL: test_f4: ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 12 ; LA64F-NEXT: fld.s $fa1, $a0, 8 -; LA64F-NEXT: fld.s $fa2, $a0, 4 -; LA64F-NEXT: fld.s $fa3, $a0, 0 -; LA64F-NEXT: addi.w $a0, $zero, 1 +; LA64F-NEXT: fld.s $fa2, $a0, 0 +; LA64F-NEXT: fld.s $fa3, $a0, 4 +; LA64F-NEXT: lu12i.w $a0, 260096 +; LA64F-NEXT: movgr2fr.w $fa4, $a0 +; LA64F-NEXT: fadd.s $fa2, $fa2, $fa4 +; LA64F-NEXT: lu12i.w $a0, 262144 ; LA64F-NEXT: movgr2fr.w $fa4, $a0 -; LA64F-NEXT: ffint.s.w $fa4, $fa4 -; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA64F-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0) -; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1) -; LA64F-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1) -; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2) -; LA64F-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI2_2) ; LA64F-NEXT: fadd.s $fa3, $fa3, $fa4 -; LA64F-NEXT: fadd.s $fa2, $fa2, $fa5 -; LA64F-NEXT: fadd.s $fa1, $fa1, $fa6 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa7 +; LA64F-NEXT: lu12i.w $a0, 263168 +; LA64F-NEXT: movgr2fr.w $fa4, $a0 +; LA64F-NEXT: fadd.s $fa1, $fa1, $fa4 +; LA64F-NEXT: lu12i.w $a0, 264192 +; LA64F-NEXT: movgr2fr.w $fa4, $a0 +; LA64F-NEXT: fadd.s $fa0, $fa0, $fa4 ; LA64F-NEXT: fst.s $fa0, $a1, 12 ; LA64F-NEXT: fst.s $fa1, $a1, 8 -; LA64F-NEXT: fst.s $fa2, $a1, 4 -; LA64F-NEXT: fst.s $fa3, $a1, 0 +; LA64F-NEXT: fst.s $fa3, $a1, 4 +; LA64F-NEXT: fst.s 
$fa2, $a1, 0 ; LA64F-NEXT: ret ; ; LA64D-LABEL: test_f4: @@ -233,113 +227,110 @@ define void @test_f4(ptr %P, ptr %S) nounwind { define void @test_f8(ptr %P, ptr %S) nounwind { ; LA32F-LABEL: test_f8: ; LA32F: # %bb.0: -; LA32F-NEXT: addi.w $a2, $zero, 1 -; LA32F-NEXT: movgr2fr.w $fa0, $a2 -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) -; LA32F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0) -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) -; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1) -; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) -; LA32F-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2) -; LA32F-NEXT: fld.s $fa4, $a0, 28 -; LA32F-NEXT: fld.s $fa5, $a0, 24 -; LA32F-NEXT: fld.s $fa6, $a0, 12 -; LA32F-NEXT: fld.s $fa7, $a0, 8 -; LA32F-NEXT: fld.s $ft0, $a0, 0 -; LA32F-NEXT: fld.s $ft1, $a0, 16 -; LA32F-NEXT: fld.s $ft2, $a0, 4 -; LA32F-NEXT: ffint.s.w $fa0, $fa0 -; LA32F-NEXT: fadd.s $ft0, $ft0, $fa0 -; LA32F-NEXT: fadd.s $fa0, $ft1, $fa0 -; LA32F-NEXT: fld.s $ft1, $a0, 20 -; LA32F-NEXT: fadd.s $ft2, $ft2, $fa1 -; LA32F-NEXT: fadd.s $fa7, $fa7, $fa2 -; LA32F-NEXT: fadd.s $fa6, $fa6, $fa3 -; LA32F-NEXT: fadd.s $fa1, $ft1, $fa1 -; LA32F-NEXT: fadd.s $fa2, $fa5, $fa2 -; LA32F-NEXT: fadd.s $fa3, $fa4, $fa3 -; LA32F-NEXT: fst.s $fa3, $a1, 28 -; LA32F-NEXT: fst.s $fa2, $a1, 24 -; LA32F-NEXT: fst.s $fa1, $a1, 20 -; LA32F-NEXT: fst.s $fa6, $a1, 12 -; LA32F-NEXT: fst.s $fa7, $a1, 8 -; LA32F-NEXT: fst.s $ft2, $a1, 4 -; LA32F-NEXT: fst.s $fa0, $a1, 16 -; LA32F-NEXT: fst.s $ft0, $a1, 0 +; LA32F-NEXT: fld.s $fa0, $a0, 28 +; LA32F-NEXT: fld.s $fa1, $a0, 24 +; LA32F-NEXT: fld.s $fa2, $a0, 20 +; LA32F-NEXT: fld.s $fa3, $a0, 16 +; LA32F-NEXT: fld.s $fa4, $a0, 12 +; LA32F-NEXT: fld.s $fa5, $a0, 8 +; LA32F-NEXT: fld.s $fa6, $a0, 0 +; LA32F-NEXT: fld.s $fa7, $a0, 4 +; LA32F-NEXT: lu12i.w $a0, 260096 +; LA32F-NEXT: movgr2fr.w $ft0, $a0 +; LA32F-NEXT: fadd.s $fa6, $fa6, $ft0 +; LA32F-NEXT: lu12i.w $a0, 262144 +; LA32F-NEXT: movgr2fr.w $ft1, $a0 +; LA32F-NEXT: fadd.s $fa7, $fa7, $ft1 +; 
LA32F-NEXT: lu12i.w $a0, 263168 +; LA32F-NEXT: movgr2fr.w $ft2, $a0 +; LA32F-NEXT: fadd.s $fa5, $fa5, $ft2 +; LA32F-NEXT: lu12i.w $a0, 264192 +; LA32F-NEXT: movgr2fr.w $ft3, $a0 +; LA32F-NEXT: fadd.s $fa4, $fa4, $ft3 +; LA32F-NEXT: fadd.s $fa3, $fa3, $ft0 +; LA32F-NEXT: fadd.s $fa2, $fa2, $ft1 +; LA32F-NEXT: fadd.s $fa1, $fa1, $ft2 +; LA32F-NEXT: fadd.s $fa0, $fa0, $ft3 +; LA32F-NEXT: fst.s $fa0, $a1, 28 +; LA32F-NEXT: fst.s $fa1, $a1, 24 +; LA32F-NEXT: fst.s $fa2, $a1, 20 +; LA32F-NEXT: fst.s $fa3, $a1, 16 +; LA32F-NEXT: fst.s $fa4, $a1, 12 +; LA32F-NEXT: fst.s $fa5, $a1, 8 +; LA32F-NEXT: fst.s $fa7, $a1, 4 +; LA32F-NEXT: fst.s $fa6, $a1, 0 ; LA32F-NEXT: ret ; ; LA32D-LABEL: test_f8: ; LA32D: # %bb.0: -; LA32D-NEXT: addi.w $a2, $zero, 1 -; LA32D-NEXT: movgr2fr.w $fa0, $a2 -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) -; LA32D-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0) -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) -; LA32D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1) -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) -; LA32D-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2) -; LA32D-NEXT: fld.s $fa4, $a0, 28 -; LA32D-NEXT: fld.s $fa5, $a0, 24 -; LA32D-NEXT: fld.s $fa6, $a0, 12 -; LA32D-NEXT: fld.s $fa7, $a0, 8 -; LA32D-NEXT: fld.s $ft0, $a0, 0 -; LA32D-NEXT: fld.s $ft1, $a0, 16 -; LA32D-NEXT: fld.s $ft2, $a0, 4 -; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: fadd.s $ft0, $ft0, $fa0 -; LA32D-NEXT: fadd.s $fa0, $ft1, $fa0 -; LA32D-NEXT: fld.s $ft1, $a0, 20 -; LA32D-NEXT: fadd.s $ft2, $ft2, $fa1 -; LA32D-NEXT: fadd.s $fa7, $fa7, $fa2 -; LA32D-NEXT: fadd.s $fa6, $fa6, $fa3 -; LA32D-NEXT: fadd.s $fa1, $ft1, $fa1 -; LA32D-NEXT: fadd.s $fa2, $fa5, $fa2 -; LA32D-NEXT: fadd.s $fa3, $fa4, $fa3 -; LA32D-NEXT: fst.s $fa3, $a1, 28 -; LA32D-NEXT: fst.s $fa2, $a1, 24 -; LA32D-NEXT: fst.s $fa1, $a1, 20 -; LA32D-NEXT: fst.s $fa6, $a1, 12 -; LA32D-NEXT: fst.s $fa7, $a1, 8 -; LA32D-NEXT: fst.s $ft2, $a1, 4 -; LA32D-NEXT: fst.s $fa0, $a1, 16 -; LA32D-NEXT: fst.s $ft0, $a1, 0 +; 
LA32D-NEXT: fld.s $fa0, $a0, 28 +; LA32D-NEXT: fld.s $fa1, $a0, 24 +; LA32D-NEXT: fld.s $fa2, $a0, 20 +; LA32D-NEXT: fld.s $fa3, $a0, 16 +; LA32D-NEXT: fld.s $fa4, $a0, 12 +; LA32D-NEXT: fld.s $fa5, $a0, 8 +; LA32D-NEXT: fld.s $fa6, $a0, 0 +; LA32D-NEXT: fld.s $fa7, $a0, 4 +; LA32D-NEXT: lu12i.w $a0, 260096 +; LA32D-NEXT: movgr2fr.w $ft0, $a0 +; LA32D-NEXT: fadd.s $fa6, $fa6, $ft0 +; LA32D-NEXT: lu12i.w $a0, 262144 +; LA32D-NEXT: movgr2fr.w $ft1, $a0 +; LA32D-NEXT: fadd.s $fa7, $fa7, $ft1 +; LA32D-NEXT: lu12i.w $a0, 263168 +; LA32D-NEXT: movgr2fr.w $ft2, $a0 +; LA32D-NEXT: fadd.s $fa5, $fa5, $ft2 +; LA32D-NEXT: lu12i.w $a0, 264192 +; LA32D-NEXT: movgr2fr.w $ft3, $a0 +; LA32D-NEXT: fadd.s $fa4, $fa4, $ft3 +; LA32D-NEXT: fadd.s $fa3, $fa3, $ft0 +; LA32D-NEXT: fadd.s $fa2, $fa2, $ft1 +; LA32D-NEXT: fadd.s $fa1, $fa1, $ft2 +; LA32D-NEXT: fadd.s $fa0, $fa0, $ft3 +; LA32D-NEXT: fst.s $fa0, $a1, 28 +; LA32D-NEXT: fst.s $fa1, $a1, 24 +; LA32D-NEXT: fst.s $fa2, $a1, 20 +; LA32D-NEXT: fst.s $fa3, $a1, 16 +; LA32D-NEXT: fst.s $fa4, $a1, 12 +; LA32D-NEXT: fst.s $fa5, $a1, 8 +; LA32D-NEXT: fst.s $fa7, $a1, 4 +; LA32D-NEXT: fst.s $fa6, $a1, 0 ; LA32D-NEXT: ret ; ; LA64F-LABEL: test_f8: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.w $a2, $zero, 1 -; LA64F-NEXT: movgr2fr.w $fa0, $a2 -; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) -; LA64F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0) -; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) -; LA64F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1) -; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) -; LA64F-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2) -; LA64F-NEXT: fld.s $fa4, $a0, 28 -; LA64F-NEXT: fld.s $fa5, $a0, 24 -; LA64F-NEXT: fld.s $fa6, $a0, 12 -; LA64F-NEXT: fld.s $fa7, $a0, 8 -; LA64F-NEXT: fld.s $ft0, $a0, 0 -; LA64F-NEXT: fld.s $ft1, $a0, 16 -; LA64F-NEXT: fld.s $ft2, $a0, 4 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $ft0, $ft0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $ft1, $fa0 -; LA64F-NEXT: fld.s $ft1, $a0, 20 -; LA64F-NEXT: fadd.s $ft2, 
$ft2, $fa1 -; LA64F-NEXT: fadd.s $fa7, $fa7, $fa2 -; LA64F-NEXT: fadd.s $fa6, $fa6, $fa3 -; LA64F-NEXT: fadd.s $fa1, $ft1, $fa1 -; LA64F-NEXT: fadd.s $fa2, $fa5, $fa2 -; LA64F-NEXT: fadd.s $fa3, $fa4, $fa3 -; LA64F-NEXT: fst.s $fa3, $a1, 28 -; LA64F-NEXT: fst.s $fa2, $a1, 24 -; LA64F-NEXT: fst.s $fa1, $a1, 20 -; LA64F-NEXT: fst.s $fa6, $a1, 12 -; LA64F-NEXT: fst.s $fa7, $a1, 8 -; LA64F-NEXT: fst.s $ft2, $a1, 4 -; LA64F-NEXT: fst.s $fa0, $a1, 16 -; LA64F-NEXT: fst.s $ft0, $a1, 0 +; LA64F-NEXT: fld.s $fa0, $a0, 28 +; LA64F-NEXT: fld.s $fa1, $a0, 24 +; LA64F-NEXT: fld.s $fa2, $a0, 20 +; LA64F-NEXT: fld.s $fa3, $a0, 16 +; LA64F-NEXT: fld.s $fa4, $a0, 12 +; LA64F-NEXT: fld.s $fa5, $a0, 8 +; LA64F-NEXT: fld.s $fa6, $a0, 0 +; LA64F-NEXT: fld.s $fa7, $a0, 4 +; LA64F-NEXT: lu12i.w $a0, 260096 +; LA64F-NEXT: movgr2fr.w $ft0, $a0 +; LA64F-NEXT: fadd.s $fa6, $fa6, $ft0 +; LA64F-NEXT: lu12i.w $a0, 262144 +; LA64F-NEXT: movgr2fr.w $ft1, $a0 +; LA64F-NEXT: fadd.s $fa7, $fa7, $ft1 +; LA64F-NEXT: lu12i.w $a0, 263168 +; LA64F-NEXT: movgr2fr.w $ft2, $a0 +; LA64F-NEXT: fadd.s $fa5, $fa5, $ft2 +; LA64F-NEXT: lu12i.w $a0, 264192 +; LA64F-NEXT: movgr2fr.w $ft3, $a0 +; LA64F-NEXT: fadd.s $fa4, $fa4, $ft3 +; LA64F-NEXT: fadd.s $fa3, $fa3, $ft0 +; LA64F-NEXT: fadd.s $fa2, $fa2, $ft1 +; LA64F-NEXT: fadd.s $fa1, $fa1, $ft2 +; LA64F-NEXT: fadd.s $fa0, $fa0, $ft3 +; LA64F-NEXT: fst.s $fa0, $a1, 28 +; LA64F-NEXT: fst.s $fa1, $a1, 24 +; LA64F-NEXT: fst.s $fa2, $a1, 20 +; LA64F-NEXT: fst.s $fa3, $a1, 16 +; LA64F-NEXT: fst.s $fa4, $a1, 12 +; LA64F-NEXT: fst.s $fa5, $a1, 8 +; LA64F-NEXT: fst.s $fa7, $a1, 4 +; LA64F-NEXT: fst.s $fa6, $a1, 0 ; LA64F-NEXT: ret ; ; LA64D-LABEL: test_f8: @@ -403,14 +394,14 @@ define void @test_d2(ptr %P, ptr %S) nounwind { ; LA32D: # %bb.0: ; LA32D-NEXT: fld.d $fa0, $a0, 8 ; LA32D-NEXT: fld.d $fa1, $a0, 0 -; LA32D-NEXT: addi.w $a0, $zero, 1 -; LA32D-NEXT: movgr2fr.w $fa2, $a0 -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) -; LA32D-NEXT: fld.d $fa3, $a0, 
%pc_lo12(.LCPI4_0) -; LA32D-NEXT: ffint.s.w $fa2, $fa2 -; LA32D-NEXT: fcvt.d.s $fa2, $fa2 -; LA32D-NEXT: fadd.d $fa1, $fa1, $fa2 -; LA32D-NEXT: fadd.d $fa0, $fa0, $fa3 +; LA32D-NEXT: movgr2fr.w $fa2, $zero +; LA32D-NEXT: lu12i.w $a0, 261888 +; LA32D-NEXT: fmov.d $fa3, $fa2 +; LA32D-NEXT: movgr2frh.w $fa3, $a0 +; LA32D-NEXT: fadd.d $fa1, $fa1, $fa3 +; LA32D-NEXT: lu12i.w $a0, 262144 +; LA32D-NEXT: movgr2frh.w $fa2, $a0 +; LA32D-NEXT: fadd.d $fa0, $fa0, $fa2 ; LA32D-NEXT: fst.d $fa0, $a1, 8 ; LA32D-NEXT: fst.d $fa1, $a1, 0 ; LA32D-NEXT: ret @@ -528,26 +519,27 @@ define void @test_d4(ptr %P, ptr %S) nounwind { ; ; LA32D-LABEL: test_d4: ; LA32D: # %bb.0: -; LA32D-NEXT: fld.d $fa0, $a0, 24 -; LA32D-NEXT: fld.d $fa1, $a0, 16 +; LA32D-NEXT: fld.d $fa0, $a0, 16 +; LA32D-NEXT: fld.d $fa1, $a0, 24 ; LA32D-NEXT: fld.d $fa2, $a0, 8 ; LA32D-NEXT: fld.d $fa3, $a0, 0 -; LA32D-NEXT: addi.w $a0, $zero, 1 -; LA32D-NEXT: movgr2fr.w $fa4, $a0 -; LA32D-NEXT: ffint.s.w $fa4, $fa4 -; LA32D-NEXT: fcvt.d.s $fa4, $fa4 +; LA32D-NEXT: movgr2fr.w $fa4, $zero +; LA32D-NEXT: lu12i.w $a0, 261888 +; LA32D-NEXT: fmov.d $fa5, $fa4 +; LA32D-NEXT: movgr2frh.w $fa5, $a0 +; LA32D-NEXT: fadd.d $fa3, $fa3, $fa5 +; LA32D-NEXT: lu12i.w $a0, 262144 +; LA32D-NEXT: fmov.d $fa5, $fa4 +; LA32D-NEXT: movgr2frh.w $fa5, $a0 +; LA32D-NEXT: fadd.d $fa2, $fa2, $fa5 ; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) ; LA32D-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_0) -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA32D-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_1) -; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA32D-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_2) -; LA32D-NEXT: fadd.d $fa3, $fa3, $fa4 -; LA32D-NEXT: fadd.d $fa2, $fa2, $fa5 -; LA32D-NEXT: fadd.d $fa1, $fa1, $fa6 -; LA32D-NEXT: fadd.d $fa0, $fa0, $fa7 -; LA32D-NEXT: fst.d $fa0, $a1, 24 -; LA32D-NEXT: fst.d $fa1, $a1, 16 +; LA32D-NEXT: lu12i.w $a0, 262400 +; LA32D-NEXT: movgr2frh.w $fa4, $a0 +; LA32D-NEXT: fadd.d $fa1, $fa1, $fa4 +; LA32D-NEXT: fadd.d $fa0, 
$fa0, $fa5 +; LA32D-NEXT: fst.d $fa0, $a1, 16 +; LA32D-NEXT: fst.d $fa1, $a1, 24 ; LA32D-NEXT: fst.d $fa2, $a1, 8 ; LA32D-NEXT: fst.d $fa3, $a1, 0 ; LA32D-NEXT: ret @@ -748,40 +740,41 @@ define void @test_d8(ptr %P, ptr %S) nounwind { ; ; LA32D-LABEL: test_d8: ; LA32D: # %bb.0: -; LA32D-NEXT: addi.w $a2, $zero, 1 -; LA32D-NEXT: movgr2fr.w $fa0, $a2 -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0) -; LA32D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI6_0) -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_1) -; LA32D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI6_1) -; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_2) -; LA32D-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI6_2) -; LA32D-NEXT: fld.d $fa4, $a0, 56 -; LA32D-NEXT: fld.d $fa5, $a0, 48 -; LA32D-NEXT: fld.d $fa6, $a0, 24 -; LA32D-NEXT: fld.d $fa7, $a0, 16 -; LA32D-NEXT: fld.d $ft0, $a0, 8 -; LA32D-NEXT: fld.d $ft1, $a0, 0 -; LA32D-NEXT: fld.d $ft2, $a0, 32 -; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: fcvt.d.s $fa0, $fa0 -; LA32D-NEXT: fadd.d $ft1, $ft1, $fa0 -; LA32D-NEXT: fadd.d $fa0, $ft2, $fa0 -; LA32D-NEXT: fld.d $ft2, $a0, 40 -; LA32D-NEXT: fadd.d $ft0, $ft0, $fa1 -; LA32D-NEXT: fadd.d $fa7, $fa7, $fa2 -; LA32D-NEXT: fadd.d $fa6, $fa6, $fa3 -; LA32D-NEXT: fadd.d $fa1, $ft2, $fa1 -; LA32D-NEXT: fadd.d $fa2, $fa5, $fa2 -; LA32D-NEXT: fadd.d $fa3, $fa4, $fa3 -; LA32D-NEXT: fst.d $fa3, $a1, 56 -; LA32D-NEXT: fst.d $fa2, $a1, 48 -; LA32D-NEXT: fst.d $fa1, $a1, 40 -; LA32D-NEXT: fst.d $fa6, $a1, 24 -; LA32D-NEXT: fst.d $fa7, $a1, 16 -; LA32D-NEXT: fst.d $ft0, $a1, 8 -; LA32D-NEXT: fst.d $fa0, $a1, 32 -; LA32D-NEXT: fst.d $ft1, $a1, 0 +; LA32D-NEXT: fld.d $fa0, $a0, 48 +; LA32D-NEXT: fld.d $fa1, $a0, 16 +; LA32D-NEXT: fld.d $fa2, $a0, 56 +; LA32D-NEXT: fld.d $fa3, $a0, 40 +; LA32D-NEXT: fld.d $fa4, $a0, 32 +; LA32D-NEXT: fld.d $fa5, $a0, 24 +; LA32D-NEXT: fld.d $fa6, $a0, 8 +; LA32D-NEXT: fld.d $fa7, $a0, 0 +; LA32D-NEXT: movgr2fr.w $ft0, $zero +; LA32D-NEXT: lu12i.w $a0, 261888 +; LA32D-NEXT: fmov.d $ft1, $ft0 +; LA32D-NEXT: 
movgr2frh.w $ft1, $a0 +; LA32D-NEXT: fadd.d $fa7, $fa7, $ft1 +; LA32D-NEXT: lu12i.w $a0, 262144 +; LA32D-NEXT: fmov.d $ft2, $ft0 +; LA32D-NEXT: movgr2frh.w $ft2, $a0 +; LA32D-NEXT: fadd.d $fa6, $fa6, $ft2 +; LA32D-NEXT: lu12i.w $a0, 262400 +; LA32D-NEXT: movgr2frh.w $ft0, $a0 +; LA32D-NEXT: fadd.d $fa4, $fa4, $ft1 +; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) +; LA32D-NEXT: fld.d $ft1, $a0, %pc_lo12(.LCPI6_0) +; LA32D-NEXT: fadd.d $fa5, $fa5, $ft0 +; LA32D-NEXT: fadd.d $fa3, $fa3, $ft2 +; LA32D-NEXT: fadd.d $fa2, $fa2, $ft0 +; LA32D-NEXT: fadd.d $fa1, $fa1, $ft1 +; LA32D-NEXT: fadd.d $fa0, $fa0, $ft1 +; LA32D-NEXT: fst.d $fa0, $a1, 48 +; LA32D-NEXT: fst.d $fa1, $a1, 16 +; LA32D-NEXT: fst.d $fa2, $a1, 56 +; LA32D-NEXT: fst.d $fa3, $a1, 40 +; LA32D-NEXT: fst.d $fa4, $a1, 32 +; LA32D-NEXT: fst.d $fa5, $a1, 24 +; LA32D-NEXT: fst.d $fa6, $a1, 8 +; LA32D-NEXT: fst.d $fa7, $a1, 0 ; LA32D-NEXT: ret ; ; LA64F-LABEL: test_d8: