From 25fc7d1d06a7b6b6a9d19ed82586094f58f8c527 Mon Sep 17 00:00:00 2001
From: yangzhaoxin
Date: Wed, 29 Oct 2025 15:19:05 +0800
Subject: [PATCH] [LoongArch] Convert ld to fld when result is only used by sitofp

If the result of an integer load is only used by an integer-to-float
conversion, use an fp load instead. This eliminates an integer-to-float
move (movgr2fr) instruction.
---
 .../LoongArch/LoongArchFloat32InstrInfo.td    |  5 +++
 .../LoongArch/LoongArchFloat64InstrInfo.td    |  3 ++
 .../LoongArch/LoongArchISelLowering.cpp       | 45 +++++++++++++++++++
 .../Target/LoongArch/LoongArchISelLowering.h  |  1 +
 .../CodeGen/LoongArch/load-itofp-combine.ll   | 31 ++++---------
 5 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
 def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
 
 // ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
 // comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
 def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
 def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
 def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
 
 //===----------------------------------------------------------------------===//
 // Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
 // fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
           (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
 } // Predicates = [HasBasicF]
 
 let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
 // fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
           (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
 } // Predicates = [HasBasicD]
 
 /// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   }
 
   // Set DAG combine for LA32 and LA64.
+  if (Subtarget.hasBasicF()) {
+    setTargetDAGCombine(ISD::SINT_TO_FP);
+  }
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
 
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return SDValue();
+  if (VT == MVT::f32 && !Subtarget.hasBasicF())
+    return SDValue();
+  if (VT == MVT::f64 && !Subtarget.hasBasicD())
+    return SDValue();
+
+  // Only optimize when the source and destination types have the same width.
+  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  // If the result of an integer load is only used by an integer-to-float
+  // conversion, use an fp load instead. This eliminates an integer-to-float
+  // move (movgr2fr) instruction.
+  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+      // Do not change the width of a volatile load. This condition check is
+      // inspired by AArch64.
+      !cast<LoadSDNode>(Src)->isVolatile()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(Src);
+    SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+                               LN0->getPointerInfo(), LN0->getAlign(),
+                               LN0->getMemOperand()->getFlags());
+
+    // Make sure successors of the original load stay after it by updating them
+    // to use the new Chain.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
+    return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
+  }
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -6760,6 +6802,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
   case ISD::EXTRACT_VECTOR_ELT:
     return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+  case ISD::SINT_TO_FP:
+    return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
   }
   return SDValue();
 }
@@ -7491,6 +7535,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
     NODE_NAME_CASE(MOVFR2GR_S_LA64)
     NODE_NAME_CASE(FTINT)
+    NODE_NAME_CASE(SITOF)
     NODE_NAME_CASE(BUILD_PAIR_F64)
     NODE_NAME_CASE(SPLIT_PAIR_F64)
     NODE_NAME_CASE(REVB_2H)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..e61a77a4b9d9b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -66,6 +66,7 @@ enum NodeType : unsigned {
   MOVGR2FCSR,
 
   FTINT,
+  SITOF,
 
   // Build and split F64 pair
   BUILD_PAIR_F64,
diff --git a/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll b/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
index f9b14be99b1ef..195008679dc95 100644
--- a/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
@@ -7,33 +7,25 @@
 define float @load_sitofp_f32(ptr %src) nounwind {
 ; LA32F-LABEL: load_sitofp_f32:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    ld.w $a0, $a0, 0
-; LA32F-NEXT:    movgr2fr.w $fa0, $a0
+; LA32F-NEXT:    fld.s $fa0, $a0, 0
 ; LA32F-NEXT:    ffint.s.w $fa0, $fa0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: load_sitofp_f32:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    ld.w $a0, $a0, 0
-; LA32D-NEXT:    movgr2fr.w $fa0, $a0
+; LA32D-NEXT:    fld.s $fa0, $a0, 0
 ; LA32D-NEXT:    ffint.s.w $fa0, $fa0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: load_sitofp_f32:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -16
-; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    ld.w $a0, $a0, 0
-; LA64F-NEXT:    pcaddu18i $ra, %call36(__floatdisf)
-; LA64F-NEXT:    jirl $ra, $ra, 0
-; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    ffint.s.w $fa0, $fa0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: load_sitofp_f32:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    ld.w $a0, $a0, 0
-; LA64D-NEXT:    movgr2fr.w $fa0, $a0
+; LA64D-NEXT:    fld.s $fa0, $a0, 0
 ; LA64D-NEXT:    ffint.s.w $fa0, $fa0
 ; LA64D-NEXT:    ret
   %1 = load i32, ptr %src
@@ -56,14 +48,8 @@ define double @load_sitofp_f64(ptr %src) nounwind {
 ;
 ; LA32D-LABEL: load_sitofp_f64:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    addi.w $sp, $sp, -16
-; LA32D-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT:    ld.w $a2, $a0, 0
-; LA32D-NEXT:    ld.w $a1, $a0, 4
-; LA32D-NEXT:    move $a0, $a2
-; LA32D-NEXT:    bl __floatdidf
-; LA32D-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT:    addi.w $sp, $sp, 16
+; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    ffint.d.l $fa0, $fa0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: load_sitofp_f64:
@@ -79,8 +65,7 @@ define double @load_sitofp_f64(ptr %src) nounwind {
 ;
 ; LA64D-LABEL: load_sitofp_f64:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    ld.d $a0, $a0, 0
-; LA64D-NEXT:    movgr2fr.d $fa0, $a0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    ffint.d.l $fa0, $fa0
 ; LA64D-NEXT:    ret
   %1 = load i64, ptr %src
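
Illustrative note (not part of the patch): the combine fires when a normal,
non-volatile integer load has the sitofp as its only user and the integer and
floating-point types have the same width. A minimal sketch of such input IR is
below; the function name and the C snippet are hypothetical, and the
before/after assembly is copied from the LA32F check lines in the test above.

; Roughly the C-level shape of the pattern:
;   float f(const int *p) { return (float)*p; }
define float @sitofp_is_only_user(ptr %p) nounwind {
  %v = load i32, ptr %p
  %f = sitofp i32 %v to float
  ret float %f
}
; LA32F before: ld.w $a0, $a0, 0 ; movgr2fr.w $fa0, $a0 ; ffint.s.w $fa0, $fa0
; LA32F after:  fld.s $fa0, $a0, 0 ; ffint.s.w $fa0, $fa0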