Skip to content

Conversation

@ylzsx
Copy link
Contributor

@ylzsx ylzsx commented Oct 29, 2025

If the result of an integer load is only used by an integer-to-float conversion, use an FP load instead. This eliminates an integer-to-float move (movgr2fr) instruction.

If the result of an integer load is only used by an integer-to-float
conversion, use an FP load instead. This eliminates an
integer-to-float move (movgr2fr) instruction.
@llvmbot
Copy link
Member

llvmbot commented Oct 29, 2025

@llvm/pr-subscribers-backend-loongarch

Author: Zhaoxin Yang (ylzsx)

Changes

If the result of an integer load is only used by an integer-to-float conversion, use an FP load instead. This eliminates an integer-to-float move (movgr2fr) instruction.


Full diff: https://github.com/llvm/llvm-project/pull/165523.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td (+5)
  • (modified) llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td (+3)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+45)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1)
  • (modified) llvm/test/CodeGen/LoongArch/load-itofp-combine.ll (+8-23)
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
 def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
 
 // ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
 // comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
 def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
 def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
 def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
 
 //===----------------------------------------------------------------------===//
 // Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
 // fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
           (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
 } // Predicates = [HasBasicF]
 
 let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
 // fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
           (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
 } // Predicates = [HasBasicD]
 
 /// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   }
 
   // Set DAG combine for LA32 and LA64.
+  if (Subtarget.hasBasicF()) {
+    setTargetDAGCombine(ISD::SINT_TO_FP);
+  }
 
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return SDValue();
+  if (VT == MVT::f32 && !Subtarget.hasBasicF())
+    return SDValue();
+  if (VT == MVT::f64 && !Subtarget.hasBasicD())
+    return SDValue();
+
+  // Only optimize when the source and destination types have the same width.
+  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  // If the result of an integer load is only used by an integer-to-float
+  // conversion, use a fp load instead. This eliminates an integer-to-float-move
+  // (movgr2fr) instruction.
+  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+      // Do not change the width of a volatile load. This condition check is
+      // inspired by AArch64.
+      !cast<LoadSDNode>(Src)->isVolatile()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(Src);
+    SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+                               LN0->getPointerInfo(), LN0->getAlign(),
+                               LN0->getMemOperand()->getFlags());
+
+    // Make sure successors of the original load stay after it by updating them
+    // to use the new Chain.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
+    return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
+  }
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -6760,6 +6802,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
   case ISD::EXTRACT_VECTOR_ELT:
     return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+  case ISD::SINT_TO_FP:
+    return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
   }
   return SDValue();
 }
@@ -7491,6 +7535,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
     NODE_NAME_CASE(MOVFR2GR_S_LA64)
     NODE_NAME_CASE(FTINT)
+    NODE_NAME_CASE(SITOF)
     NODE_NAME_CASE(BUILD_PAIR_F64)
     NODE_NAME_CASE(SPLIT_PAIR_F64)
     NODE_NAME_CASE(REVB_2H)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..e61a77a4b9d9b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -66,6 +66,7 @@ enum NodeType : unsigned {
   MOVGR2FCSR,
 
   FTINT,
+  SITOF,
 
   // Build and split F64 pair
   BUILD_PAIR_F64,
diff --git a/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll b/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
index f9b14be99b1ef..195008679dc95 100644
--- a/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
@@ -7,33 +7,25 @@
 define float @load_sitofp_f32(ptr %src) nounwind {
 ; LA32F-LABEL: load_sitofp_f32:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    ld.w $a0, $a0, 0
-; LA32F-NEXT:    movgr2fr.w $fa0, $a0
+; LA32F-NEXT:    fld.s $fa0, $a0, 0
 ; LA32F-NEXT:    ffint.s.w $fa0, $fa0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: load_sitofp_f32:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    ld.w $a0, $a0, 0
-; LA32D-NEXT:    movgr2fr.w $fa0, $a0
+; LA32D-NEXT:    fld.s $fa0, $a0, 0
 ; LA32D-NEXT:    ffint.s.w $fa0, $fa0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: load_sitofp_f32:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -16
-; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    ld.w $a0, $a0, 0
-; LA64F-NEXT:    pcaddu18i $ra, %call36(__floatdisf)
-; LA64F-NEXT:    jirl $ra, $ra, 0
-; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    ffint.s.w $fa0, $fa0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: load_sitofp_f32:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    ld.w $a0, $a0, 0
-; LA64D-NEXT:    movgr2fr.w $fa0, $a0
+; LA64D-NEXT:    fld.s $fa0, $a0, 0
 ; LA64D-NEXT:    ffint.s.w $fa0, $fa0
 ; LA64D-NEXT:    ret
   %1 = load i32, ptr %src
@@ -56,14 +48,8 @@ define double @load_sitofp_f64(ptr %src) nounwind {
 ;
 ; LA32D-LABEL: load_sitofp_f64:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    addi.w $sp, $sp, -16
-; LA32D-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT:    ld.w $a2, $a0, 0
-; LA32D-NEXT:    ld.w $a1, $a0, 4
-; LA32D-NEXT:    move $a0, $a2
-; LA32D-NEXT:    bl __floatdidf
-; LA32D-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT:    addi.w $sp, $sp, 16
+; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    ffint.d.l $fa0, $fa0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: load_sitofp_f64:
@@ -79,8 +65,7 @@ define double @load_sitofp_f64(ptr %src) nounwind {
 ;
 ; LA64D-LABEL: load_sitofp_f64:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    ld.d $a0, $a0, 0
-; LA64D-NEXT:    movgr2fr.d $fa0, $a0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    ffint.d.l $fa0, $fa0
 ; LA64D-NEXT:    ret
   %1 = load i64, ptr %src

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants