Skip to content

Conversation

ylzsx
Copy link
Contributor

@ylzsx ylzsx commented Sep 17, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 17, 2025

@llvm/pr-subscribers-backend-loongarch

Author: Zhaoxin Yang (ylzsx)

Changes

Patch is 42.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159258.diff

15 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+33-19)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+85-2)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+5)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+23-57)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll (+29-58)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll (+14-25)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll (+1-2)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll (+1-2)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vselect.ll (+11-20)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+22-55)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll (+29-58)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll (+23-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll (+1-2)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll (+1-2)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vselect.ll (+11-20)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 07e722b9a6591..fda313e693760 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -113,10 +113,11 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
     APInt SplatValue, SplatUndef;
     unsigned SplatBitSize;
     bool HasAnyUndefs;
-    unsigned Op;
+    unsigned Op = 0;
     EVT ResTy = BVN->getValueType(0);
     bool Is128Vec = BVN->getValueType(0).is128BitVector();
     bool Is256Vec = BVN->getValueType(0).is256BitVector();
+    SDNode *Res;
 
     if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
       break;
@@ -124,26 +125,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
                               HasAnyUndefs, 8))
       break;
 
-    switch (SplatBitSize) {
-    default:
-      break;
-    case 8:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
-      break;
-    case 16:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
-      break;
-    case 32:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
-      break;
-    case 64:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
-      break;
-    }
-
-    SDNode *Res;
     // If we have a signed 10 bit integer, we can splat it directly.
     if (SplatValue.isSignedIntN(10)) {
+      switch (SplatBitSize) {
+      default:
+        break;
+      case 8:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+        break;
+      case 16:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+        break;
+      case 32:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+        break;
+      case 64:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+        break;
+      }
+
       EVT EleType = ResTy.getVectorElementType();
       APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
       SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
@@ -151,6 +151,20 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
       ReplaceNode(Node, Res);
       return;
     }
+
+    // Select appropriate [x]vldi instructions for some special constant splats,
+    // i.e. those encodable with bit 12 of the immediate set (`imm[12] == 1`).
+    std::pair<bool, uint64_t> ConvertVLDI =
+        LoongArchTargetLowering::isImmVLDILegalForMode1(SplatValue,
+                                                        SplatBitSize);
+    if (ConvertVLDI.first) {
+      Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
+      SDValue Imm = CurDAG->getSignedTargetConstant(
+          SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
+      Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
+      ReplaceNode(Node, Res);
+      return;
+    }
     break;
   }
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..460e2d7c87af7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2679,9 +2679,10 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
 
     if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
       // We can only handle 64-bit elements that are within
-      // the signed 10-bit range on 32-bit targets.
+      // the signed 10-bit range or match vldi patterns on 32-bit targets.
       // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
-      if (!SplatValue.isSignedIntN(10))
+      if (!SplatValue.isSignedIntN(10) &&
+          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
         return SDValue();
       if ((Is128Vec && ResTy == MVT::v4i32) ||
           (Is256Vec && ResTy == MVT::v8i32))
@@ -8194,6 +8195,88 @@ SDValue LoongArchTargetLowering::LowerReturn(
   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
 }
 
+// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
+// Note: The following prefixes are excluded:
+//   imm[11:8] == 4'b0000, 4'b0100, 4'b1000
+// as they can be represented using [x]vrepli.[whb]
+std::pair<bool, uint64_t>
+LoongArchTargetLowering::isImmVLDILegalForMode1(const APInt &SplatValue,
+                                                const unsigned SplatBitSize) {
+  uint64_t RequiredImm = 0;
+  uint64_t V = SplatValue.getZExtValue();
+  if (SplatBitSize == 16 && !(V & 0x00FF)) {
+    // 4'b0101
+    RequiredImm = (0b10101 << 8) | (V >> 8);
+    return {true, RequiredImm};
+  } else if (SplatBitSize == 32) {
+    // 4'b0001
+    if (!(V & 0xFFFF00FF)) {
+      RequiredImm = (0b10001 << 8) | (V >> 8);
+      return {true, RequiredImm};
+    }
+    // 4'b0010
+    if (!(V & 0xFF00FFFF)) {
+      RequiredImm = (0b10010 << 8) | (V >> 16);
+      return {true, RequiredImm};
+    }
+    // 4'b0011
+    if (!(V & 0x00FFFFFF)) {
+      RequiredImm = (0b10011 << 8) | (V >> 24);
+      return {true, RequiredImm};
+    }
+    // 4'b0110
+    if ((V & 0xFFFF00FF) == 0xFF) {
+      RequiredImm = (0b10110 << 8) | (V >> 8);
+      return {true, RequiredImm};
+    }
+    // 4'b0111
+    if ((V & 0xFF00FFFF) == 0xFFFF) {
+      RequiredImm = (0b10111 << 8) | (V >> 16);
+      return {true, RequiredImm};
+    }
+    // 4'b1010
+    if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
+      RequiredImm =
+          (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+      return {true, RequiredImm};
+    }
+  } else if (SplatBitSize == 64) {
+    // 4'b1011
+    if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
+        (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
+      RequiredImm =
+          (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+      return {true, RequiredImm};
+    }
+    // 4'b1100
+    if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
+        (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
+      RequiredImm =
+          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
+      return {true, RequiredImm};
+    }
+    // 4'b1001
+    auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
+      uint8_t res = 0;
+      for (int i = 0; i < 8; ++i) {
+        uint8_t byte = x & 0xFF;
+        if (byte == 0 || byte == 0xFF)
+          res |= ((byte & 1) << i);
+        else
+          return {false, 0};
+        x >>= 8;
+      }
+      return {true, res};
+    };
+    auto [IsSame, Suffix] = sameBitsPreByte(V);
+    if (IsSame) {
+      RequiredImm = (0b11001 << 8) | Suffix;
+      return {true, RequiredImm};
+    }
+  }
+  return {false, RequiredImm};
+}
+
 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
                                                EVT VT) const {
   if (!Subtarget.hasExtLSX())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9d14934a9d363..76a5cd87bf76e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,11 @@ class LoongArchTargetLowering : public TargetLowering {
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const override;
 
+  /// Check if a constant splat can be generated using [x]vldi, where imm[12]
+  /// is 1.
+  static std::pair<bool, uint64_t>
+  isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize);
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 8dfd7bf3ac8ec..58d684e1beb54 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -201,8 +201,7 @@ entry:
 define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_v8f32_const_splat:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 260096
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -1424
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -212,19 +211,11 @@ entry:
 
 ;; Also check buildvector_const_splat_xvldi_1100.
 define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_v4f64_const_splat:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; LA32-NEXT:    xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: buildvector_v4f64_const_splat:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64-NEXT:    xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: buildvector_v4f64_const_splat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvldi $xr0, -912
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst
   ret void
@@ -234,8 +225,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0001(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0001:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ori $a1, $zero, 768
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -3837
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -246,8 +236,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0010(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0010:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 16
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -3583
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -258,8 +247,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0011(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0011:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 4096
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -3327
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -270,8 +258,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0101(ptr %dst) {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0101:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ori $a1, $zero, 768
-; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -2813
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -282,8 +269,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0110(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0110:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ori $a1, $zero, 1023
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -2557
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -294,9 +280,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0111(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0111:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 15
-; CHECK-NEXT:    ori $a1, $a1, 4095
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -2305
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -305,39 +289,22 @@ entry:
 }
 
 define void @buildvector_const_splat_xvldi_1001(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1001:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI21_0)
-; LA32-NEXT:    xvld $xr0, $a1, %pc_lo12(.LCPI21_0)
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1001:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1001:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvldi $xr0, -1789
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   store <8 x i32> <i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
   ret void
 }
 
 define void @buildvector_const_splat_xvldi_1011(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1011:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI22_0)
-; LA32-NEXT:    xvld $xr0, $a1, %pc_lo12(.LCPI22_0)
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1011:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a1, 262144
-; LA64-NEXT:    xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1011:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvldi $xr0, -1280
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   store <8 x float> <float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0>, ptr %dst
   ret void
@@ -2458,8 +2425,7 @@ define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, flo
 ; CHECK-NEXT:    # kill: def $f2 killed $f2 def $xr2
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 def $xr1
 ; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT:    lu12i.w $a1, 262144
-; CHECK-NEXT:    xvreplgr2vr.w $xr4, $a1
+; CHECK-NEXT:    xvldi $xr4, -3264
 ; CHECK-NEXT:    xvinsve0.w $xr4, $xr0, 1
 ; CHECK-NEXT:    xvinsve0.w $xr4, $xr1, 2
 ; CHECK-NEXT:    xvinsve0.w $xr4, $xr2, 5
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
index 7514dafa8000b..ba821308cb4db 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
@@ -40,35 +40,19 @@ define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; FAULT-NEXT:    xvst $xr0, $a0, 0
 ; FAULT-NEXT:    ret
 ;
-; LA32-LABEL: fdiv_v4f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI1_0)
-; LA32-NEXT:    xvld $xr0, $a2, 0
-; LA32-NEXT:    xvld $xr1, $a3, %pc_lo12(.LCPI1_0)
-; LA32-NEXT:    xvld $xr2, $a1, 0
-; LA32-NEXT:    xvfrecipe.d $xr3, $xr0
-; LA32-NEXT:    xvfmadd.d $xr1, $xr0, $xr3, $xr1
-; LA32-NEXT:    xvfnmsub.d $xr1, $xr1, $xr3, $xr3
-; LA32-NEXT:    xvfmul.d $xr3, $xr2, $xr1
-; LA32-NEXT:    xvfnmsub.d $xr0, $xr0, $xr3, $xr2
-; LA32-NEXT:    xvfmadd.d $xr0, $xr1, $xr0, $xr3
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: fdiv_v4f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    xvld $xr0, $a2, 0
-; LA64-NEXT:    xvld $xr1, $a1, 0
-; LA64-NEXT:    lu52i.d $a1, $zero, -1025
-; LA64-NEXT:    xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT:    xvfrecipe.d $xr3, $xr0
-; LA64-NEXT:    xvfmadd.d $xr2, $xr0, $xr3, $xr2
-; LA64-NEXT:    xvfnmsub.d $xr2, $xr2, $xr3, $xr3
-; LA64-NEXT:    xvfmul.d $xr3, $xr1, $xr2
-; LA64-NEXT:    xvfnmsub.d $xr0, $xr0, $xr3, $xr1
-; LA64-NEXT:    xvfmadd.d $xr0, $xr2, $xr0, $xr3
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: fdiv_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfrecipe.d $xr2, $xr0
+; CHECK-NEXT:    xvldi $xr3, -784
+; CHECK-NEXT:    xvfmadd.d $xr3, $xr0, $xr2, $xr3
+; CHECK-NEXT:    xvfnmsub.d $xr2, $xr3, $xr2, $xr2
+; CHECK-NEXT:    xvfmul.d $xr3, $xr1, $xr2
+; CHECK-NEXT:    xvfnmsub.d $xr0, $xr0, $xr3, $xr1
+; CHECK-NEXT:    xvfmadd.d $xr0, $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   %v0 = load <4 x double>, ptr %a0
   %v1 = load <4 x double>, ptr %a1
@@ -90,8 +74,7 @@ define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvfrecipe.s $xr1, $xr0
-; CHECK-NEXT:    lu12i.w $a1, -264192
-; CHECK-NEXT:    xvreplgr2vr.w $xr2, $a1
+; CHECK-NEXT:    xvldi $xr2, -1296
 ; CHECK-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CHECK-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -107,24 +90,22 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA32-LABEL: one_fdiv_v4f64:
 ; FAULT-LA32:       # %bb.0: # %entry
 ; FAULT-LA32-NEXT:    xvld $xr0, $a1, 0
-; FAULT-LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; FAULT-LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
+; FAULT-LA32-NEXT:    xvldi $xr1, -912
 ; FAULT-LA32-NEXT:    xvfdiv.d $xr0, $xr1, $xr0
 ; FAULT-LA32-NEXT:    xvst $xr0, $a0, 0
 ; FAULT-LA32-NEXT:    ret
 ;
-; LA32-LABEL: one_fdiv_v4f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT:    xvfrecipe.d $xr2, $xr0
-; LA32-NEXT:    xvfnmsub.d $xr3, $xr0, $xr2, $xr1
-; LA32-NEXT:    xvfmadd.d $xr2, $xr2, $xr3, $xr2
-; LA32-NEXT:    xvfnmsub.d $xr0, $xr0, $xr2, $xr1
-; LA32-NEXT:    xvfmadd.d $xr0, $xr2, $xr0, $xr2
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
+; CHECK-LABEL: one_fdiv_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrecipe.d $xr1, $xr0
+; CHECK-NEXT:    xvldi $xr2, -912
+; CHECK-NEXT:    xvfnmsub.d $xr3, $xr0, $xr1, $xr2
+; CHECK-NEXT:    xvfmadd.d $xr1, $xr1, $xr3, $xr1
+; CHECK-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT:    xvfmadd.d $xr0, $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 ;
 ; FAULT-LA64-LABEL: one_fdiv_v4f64:
 ; FAULT-LA64:       # %bb.0: # %entry
@@ -132,22 +113,12 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA64-NEXT:    xvfrecip.d $xr0, $xr0
 ; FAULT-LA64-NEXT:    xvst $xr0, $a0, 0
 ; FAULT-LA64-NEXT:    ret
-;
-; LA64-LABEL: one_fdiv_v4f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvfrecipe.d $xr1, $xr0
-; LA64-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64-NEXT:    xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT:    xvfnmsub.d $xr3, $xr0, $xr1, $xr2
-; LA64-NEXT:    xvfmadd.d $xr1, $xr1, $xr3, $xr1
-; LA64-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT:    xvfmadd.d $xr0, $xr1, $xr0, $xr1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
 entry:
   %v0 = load <4 x double>, ptr %a0
   %div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0
   store <4 x double> %div, ptr %res
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LA32: {{.*}}
+; LA64: {{.*}}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
index 4e475daa8ced3..e696129acb862 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
@@ -63,11 +63,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
 ; LA32-NEXT:    xvfrsqrte.s $xr1, $xr0
 ; LA32-NEXT:    xvfmul.s $xr1, $xr0, $xr1
 ; LA32-NEXT:    xvfmul.s $xr0, $xr0, $xr1
-; LA32-NEXT:    lu12i.w $a1, -261120
-; LA32-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT:    xvldi $xr2, -1400
 ; LA32-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA32-NEXT:    lu12i.w $a1, -266240
-; LA32-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT:    xvldi $xr2, -3137
 ; LA32-NEXT:    xvfmul.s $xr1, $xr1, $xr2
 ; LA32-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; LA32-NEXT:    xvst $xr0, $sp, 64
@@ -100,11 +98,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
 ; LA64-NEXT:    xvfrsqrte.s $xr1, $xr0
 ; LA64-NEXT:    xvfmul.s $xr1, $xr0, $xr1
 ; LA64-NEXT:    xvfmul.s $xr0, $xr0, $xr1
-; LA64-NEXT:    lu12i.w $a1, -261120
-; LA64-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT:    xvldi $xr2, -1400
 ; LA64-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT:    lu12i.w $a1, -266240
-; LA64-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT:    xvldi $xr2, -3137
 ; LA64-NEXT:    xvfmul.s $xr1, $xr1, $xr2
 ; LA64-NEXT:    xvfmul.s $xr0, $xr1, ...
[truncated]

@ylzsx ylzsx marked this pull request as draft September 17, 2025 08:06
@ylzsx ylzsx marked this pull request as ready for review September 17, 2025 10:57
unsigned SplatBitSize;
bool HasAnyUndefs;
unsigned Op;
unsigned Op = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is unnecessary to initialize it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without initialization, a warning will occur. I'm not sure why it didn't happen before.

[44/185] Building CXX object lib/Target/LoongArch/CMakeFiles/LLVMLoongArchCodeGen.dir/LoongArchISelDAGToDAG.cpp.o
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:131:7: warning: variable 'Op' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized]
  131 |       default:
      |       ^~~~~~~
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:150:36: note: uninitialized use occurs here
  150 |       Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
      |                                    ^~
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:116:16: note: initialize the variable 'Op' to silence this warning
  116 |     unsigned Op;
      |                ^
      |                 = 0
1 warning generated.

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

Base automatically changed from users/ylzsx/precommit-vldi to main September 25, 2025 10:07
@ylzsx ylzsx force-pushed the users/ylzsx/vldi-with-special-constant branch from 409f80c to 56fe245 Compare September 26, 2025 07:36
@ylzsx ylzsx merged commit 9de1bc0 into main Sep 26, 2025
9 checks passed
@ylzsx ylzsx deleted the users/ylzsx/vldi-with-special-constant branch September 26, 2025 08:32
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants