[LoongArch] Generate [x]vldi instructions with special constant splats #159258

ylzsx · 2025-09-17T07:00:14Z

No description provided.

llvmbot · 2025-09-17T07:00:43Z

@llvm/pr-subscribers-backend-loongarch

Author: Zhaoxin Yang (ylzsx)

Changes

Patch is 42.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159258.diff

15 Files Affected:

(modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+33-19)
(modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+85-2)
(modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+5)
(modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+23-57)
(modified) llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll (+29-58)
(modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll (+14-25)
(modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll (+1-2)
(modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll (+1-2)
(modified) llvm/test/CodeGen/LoongArch/lasx/vselect.ll (+11-20)
(modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+22-55)
(modified) llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll (+29-58)
(modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll (+23-47)
(modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll (+1-2)
(modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll (+1-2)
(modified) llvm/test/CodeGen/LoongArch/lsx/vselect.ll (+11-20)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 07e722b9a6591..fda313e693760 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -113,10 +113,11 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
     APInt SplatValue, SplatUndef;
     unsigned SplatBitSize;
     bool HasAnyUndefs;
-    unsigned Op;
+    unsigned Op = 0;
     EVT ResTy = BVN->getValueType(0);
     bool Is128Vec = BVN->getValueType(0).is128BitVector();
     bool Is256Vec = BVN->getValueType(0).is256BitVector();
+    SDNode *Res;
 
     if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
       break;
@@ -124,26 +125,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
                               HasAnyUndefs, 8))
       break;
 
-    switch (SplatBitSize) {
-    default:
-      break;
-    case 8:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
-      break;
-    case 16:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
-      break;
-    case 32:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
-      break;
-    case 64:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
-      break;
-    }
-
-    SDNode *Res;
     // If we have a signed 10 bit integer, we can splat it directly.
     if (SplatValue.isSignedIntN(10)) {
+      switch (SplatBitSize) {
+      default:
+        break;
+      case 8:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+        break;
+      case 16:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+        break;
+      case 32:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+        break;
+      case 64:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+        break;
+      }
+
       EVT EleType = ResTy.getVectorElementType();
       APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
       SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
@@ -151,6 +151,20 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
       ReplaceNode(Node, Res);
       return;
     }
+
+    // Select appropriate [x]vldi instructions for some special constant splats,
+    // where the immediate value `imm[12] == 1` for used [x]vldi instructions.
+    std::pair<bool, uint64_t> ConvertVLDI =
+        LoongArchTargetLowering::isImmVLDILegalForMode1(SplatValue,
+                                                        SplatBitSize);
+    if (ConvertVLDI.first) {
+      Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
+      SDValue Imm = CurDAG->getSignedTargetConstant(
+          SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
+      Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
+      ReplaceNode(Node, Res);
+      return;
+    }
     break;
   }
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..460e2d7c87af7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2679,9 +2679,10 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
 
     if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
       // We can only handle 64-bit elements that are within
-      // the signed 10-bit range on 32-bit targets.
+      // the signed 10-bit range or match vldi patterns on 32-bit targets.
       // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
-      if (!SplatValue.isSignedIntN(10))
+      if (!SplatValue.isSignedIntN(10) &&
+          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
         return SDValue();
       if ((Is128Vec && ResTy == MVT::v4i32) ||
           (Is256Vec && ResTy == MVT::v8i32))
@@ -8194,6 +8195,88 @@ SDValue LoongArchTargetLowering::LowerReturn(
   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
 }
 
+// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
+// Note: The following prefixes are excluded:
+//   imm[11:8] == 4'b0000, 4'b0100, 4'b1000
+// as they can be represented using [x]vrepli.[whb]
+std::pair<bool, uint64_t>
+LoongArchTargetLowering::isImmVLDILegalForMode1(const APInt &SplatValue,
+                                                const unsigned SplatBitSize) {
+  uint64_t RequiredImm = 0;
+  uint64_t V = SplatValue.getZExtValue();
+  if (SplatBitSize == 16 && !(V & 0x00FF)) {
+    // 4'b0101
+    RequiredImm = (0b10101 << 8) | (V >> 8);
+    return {true, RequiredImm};
+  } else if (SplatBitSize == 32) {
+    // 4'b0001
+    if (!(V & 0xFFFF00FF)) {
+      RequiredImm = (0b10001 << 8) | (V >> 8);
+      return {true, RequiredImm};
+    }
+    // 4'b0010
+    if (!(V & 0xFF00FFFF)) {
+      RequiredImm = (0b10010 << 8) | (V >> 16);
+      return {true, RequiredImm};
+    }
+    // 4'b0011
+    if (!(V & 0x00FFFFFF)) {
+      RequiredImm = (0b10011 << 8) | (V >> 24);
+      return {true, RequiredImm};
+    }
+    // 4'b0110
+    if ((V & 0xFFFF00FF) == 0xFF) {
+      RequiredImm = (0b10110 << 8) | (V >> 8);
+      return {true, RequiredImm};
+    }
+    // 4'b0111
+    if ((V & 0xFF00FFFF) == 0xFFFF) {
+      RequiredImm = (0b10111 << 8) | (V >> 16);
+      return {true, RequiredImm};
+    }
+    // 4'b1010
+    if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
+      RequiredImm =
+          (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+      return {true, RequiredImm};
+    }
+  } else if (SplatBitSize == 64) {
+    // 4'b1011
+    if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
+        (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
+      RequiredImm =
+          (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+      return {true, RequiredImm};
+    }
+    // 4'b1100
+    if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
+        (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
+      RequiredImm =
+          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
+      return {true, RequiredImm};
+    }
+    // 4'b1001
+    auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
+      uint8_t res = 0;
+      for (int i = 0; i < 8; ++i) {
+        uint8_t byte = x & 0xFF;
+        if (byte == 0 || byte == 0xFF)
+          res |= ((byte & 1) << i);
+        else
+          return {false, 0};
+        x >>= 8;
+      }
+      return {true, res};
+    };
+    auto [IsSame, Suffix] = sameBitsPreByte(V);
+    if (IsSame) {
+      RequiredImm = (0b11001 << 8) | Suffix;
+      return {true, RequiredImm};
+    }
+  }
+  return {false, RequiredImm};
+}
+
 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
                                                EVT VT) const {
   if (!Subtarget.hasExtLSX())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9d14934a9d363..76a5cd87bf76e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,11 @@ class LoongArchTargetLowering : public TargetLowering {
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const override;
 
+  /// Check if a constant splat can be generated using [x]vldi, where imm[12]
+  /// is 1.
+  static std::pair<bool, uint64_t>
+  isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize);
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 8dfd7bf3ac8ec..58d684e1beb54 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -201,8 +201,7 @@ entry:
 define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_v8f32_const_splat:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 260096
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -1424
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -212,19 +211,11 @@ entry:
 
 ;; Also check buildvector_const_splat_xvldi_1100.
 define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_v4f64_const_splat:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; LA32-NEXT:    xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: buildvector_v4f64_const_splat:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64-NEXT:    xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: buildvector_v4f64_const_splat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvldi $xr0, -912
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst
   ret void
@@ -234,8 +225,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0001(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0001:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ori $a1, $zero, 768
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -3837
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -246,8 +236,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0010(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0010:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 16
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -3583
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -258,8 +247,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0011(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0011:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 4096
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -3327
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -270,8 +258,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0101(ptr %dst) {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0101:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ori $a1, $zero, 768
-; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -2813
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -282,8 +269,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0110(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0110:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ori $a1, $zero, 1023
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -2557
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -294,9 +280,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0111(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0111:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lu12i.w $a1, 15
-; CHECK-NEXT:    ori $a1, $a1, 4095
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT:    xvldi $xr0, -2305
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -305,39 +289,22 @@ entry:
 }
 
 define void @buildvector_const_splat_xvldi_1001(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1001:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI21_0)
-; LA32-NEXT:    xvld $xr0, $a1, %pc_lo12(.LCPI21_0)
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1001:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1001:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvldi $xr0, -1789
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   store <8 x i32> <i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
   ret void
 }
 
 define void @buildvector_const_splat_xvldi_1011(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1011:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI22_0)
-; LA32-NEXT:    xvld $xr0, $a1, %pc_lo12(.LCPI22_0)
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1011:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a1, 262144
-; LA64-NEXT:    xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1011:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvldi $xr0, -1280
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   store <8 x float> <float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0>, ptr %dst
   ret void
@@ -2458,8 +2425,7 @@ define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, flo
 ; CHECK-NEXT:    # kill: def $f2 killed $f2 def $xr2
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 def $xr1
 ; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT:    lu12i.w $a1, 262144
-; CHECK-NEXT:    xvreplgr2vr.w $xr4, $a1
+; CHECK-NEXT:    xvldi $xr4, -3264
 ; CHECK-NEXT:    xvinsve0.w $xr4, $xr0, 1
 ; CHECK-NEXT:    xvinsve0.w $xr4, $xr1, 2
 ; CHECK-NEXT:    xvinsve0.w $xr4, $xr2, 5
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
index 7514dafa8000b..ba821308cb4db 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
@@ -40,35 +40,19 @@ define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; FAULT-NEXT:    xvst $xr0, $a0, 0
 ; FAULT-NEXT:    ret
 ;
-; LA32-LABEL: fdiv_v4f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI1_0)
-; LA32-NEXT:    xvld $xr0, $a2, 0
-; LA32-NEXT:    xvld $xr1, $a3, %pc_lo12(.LCPI1_0)
-; LA32-NEXT:    xvld $xr2, $a1, 0
-; LA32-NEXT:    xvfrecipe.d $xr3, $xr0
-; LA32-NEXT:    xvfmadd.d $xr1, $xr0, $xr3, $xr1
-; LA32-NEXT:    xvfnmsub.d $xr1, $xr1, $xr3, $xr3
-; LA32-NEXT:    xvfmul.d $xr3, $xr2, $xr1
-; LA32-NEXT:    xvfnmsub.d $xr0, $xr0, $xr3, $xr2
-; LA32-NEXT:    xvfmadd.d $xr0, $xr1, $xr0, $xr3
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: fdiv_v4f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    xvld $xr0, $a2, 0
-; LA64-NEXT:    xvld $xr1, $a1, 0
-; LA64-NEXT:    lu52i.d $a1, $zero, -1025
-; LA64-NEXT:    xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT:    xvfrecipe.d $xr3, $xr0
-; LA64-NEXT:    xvfmadd.d $xr2, $xr0, $xr3, $xr2
-; LA64-NEXT:    xvfnmsub.d $xr2, $xr2, $xr3, $xr3
-; LA64-NEXT:    xvfmul.d $xr3, $xr1, $xr2
-; LA64-NEXT:    xvfnmsub.d $xr0, $xr0, $xr3, $xr1
-; LA64-NEXT:    xvfmadd.d $xr0, $xr2, $xr0, $xr3
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: fdiv_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfrecipe.d $xr2, $xr0
+; CHECK-NEXT:    xvldi $xr3, -784
+; CHECK-NEXT:    xvfmadd.d $xr3, $xr0, $xr2, $xr3
+; CHECK-NEXT:    xvfnmsub.d $xr2, $xr3, $xr2, $xr2
+; CHECK-NEXT:    xvfmul.d $xr3, $xr1, $xr2
+; CHECK-NEXT:    xvfnmsub.d $xr0, $xr0, $xr3, $xr1
+; CHECK-NEXT:    xvfmadd.d $xr0, $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   %v0 = load <4 x double>, ptr %a0
   %v1 = load <4 x double>, ptr %a1
@@ -90,8 +74,7 @@ define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvfrecipe.s $xr1, $xr0
-; CHECK-NEXT:    lu12i.w $a1, -264192
-; CHECK-NEXT:    xvreplgr2vr.w $xr2, $a1
+; CHECK-NEXT:    xvldi $xr2, -1296
 ; CHECK-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CHECK-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -107,24 +90,22 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA32-LABEL: one_fdiv_v4f64:
 ; FAULT-LA32:       # %bb.0: # %entry
 ; FAULT-LA32-NEXT:    xvld $xr0, $a1, 0
-; FAULT-LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; FAULT-LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
+; FAULT-LA32-NEXT:    xvldi $xr1, -912
 ; FAULT-LA32-NEXT:    xvfdiv.d $xr0, $xr1, $xr0
 ; FAULT-LA32-NEXT:    xvst $xr0, $a0, 0
 ; FAULT-LA32-NEXT:    ret
 ;
-; LA32-LABEL: one_fdiv_v4f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT:    xvfrecipe.d $xr2, $xr0
-; LA32-NEXT:    xvfnmsub.d $xr3, $xr0, $xr2, $xr1
-; LA32-NEXT:    xvfmadd.d $xr2, $xr2, $xr3, $xr2
-; LA32-NEXT:    xvfnmsub.d $xr0, $xr0, $xr2, $xr1
-; LA32-NEXT:    xvfmadd.d $xr0, $xr2, $xr0, $xr2
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
+; CHECK-LABEL: one_fdiv_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrecipe.d $xr1, $xr0
+; CHECK-NEXT:    xvldi $xr2, -912
+; CHECK-NEXT:    xvfnmsub.d $xr3, $xr0, $xr1, $xr2
+; CHECK-NEXT:    xvfmadd.d $xr1, $xr1, $xr3, $xr1
+; CHECK-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT:    xvfmadd.d $xr0, $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 ;
 ; FAULT-LA64-LABEL: one_fdiv_v4f64:
 ; FAULT-LA64:       # %bb.0: # %entry
@@ -132,22 +113,12 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA64-NEXT:    xvfrecip.d $xr0, $xr0
 ; FAULT-LA64-NEXT:    xvst $xr0, $a0, 0
 ; FAULT-LA64-NEXT:    ret
-;
-; LA64-LABEL: one_fdiv_v4f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvfrecipe.d $xr1, $xr0
-; LA64-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64-NEXT:    xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT:    xvfnmsub.d $xr3, $xr0, $xr1, $xr2
-; LA64-NEXT:    xvfmadd.d $xr1, $xr1, $xr3, $xr1
-; LA64-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT:    xvfmadd.d $xr0, $xr1, $xr0, $xr1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
 entry:
   %v0 = load <4 x double>, ptr %a0
   %div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0
   store <4 x double> %div, ptr %res
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LA32: {{.*}}
+; LA64: {{.*}}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
index 4e475daa8ced3..e696129acb862 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
@@ -63,11 +63,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
 ; LA32-NEXT:    xvfrsqrte.s $xr1, $xr0
 ; LA32-NEXT:    xvfmul.s $xr1, $xr0, $xr1
 ; LA32-NEXT:    xvfmul.s $xr0, $xr0, $xr1
-; LA32-NEXT:    lu12i.w $a1, -261120
-; LA32-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT:    xvldi $xr2, -1400
 ; LA32-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA32-NEXT:    lu12i.w $a1, -266240
-; LA32-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT:    xvldi $xr2, -3137
 ; LA32-NEXT:    xvfmul.s $xr1, $xr1, $xr2
 ; LA32-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; LA32-NEXT:    xvst $xr0, $sp, 64
@@ -100,11 +98,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
 ; LA64-NEXT:    xvfrsqrte.s $xr1, $xr0
 ; LA64-NEXT:    xvfmul.s $xr1, $xr0, $xr1
 ; LA64-NEXT:    xvfmul.s $xr0, $xr0, $xr1
-; LA64-NEXT:    lu12i.w $a1, -261120
-; LA64-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT:    xvldi $xr2, -1400
 ; LA64-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT:    lu12i.w $a1, -266240
-; LA64-NEXT:    xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT:    xvldi $xr2, -3137
 ; LA64-NEXT:    xvfmul.s $xr1, $xr1, $xr2
 ; LA64-NEXT:    xvfmul.s $xr0, $xr1, ...
[truncated]

zhaoqi5 · 2025-09-18T02:18:03Z

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

    unsigned SplatBitSize;
    bool HasAnyUndefs;
-    unsigned Op;
+    unsigned Op = 0;


It is unnecessary to initialize it.

Without initialization, a warning will occur. I'm not sure why it didn't happen before.

[44/185] Building CXX object lib/Target/LoongArch/CMakeFiles/LLVMLoongArchCodeGen.dir/LoongArchISelDAGToDAG.cpp.o
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:131:7: warning: variable 'Op' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized]
  131 |       default:
      |       ^~~~~~~
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:150:36: note: uninitialized use occurs here
  150 |       Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
      |                                    ^~
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:116:16: note: initialize the variable 'Op' to silence this warning
  116 |     unsigned Op;
      |                ^
      |                 = 0
1 warning generated.

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll

heiher

LGTM.

Otherwise, many tests would fail on LA32. For example: CodeGen/LoongArch/lasx/fsqrt.ll

llvmbot added the backend:loongarch label Sep 17, 2025

ylzsx marked this pull request as draft September 17, 2025 08:06

ylzsx marked this pull request as ready for review September 17, 2025 10:57

ylzsx requested review from zhaoqi5, heiher and SixWeining September 17, 2025 10:57

zhaoqi5 reviewed Sep 18, 2025

View reviewed changes

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp Outdated Show resolved Hide resolved

llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll Outdated Show resolved Hide resolved

ylzsx force-pushed the users/ylzsx/vldi-with-special-constant branch from 0c87b0d to 409f80c Compare September 18, 2025 09:01

tangaac mentioned this pull request Sep 23, 2025

LLVM optimization on PR 159258 [bot] llvm-ci-la/llvm-opt-ci#57

Open

heiher approved these changes Sep 25, 2025

View reviewed changes

Base automatically changed from users/ylzsx/precommit-vldi to main September 25, 2025 10:07

ylzsx added 4 commits September 26, 2025 14:06

[LoongArch] Generate [x]vldi instructions with special constant splats

7bc3e6b

Strengthen check in vsplatf32_fpimm_eq_1

ccf4cd1

Otherwise, many tests would fail on LA32. For example: CodeGen/LoongArch/lasx/fsqrt.ll

Fix according to zhaoqi5's review comments

fa357a2

remove static declaration

56fe245

ylzsx force-pushed the users/ylzsx/vldi-with-special-constant branch from 409f80c to 56fe245 Compare September 26, 2025 07:36

ylzsx merged commit 9de1bc0 into main Sep 26, 2025
9 checks passed

ylzsx deleted the users/ylzsx/vldi-with-special-constant branch September 26, 2025 08:32

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LoongArch] Generate [x]vldi instructions with special constant splats #159258

[LoongArch] Generate [x]vldi instructions with special constant splats #159258

Uh oh!

ylzsx commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

zhaoqi5 Sep 18, 2025

Uh oh!

ylzsx Sep 18, 2025

Uh oh!

Uh oh!

Uh oh!

heiher left a comment

Uh oh!

Uh oh!

Uh oh!

[LoongArch] Generate [x]vldi instructions with special constant splats #159258

[LoongArch] Generate [x]vldi instructions with special constant splats #159258

Uh oh!

Conversation

ylzsx commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

zhaoqi5 Sep 18, 2025

Choose a reason for hiding this comment

Uh oh!

ylzsx Sep 18, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

heiher left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!