Skip to content

Conversation

heiher
Copy link
Member

@heiher heiher commented Sep 25, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 25, 2025

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Patch is 306.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160656.diff

74 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+134-25)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+4-4)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll (+168-69)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/fpowi.ll (+260-136)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll (+1)
  • (added) llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d-invalid-imm.ll (+33)
  • (added) llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d.ll (+26)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll (+1-32)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll (+1-27)
  • (added) llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr-d.ll (+17)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll (+1-14)
  • (added) llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr-d.ll (+14)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll (+1-12)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll (+12-28)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll (+26-10)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll (+18-5)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll (+174-81)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll (+2-1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll (+96-42)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll (+50-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll (+50-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll (+61-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll (+61-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll (+61-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll (+61-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll (+50-21)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll (+716-339)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll (+114-41)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll (+1)
  • (added) llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d-invalid-imm.ll (+33)
  • (added) llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d.ll (+26)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll (+1-32)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll (+1-24)
  • (added) llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr-d.ll (+17)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll (+1-14)
  • (added) llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr-d.ll (+14)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll (+1-12)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll (+10-24)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll (+172-81)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll (+196-83)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll (+113-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll (+113-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll (+123-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll (+123-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll (+123-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll (+123-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll (+113-47)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll (+64-29)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-any-ext.ll (+52-22)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll (+174-75)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+269-129)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll (+1)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32baa2d111270..468dd1c085b98 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -666,6 +666,7 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
 
   unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
   unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
+  unsigned ResBits = OpVT.getScalarSizeInBits();
 
   unsigned LegalVecSize = 128;
   bool isLASX256Vector =
@@ -691,10 +692,11 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
 
   if (isLASX256Vector) {
     SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
-                              DAG.getConstant(2, DL, MVT::i64));
+                              DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
     Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
   }
 
+  Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                      DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
 }
@@ -727,15 +729,16 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
 
   unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
   MVT VecTy = Val.getSimpleValueType();
+  MVT GRLenVT = Subtarget.getGRLenVT();
 
   for (int i = NumEles; i > 1; i /= 2) {
-    SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
+    SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
     SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
     Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
   }
 
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
-                     DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+                     DAG.getConstant(0, DL, GRLenVT));
 }
 
 SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
@@ -1128,10 +1131,12 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
   SmallVector<SDValue, 8> Ops;
   for (unsigned int i = 0; i < NewEltNum; i++) {
     SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
-                             DAG.getConstant(i, DL, MVT::i64));
+                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
     unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                          ? (unsigned)LoongArchISD::BITREV_8B
                          : (unsigned)ISD::BITREVERSE;
+    if (!Subtarget.is64Bit() && RevOp == LoongArchISD::BITREV_8B)
+      return SDValue();
     Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
   }
   SDValue Res =
@@ -1611,9 +1616,8 @@ lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
   if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
-    APInt Imm(64, SplatIndex);
     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
-                       DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
+                       DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
   }
 
   return SDValue();
@@ -1671,7 +1675,7 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   }
 
   // Calculate the immediate. Replace any remaining undefs with zero
-  APInt Imm(64, 0);
+  int Imm = 0;
   for (int i = SubVecSize - 1; i >= 0; --i) {
     int M = SubMask[i];
 
@@ -1946,11 +1950,12 @@ static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
 /// adding it as an operand to the resulting VSHUF.
 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
-                                         SelectionDAG &DAG) {
+                                         SelectionDAG &DAG,
+                                         const LoongArchSubtarget &Subtarget) {
 
   SmallVector<SDValue, 16> Ops;
   for (auto M : Mask)
-    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
+    Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
 
   EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
@@ -2030,7 +2035,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
     return NewShuffle;
-  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
+  if ((Result =
+           lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
     return Result;
   return SDValue();
 }
@@ -2088,7 +2094,8 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
-                                          SelectionDAG &DAG) {
+                                          SelectionDAG &DAG,
+                                          const LoongArchSubtarget &Subtarget) {
   // LoongArch LASX only have XVPERM_W.
   if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
     return SDValue();
@@ -2119,9 +2126,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
     return SDValue();
 
   SmallVector<SDValue, 8> Masks;
+  MVT GRLenVT = Subtarget.getGRLenVT();
   for (unsigned i = 0; i < NumElts; ++i)
-    Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
-                                  : DAG.getConstant(Mask[i], DL, MVT::i64));
+    Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
+                                  : DAG.getConstant(Mask[i], DL, GRLenVT));
   SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
 
   return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
@@ -2533,7 +2541,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
+    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG,
+                                             Subtarget)))
       return Result;
     if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
                                                              V1, V2, DAG)))
@@ -3102,12 +3111,33 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     return SDValue();
 
   SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
-  SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
-
   SmallVector<SDValue, 32> RawIndices;
-  for (unsigned i = 0; i < NumElts; ++i)
-    RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
-  SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+  SDValue SplatIdx;
+  SDValue Indices;
+
+  if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
+    MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      RawIndices.push_back(Op2);
+      RawIndices.push_back(DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+    }
+    SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
+    SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
+
+    RawIndices.clear();
+    for (unsigned i = 0; i < NumElts; ++i) {
+      RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+      RawIndices.push_back(DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+    }
+    Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
+    Indices = DAG.getBitcast(IdxVTy, Indices);
+  } else {
+    SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
+
+    for (unsigned i = 0; i < NumElts; ++i)
+      RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+    Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+  }
 
   // insert vec, elt, idx
   // =>
@@ -5129,7 +5159,7 @@ performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
   if (Opc == ISD::DELETED_NODE)
     return SDValue();
 
-  SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
+  SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
   V = DAG.getZExtOrTrunc(V, DL, T);
   return DAG.getBitcast(VT, V);
@@ -5142,6 +5172,7 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   SDValue Src = N->getOperand(0);
   EVT SrcVT = Src.getValueType();
+  MVT GRLenVT = Subtarget.getGRLenVT();
 
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
@@ -5209,11 +5240,11 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
     if (Src.getSimpleValueType() == MVT::v32i8) {
       SDValue Lo, Hi;
       std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
-      Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
-      Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
-      Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+      Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
+      Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
+      Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
                        DAG.getConstant(16, DL, MVT::i8));
-      V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+      V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
     } else if (UseLASX) {
       return SDValue();
     }
@@ -5221,7 +5252,7 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
 
   if (!V) {
     Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
-    V = DAG.getNode(Opc, DL, MVT::i64, Src);
+    V = DAG.getNode(Opc, DL, GRLenVT, Src);
   }
 
   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
@@ -5878,6 +5909,22 @@ static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
 }
 
+template <unsigned W>
+static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
+                                    unsigned ResOp) {
+  unsigned Imm = N->getConstantOperandVal(2);
+  if (!isUInt<W>(Imm)) {
+    const StringRef ErrorMsg = "argument out of range";
+    DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
+    return DAG.getUNDEF(N->getValueType(0));
+  }
+  SDLoc DL(N);
+  SDValue Vec = N->getOperand(1);
+  SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
+  SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
+  return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
+}
+
 static SDValue
 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
@@ -6367,6 +6414,68 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                        N->getOperand(1),
                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
                                    N->getOperand(2)));
+  case Intrinsic::loongarch_lsx_vpickve2gr_b:
+    if (!Subtarget.is64Bit())
+      return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_h:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
+    if (!Subtarget.is64Bit())
+      return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_w:
+    if (!Subtarget.is64Bit())
+      return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
+    if (!Subtarget.is64Bit())
+      return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
+    if (!Subtarget.is64Bit())
+      return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
+    if (!Subtarget.is64Bit())
+      return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_bz_b:
+  case Intrinsic::loongarch_lsx_bz_h:
+  case Intrinsic::loongarch_lsx_bz_w:
+  case Intrinsic::loongarch_lsx_bz_d:
+  case Intrinsic::loongarch_lasx_xbz_b:
+  case Intrinsic::loongarch_lasx_xbz_h:
+  case Intrinsic::loongarch_lasx_xbz_w:
+  case Intrinsic::loongarch_lasx_xbz_d:
+    if (!Subtarget.is64Bit())
+      return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
+                         N->getOperand(1));
+    break;
+  case Intrinsic::loongarch_lsx_bz_v:
+  case Intrinsic::loongarch_lasx_xbz_v:
+    if (!Subtarget.is64Bit())
+      return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
+                         N->getOperand(1));
+    break;
+  case Intrinsic::loongarch_lsx_bnz_b:
+  case Intrinsic::loongarch_lsx_bnz_h:
+  case Intrinsic::loongarch_lsx_bnz_w:
+  case Intrinsic::loongarch_lsx_bnz_d:
+  case Intrinsic::loongarch_lasx_xbnz_b:
+  case Intrinsic::loongarch_lasx_xbnz_h:
+  case Intrinsic::loongarch_lasx_xbnz_w:
+  case Intrinsic::loongarch_lasx_xbnz_d:
+    if (!Subtarget.is64Bit())
+      return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
+                         N->getOperand(1));
+    break;
+  case Intrinsic::loongarch_lsx_bnz_v:
+  case Intrinsic::loongarch_lasx_xbnz_v:
+    if (!Subtarget.is64Bit())
+      return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
+                         N->getOperand(1));
+    break;
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d99a57e562528..b0eb51a92c6c6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -26,7 +26,7 @@ def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
 def SDT_LoongArchV2RUimm
     : SDTypeProfile<1, 3,
                     [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
-                     SDTCisVT<3, i64>]>;
+                     SDTCisVT<3, GRLenVT>]>;
 def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
 def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
 def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -1482,7 +1482,7 @@ multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
 }
 
 multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
-                     Operand ImmOpnd, Operand IdxOpnd, ValueType elt = i64> {
+                     Operand ImmOpnd, Operand IdxOpnd, ValueType elt = GRLenVT> {
   def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)), BaseAddr:$rj),
             (Inst vt:$vd, BaseAddr:$rj, 0, IdxOpnd:$idx)>;
 
@@ -2110,8 +2110,8 @@ def : Pat<(GRLenVT (vector_extract v4i32:$vj, GRLenVT:$rk)),
           (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, GRLenVT:$rk),
                                                  sub_32)),
                             GPR)>;
-def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
-          (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
+def : Pat<(GRLenVT (vector_extract v2i64:$vj, GRLenVT:$rk)),
+          (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, GRLenVT:$rk),
                                                  sub_64)),
                             GPR)>;
 def : Pat<(f32 (vector_extract v4f32:$vj, GRLenVT:$rk)),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
index 87ee4ad025395..8b12216d0f856 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
@@ -1,27 +1,46 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA32
 ; RUN: llc --mtriple=loongarch64 -mattr=+lasx --verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=LA64
 
 declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
 
 define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v32i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT:    bitrev.8b $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT:    bitrev.8b $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT:    bitrev.8b $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT:    bitrev.8b $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT:    xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: test_bitreverse_v32i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvslli.b $xr1, $xr0, 4
+; LA32-NEXT:    xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT:    xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT:    xvandi.b $xr1, $xr0, 51
+; LA32-NEXT:    xvslli.b $xr1, $xr1, 2
+; LA32-NEXT:    xvsrli.b $xr0, $xr0, 2
+; LA32-NEXT:    xvandi.b $xr0, $xr0, 51
+; LA32-NEXT:    xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT:    xvandi.b $xr1, $xr0, 85
+; LA32-NEXT:    xvslli.b $xr1, $xr1, 1
+; LA32-NEXT:    xvsrli.b $xr0, $xr0, 1
+; LA32-NEXT:    xvandi.b $xr0, $xr0, 85
+; LA32-NEXT:    xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_bitreverse_v32i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT:    bitrev.8b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT:    bitrev.8b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT:    bitrev.8b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT:    bitrev.8b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT:    xvori.b $xr0, $xr1, 0
+; LA64-NEXT:    ret
   %b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
   ret <32 x i8> %b
 }
@@ -29,23 +48,53 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
 
 define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v16i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT:    bitrev.d $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT:    bitrev.d $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT:    bitrev.d $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT:    bitrev.d $a0, $a0
-; CHECK-NEXT:    vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT:    xvshuf4i.h $xr0, $xr2, 27
-; CHECK-NEXT:    ret
+; LA32-LABEL: test_bitreverse_v16i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 5
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 4
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 7
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 6
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 0
+; LA32-N...
[truncated]

Copy link
Contributor

@zhaoqi5 zhaoqi5 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@heiher heiher merged commit fde15cb into llvm:main Sep 25, 2025
9 checks passed
@heiher heiher deleted the la32-vec branch September 25, 2025 13:08
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants