Skip to content

Conversation

@zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Dec 2, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Dec 2, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/170246.diff

4 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+30)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll (+20-120)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll (+19-33)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5a3b53437a750..f2a55d9bf1469 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -618,6 +618,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerVECREDUCE(Op, DAG);
   case ISD::ConstantFP:
     return lowerConstantFP(Op, DAG);
+  case ISD::TRUNCATE:
+    return lowerTRUNCATE(Op, DAG);
   }
   return SDValue();
 }
@@ -675,6 +677,34 @@ static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
   return SDValue();
 }
 
+SDValue LoongArchTargetLowering::lowerTRUNCATE(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+  MVT EltVT = VT.getVectorElementType();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  // Only need to consider v4i64->v4i32, v8i32->v8i16 and v16i16->v16i8.
+  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+    return SDValue();
+  if (SrcVT != MVT::v4i64 && SrcVT != MVT::v8i32 && SrcVT != MVT::v16i16)
+    return SDValue();
+
+  unsigned WidenNumElts = NumElts * 2;
+  SmallVector<int, 32> Mask(WidenNumElts, -1);
+  for (unsigned i = 0; i < NumElts; ++i)
+    Mask[i] = 2 * i;
+
+  MVT NewVT = MVT::getVectorVT(EltVT, WidenNumElts);
+  SDValue CastSrc = DAG.getBitcast(NewVT, Src);
+  SDValue Result = DAG.getVectorShuffle(NewVT, DL, CastSrc, CastSrc, Mask);
+
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+                     DAG.getVectorIdxConstant(0, DL));
+}
+
 SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 0c09fb6afd2d1..6596bfe447c3e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll
index b5950fd55606e..108b77ba78e89 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll
@@ -6,29 +6,19 @@ define void @trunc_v4i64_to_v4i32(ptr %res, ptr %a) nounwind {
 ; LA32-LABEL: trunc_v4i64_to_v4i32:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT:    vst $vr1, $a0, 0
+; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
+; LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
+; LA32-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; LA32-NEXT:    vst $vr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: trunc_v4i64_to_v4i32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT:    vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT:    vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT:    vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT:    vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT:    vst $vr1, $a0, 0
+; LA64-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
+; LA64-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
+; LA64-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; LA64-NEXT:    vst $vr0, $a0, 0
 ; LA64-NEXT:    ret
 entry:
   %v = load <4 x i64>, ptr %a
@@ -99,45 +89,17 @@ define void @trunc_v8i32_to_v8i16(ptr %res, ptr %a) nounwind {
 ; LA32-LABEL: trunc_v8i32_to_v8i16:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 1
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 2
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 3
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 4
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 5
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 6
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 7
-; LA32-NEXT:    vst $vr1, $a0, 0
+; LA32-NEXT:    xvpermi.d $xr1, $xr0, 78
+; LA32-NEXT:    xvpickev.h $xr0, $xr1, $xr0
+; LA32-NEXT:    vst $vr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: trunc_v8i32_to_v8i16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 0
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 1
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 2
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 3
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 4
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 5
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 6
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 7
-; LA64-NEXT:    vst $vr1, $a0, 0
+; LA64-NEXT:    xvpermi.d $xr1, $xr0, 78
+; LA64-NEXT:    xvpickev.h $xr0, $xr1, $xr0
+; LA64-NEXT:    vst $vr0, $a0, 0
 ; LA64-NEXT:    ret
 entry:
   %v = load <8 x i32>, ptr %a
@@ -180,79 +142,17 @@ define void @trunc_v16i16_to_v16i8(ptr %res, ptr %a) nounwind {
 ; LA32-LABEL: trunc_v16i16_to_v16i8:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 0
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 1
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 2
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 3
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 4
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 4
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 5
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 5
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 6
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 6
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 7
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 7
-; LA32-NEXT:    xvpermi.d $xr0, $xr0, 14
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 0
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 8
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 1
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 9
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 2
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 10
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 3
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 11
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 4
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 12
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 5
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 13
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 6
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 14
-; LA32-NEXT:    vpickve2gr.h $a1, $vr0, 7
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 15
-; LA32-NEXT:    vst $vr1, $a0, 0
+; LA32-NEXT:    xvpermi.d $xr1, $xr0, 78
+; LA32-NEXT:    xvpickev.b $xr0, $xr1, $xr0
+; LA32-NEXT:    vst $vr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: trunc_v16i16_to_v16i8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 0
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 1
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 2
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 3
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 4
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 4
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 5
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 5
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 6
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 6
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 7
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 7
-; LA64-NEXT:    xvpermi.d $xr0, $xr0, 14
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 0
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 8
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 1
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 9
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 2
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 10
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 3
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 11
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 4
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 12
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 5
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 13
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 6
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 14
-; LA64-NEXT:    vpickve2gr.h $a1, $vr0, 7
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 15
-; LA64-NEXT:    vst $vr1, $a0, 0
+; LA64-NEXT:    xvpermi.d $xr1, $xr0, 78
+; LA64-NEXT:    xvpickev.b $xr0, $xr1, $xr0
+; LA64-NEXT:    vst $vr0, $a0, 0
 ; LA64-NEXT:    ret
 entry:
   %v = load <16 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
index 09908f619fa1f..75c71dffd21c8 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
@@ -884,39 +884,25 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
 }
 
 define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) {
-; LA32-LABEL: xvmsk_ogt_v4f64_concat_poison:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvrepli.b $xr1, 0
-; LA32-NEXT:    xvfcmp.clt.d $xr0, $xr1, $xr0
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 6
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT:    xvpickve2gr.w $a3, $xr0, 0
-; LA32-NEXT:    vinsgr2vr.h $vr0, $a3, 0
-; LA32-NEXT:    vinsgr2vr.h $vr0, $a2, 1
-; LA32-NEXT:    vinsgr2vr.h $vr0, $a1, 2
-; LA32-NEXT:    vinsgr2vr.h $vr0, $a0, 3
-; LA32-NEXT:    vslli.h $vr0, $vr0, 15
-; LA32-NEXT:    vmskltz.h $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.hu $a0, $vr0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: xvmsk_ogt_v4f64_concat_poison:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvrepli.b $xr1, 0
-; LA64-NEXT:    xvfcmp.clt.d $xr0, $xr1, $xr0
-; LA64-NEXT:    xvpickve2gr.d $a0, $xr0, 3
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT:    xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT:    xvpickve2gr.d $a3, $xr0, 0
-; LA64-NEXT:    vinsgr2vr.h $vr0, $a3, 0
-; LA64-NEXT:    vinsgr2vr.h $vr0, $a2, 1
-; LA64-NEXT:    vinsgr2vr.h $vr0, $a1, 2
-; LA64-NEXT:    vinsgr2vr.h $vr0, $a0, 3
-; LA64-NEXT:    vslli.h $vr0, $vr0, 15
-; LA64-NEXT:    vmskltz.h $vr0, $vr0
-; LA64-NEXT:    vpickve2gr.hu $a0, $vr0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: xvmsk_ogt_v4f64_concat_poison:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI34_0)
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI34_0)
+; CHECK-NEXT:    xvrepli.b $xr2, 0
+; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 3
+; CHECK-NEXT:    vslli.h $vr0, $vr1, 15
+; CHECK-NEXT:    vmskltz.h $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    ret
   %tobool = fcmp ogt <4 x double> %vec, zeroinitializer
   %insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
   %res = bitcast <8 x i1> %insertvec to i8

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants