diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7ddf996f53f4c..b2d81d98df627 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -466,8 +466,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
   // Set DAG combine for 'LASX' feature.
 
-  if (Subtarget.hasExtLASX())
+  if (Subtarget.hasExtLASX()) {
     setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+    setTargetDAGCombine(ISD::BUILD_VECTOR);
+  }
 
   // Compute derived properties from the register classes.
   computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -6679,6 +6681,76 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Fold a 128-bit BUILD_VECTOR whose operands are all constant-index
+/// EXTRACT_VECTOR_ELTs of the same legal 256-bit vector into a VECTOR_SHUFFLE
+/// of that vector's low and high 128-bit halves.
+///
+/// \p N is the BUILD_VECTOR node; \p TLI is queried for type legality. \p DCI
+/// and \p Subtarget are not used here; they mirror the arguments passed to the
+/// sibling combine helpers in PerformDAGCombine.
+///
+/// \returns the replacement shuffle, or an empty SDValue when the pattern does
+/// not match.
+static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          const LoongArchSubtarget &Subtarget,
+                                          const LoongArchTargetLowering &TLI) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = N->getNumOperands();
+
+  if (!VT.is128BitVector() || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  // Combine:
+  //   t1 = extract_vector_elt t0, imm1
+  //   t2 = extract_vector_elt t0, imm2
+  //   t3 = BUILD_VECTOR t1, t2
+  // to:
+  //   t4 = extract_subvector t0, 0
+  //   t5 = extract_subvector t0, 2
+  //   t3 = vector_shuffle t4, t5, <imm1, imm2>
+  SDValue OrigVec;
+  SmallVector<int> Mask(NumElts, -1);
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    // Only constant lane indices can be encoded in a shuffle mask. The index
+    // must also address one of the 2 * NumElts source lanes, otherwise it is
+    // not a valid mask entry for the two-operand shuffle built below.
+    auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    if (!ExtractIdx || ExtractIdx->getZExtValue() >= 2 * NumElts)
+      return SDValue();
+    // Lanes [0, NumElts) select from the low half (SubVec0) and lanes
+    // [NumElts, 2 * NumElts) from the high half (SubVec1), so the extract
+    // index can be used as the mask entry unchanged.
+    Mask[i] = ExtractIdx->getZExtValue();
+
+    SDValue Vec = Op.getOperand(0);
+    if (i == 0) {
+      // The first operand fixes the source vector; validate it once.
+      OrigVec = Vec;
+      EVT OrigTy = OrigVec.getValueType();
+      if (!OrigTy.is256BitVector() || !TLI.isTypeLegal(OrigTy))
+        return SDValue();
+      if (OrigTy.getVectorElementType() != VT.getVectorElementType() ||
+          OrigTy.getVectorNumElements() != NumElts * 2)
+        return SDValue();
+    } else {
+      // Every remaining element must come from that same source vector.
+      if (Vec != OrigVec)
+        return SDValue();
+    }
+  }
+
+  SDValue SubVec0 = DAG.getExtractSubvector(DL, VT, OrigVec, 0);
+  SDValue SubVec1 = DAG.getExtractSubvector(DL, VT, OrigVec, NumElts);
+  return DAG.getVectorShuffle(VT, DL, SubVec0, SubVec1, Mask);
+}
+
 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -6714,6 +6786,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
   case ISD::EXTRACT_VECTOR_ELT:
     return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+  case ISD::BUILD_VECTOR:
+    return performBUILD_VECTORCombine(N, DAG, DCI, Subtarget, *this);
   }
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll b/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll index 1d19c4b243a7a..370345ecc83ce 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll @@ -1,43 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @buildvector_ext32107654ba98fedc(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext32107654ba98fedc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 -; CHECK-NEXT: vpickve2gr.b $a2, $vr0, 2 -; CHECK-NEXT: vpickve2gr.b $a3, $vr0, 1 -; CHECK-NEXT: vpickve2gr.b $a4, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a5, $vr0, 7 -; CHECK-NEXT: vpickve2gr.b $a6, $vr0, 6 -; CHECK-NEXT: vpickve2gr.b $a7, 
$vr0, 5 -; CHECK-NEXT: vpickve2gr.b $t0, $vr0, 4 -; CHECK-NEXT: vpickve2gr.b $t1, $vr0, 11 -; CHECK-NEXT: vpickve2gr.b $t2, $vr0, 10 -; CHECK-NEXT: vpickve2gr.b $t3, $vr0, 9 -; CHECK-NEXT: vpickve2gr.b $t4, $vr0, 8 -; CHECK-NEXT: vpickve2gr.b $t5, $vr0, 15 -; CHECK-NEXT: vpickve2gr.b $t6, $vr0, 14 -; CHECK-NEXT: vpickve2gr.b $t7, $vr0, 13 -; CHECK-NEXT: vpickve2gr.b $t8, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t0, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t3, 10 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t4, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t5, 12 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t6, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t7, 14 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t8, 15 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -82,39 +51,8 @@ define void @buildvector_ext13579bdfx13579bdf(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext13579bdfx13579bdf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: vpickve2gr.b $a2, $vr0, 3 -; CHECK-NEXT: vpickve2gr.b $a3, $vr0, 5 -; CHECK-NEXT: vpickve2gr.b $a4, $vr0, 7 -; CHECK-NEXT: vpickve2gr.b $a5, $vr0, 9 -; CHECK-NEXT: vpickve2gr.b $a6, $vr0, 11 -; CHECK-NEXT: vpickve2gr.b $a7, $vr0, 13 -; CHECK-NEXT: vpickve2gr.b $t0, $vr0, 15 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14 -; CHECK-NEXT: vpickve2gr.b $t1, $vr0, 1 -; CHECK-NEXT: vpickve2gr.b $t2, $vr0, 3 -; CHECK-NEXT: vpickve2gr.b $t3, $vr0, 5 -; CHECK-NEXT: vpickve2gr.b $t4, $vr0, 7 -; CHECK-NEXT: vpickve2gr.b $t5, $vr0, 9 -; CHECK-NEXT: vpickve2gr.b $t6, $vr0, 11 -; CHECK-NEXT: 
vpickve2gr.b $t7, $vr0, 13 -; CHECK-NEXT: vpickve2gr.b $t8, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t0, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t3, 10 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t4, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t5, 12 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t6, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t7, 14 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t8, 15 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vpickod.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -158,23 +96,11 @@ entry: define void @buildvector_ext01234560(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext01234560: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0 -; CHECK-NEXT: vpickve2gr.h $a2, $vr0, 1 -; CHECK-NEXT: vpickve2gr.h $a3, $vr0, 2 -; CHECK-NEXT: vpickve2gr.h $a4, $vr0, 3 -; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 4 -; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 5 -; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vst $vr1, $a0, 0 ; CHECK-NEXT: ret entry: %v = load <16 x i16>, ptr %src @@ -202,23 +128,8 @@ define void 
@buildvector_ext08192a3b(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext08192a3b: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 -; CHECK-NEXT: vpickve2gr.h $a2, $vr1, 0 -; CHECK-NEXT: vpickve2gr.h $a3, $vr0, 1 -; CHECK-NEXT: vpickve2gr.h $a4, $vr1, 1 -; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 2 -; CHECK-NEXT: vpickve2gr.h $a6, $vr1, 2 -; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 3 -; CHECK-NEXT: vpickve2gr.h $t0, $vr1, 3 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $t0, 7 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -244,20 +155,12 @@ entry: } define void @buildvector_ext0000(ptr %dst, ptr %src) nounwind { -; LA32-LABEL: buildvector_ext0000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: ld.w $a1, $a1, 0 -; LA32-NEXT: vreplgr2vr.w $vr0, $a1 -; LA32-NEXT: vst $vr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: buildvector_ext0000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA64-NEXT: vreplgr2vr.w $vr0, $a1 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: buildvector_ext0000: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <8 x i32>, ptr %src %e0 = extractelement <8 x i32> %v, i32 0 @@ -276,15 +179,11 @@ define void @buildvector_ext7610(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext7610: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; CHECK-NEXT: xvpickve2gr.w 
$a2, $xr0, 6 -; CHECK-NEXT: xvpickve2gr.w $a3, $xr0, 1 -; CHECK-NEXT: xvpickve2gr.w $a4, $xr0, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 1 +; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0 +; CHECK-NEXT: vst $vr1, $a0, 0 ; CHECK-NEXT: ret entry: %v = load <8 x i32>, ptr %src @@ -303,12 +202,8 @@ entry: define void @buildvector_ext0113(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext0113: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 3 -; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 -; CHECK-NEXT: vextrins.w $vr0, $vr1, 32 -; CHECK-NEXT: vextrins.w $vr0, $vr2, 48 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 212 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -329,12 +224,10 @@ define void @buildvector_ext6060(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext6060: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 0 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 238 -; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 2 -; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vrepli.d $vr2, 6 +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vst $vr2, $a0, 0 ; CHECK-NEXT: ret entry: %v = load <8 x float>, ptr %src @@ -351,23 +244,12 @@ entry: } define void @buildvector_ext00(ptr %dst, ptr %src) nounwind { -; LA32-LABEL: buildvector_ext00: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1 -; LA32-NEXT: vreplvei.d $vr0, $vr0, 0 -; LA32-NEXT: vst $vr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: buildvector_ext00: -; LA64: # %bb.0: # %entry -; LA64-NEXT: ld.d $a1, $a1, 0 -; LA64-NEXT: vreplgr2vr.d $vr0, $a1 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: buildvector_ext00: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x i64>, ptr %src %e0 = extractelement <4 x i64> %v, i32 0 @@ -379,29 +261,13 @@ entry: } define void @buildvector_ext12(ptr %dst, ptr %src) nounwind { -; LA32-LABEL: buildvector_ext12: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2 -; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 5 -; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4 -; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1 -; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2 -; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3 -; LA32-NEXT: vst $vr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: buildvector_ext12: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1 -; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 2 -; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a2, 1 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: buildvector_ext12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x i64>, ptr %src %e0 = extractelement <4 x i64> %v, i32 1 @@ -415,10 +281,8 @@ entry: define void @buildvector_ext10(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext10: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -435,9 +299,8 @@ define void @buildvector_ext31(ptr %dst, ptr %src) nounwind { ; CHECK-LABEL: buildvector_ext31: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 3 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll index 09908f619fa1f..80f15ebc570a1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll @@ -888,15 +888,16 @@ define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) { ; LA32: # %bb.0: ; LA32-NEXT: xvrepli.b $xr1, 0 ; LA32-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2 -; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 0 -; LA32-NEXT: vinsgr2vr.h $vr0, $a3, 0 -; LA32-NEXT: vinsgr2vr.h $vr0, $a2, 1 -; LA32-NEXT: vinsgr2vr.h $vr0, $a1, 2 -; LA32-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; LA32-NEXT: vslli.h $vr0, $vr0, 15 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 +; LA32-NEXT: vinsgr2vr.h $vr2, $a0, 0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2 +; LA32-NEXT: vinsgr2vr.h $vr2, $a0, 1 +; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0 +; LA32-NEXT: vinsgr2vr.h $vr2, $a0, 2 +; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2 +; LA32-NEXT: vinsgr2vr.h $vr2, $a0, 3 +; LA32-NEXT: vslli.h $vr0, $vr2, 15 ; LA32-NEXT: vmskltz.h $vr0, $vr0 ; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; LA32-NEXT: ret