diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 78dc3cb27a698..b170ce4e13051 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3496,21 +3496,30 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
   if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
     return SDValue();
   SDValue Vec = SplatVal.getOperand(0);
-  // Only perform this optimization on vectors of the same size for simplicity.
-  // Don't perform this optimization for i1 vectors.
+  // Don't perform this optimization for i1 vectors, or if the element types
+  // differ.
   // FIXME: Support i1 vectors, maybe by promoting to i8?
-  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
+  MVT EltTy = VT.getVectorElementType();
+  if (EltTy == MVT::i1 ||
+      EltTy != Vec.getSimpleValueType().getVectorElementType())
     return SDValue();
   SDValue Idx = SplatVal.getOperand(1);
   // The index must be a legal type.
   if (Idx.getValueType() != Subtarget.getXLenVT())
     return SDValue();
 
+  // Check that the index lies within VT, i.e. that Vec is no larger than VT.
+  // TODO: Can we check if the Index is constant and known in-bounds?
+  if (!TypeSize::isKnownLE(Vec.getValueSizeInBits(), VT.getSizeInBits()))
+    return SDValue();
+
   MVT ContainerVT = VT;
-  if (VT.isFixedLengthVector()) {
+  if (VT.isFixedLengthVector())
     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
-    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
-  }
+
+  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+                    DAG.getUNDEF(ContainerVT), Vec,
+                    DAG.getVectorIdxConstant(0, DL));
 
   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
 
@@ -3523,7 +3532,6 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
 }
 
-
 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
 /// which constitute a large proportion of the elements.
In such cases we can /// splat a vector with the dominant element and make up the shortfall with diff --git a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll index e70dcd16d02cd..5d730da09ef83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll +++ b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll @@ -6,8 +6,8 @@ define @match_nxv16i8_v1i8( %op1, <1 x i8> ; CHECK-LABEL: match_nxv16i8_v1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a0 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v10, v8, v12 ; CHECK-NEXT: vmand.mm v0, v10, v0 ; CHECK-NEXT: ret %r = tail call @llvm.experimental.vector.match( %op1, <1 x i8> %op2, %mask) @@ -17,14 +17,12 @@ define @match_nxv16i8_v1i8( %op1, <1 x i8> define @match_nxv16i8_v2i8( %op1, <2 x i8> %op2, %mask) { ; CHECK-LABEL: match_nxv16i8_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vslidedown.vi v10, v10, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; CHECK-NEXT: vmor.mm v8, v11, v10 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v10, v8, v12 +; CHECK-NEXT: vmor.mm v8, v10, v14 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %r = tail call @llvm.experimental.vector.match( %op1, <2 x i8> %op2, %mask) @@ -34,21 +32,17 @@ define @match_nxv16i8_v2i8( %op1, <2 x i8> define @match_nxv16i8_v4i8( %op1, <4 x i8> %op2, %mask) { ; CHECK-LABEL: match_nxv16i8_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vslidedown.vi v11, v10, 1 -; CHECK-NEXT: vslidedown.vi v12, v10, 2 -; CHECK-NEXT: vslidedown.vi v10, v10, 3 -; CHECK-NEXT: vmv.x.s a1, v11 -; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; CHECK-NEXT: vmor.mm v11, v11, v12 -; CHECK-NEXT: vmor.mm v10, v11, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v15, v8, v12 +; CHECK-NEXT: vmor.mm v12, v15, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 2 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 3 +; CHECK-NEXT: vmor.mm v10, v12, v13 +; CHECK-NEXT: vmseq.vv v11, v8, v14 ; CHECK-NEXT: vmor.mm v8, v10, v11 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -59,37 +53,29 @@ define @match_nxv16i8_v4i8( %op1, <4 x i8> define @match_nxv16i8_v8i8( %op1, <8 x i8> %op2, %mask) { ; CHECK-LABEL: match_nxv16i8_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vslidedown.vi v11, v10, 1 -; CHECK-NEXT: vslidedown.vi v12, v10, 2 -; CHECK-NEXT: vmv.x.s a1, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 3 -; CHECK-NEXT: vmv.x.s a2, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 4 -; CHECK-NEXT: vmv.x.s a3, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 5 -; CHECK-NEXT: vmv.x.s a4, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 6 -; CHECK-NEXT: 
vslidedown.vi v10, v10, 7 -; CHECK-NEXT: vmv.x.s a5, v11 -; CHECK-NEXT: vsetvli a6, zero, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a2 -; CHECK-NEXT: vmor.mm v11, v11, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a3 -; CHECK-NEXT: vmor.mm v10, v11, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a4 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a5 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v15, v8, v12 +; CHECK-NEXT: vmor.mm v12, v15, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 2 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 3 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 4 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 5 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 6 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 7 +; CHECK-NEXT: vmor.mm v10, v12, v13 +; CHECK-NEXT: vmseq.vv v11, v8, v14 ; CHECK-NEXT: vmor.mm v8, v10, v11 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -100,69 +86,53 @@ define @match_nxv16i8_v8i8( %op1, <8 x i8> define @match_nxv16i8_v16i8( %op1, <16 x i8> %op2, %mask) { ; CHECK-LABEL: match_nxv16i8_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vslidedown.vi v11, v10, 1 -; CHECK-NEXT: vslidedown.vi v12, v10, 2 -; CHECK-NEXT: vmv.x.s a1, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 3 -; CHECK-NEXT: vmv.x.s a2, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 4 -; CHECK-NEXT: vmv.x.s a3, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 5 -; CHECK-NEXT: vmv.x.s a4, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 6 -; CHECK-NEXT: vmv.x.s a5, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 7 -; CHECK-NEXT: vmv.x.s a6, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 8 -; CHECK-NEXT: vmv.x.s a7, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 9 -; CHECK-NEXT: vmv.x.s t0, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 10 -; CHECK-NEXT: vmv.x.s t1, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 11 -; CHECK-NEXT: vmv.x.s t2, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 12 -; CHECK-NEXT: vmv.x.s t3, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 13 -; CHECK-NEXT: vmv.x.s t4, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 14 -; CHECK-NEXT: vslidedown.vi v10, v10, 15 -; CHECK-NEXT: vmv.x.s t5, v11 -; CHECK-NEXT: vsetvli t6, zero, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a2 -; CHECK-NEXT: vmor.mm v11, v11, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a3 -; CHECK-NEXT: vmor.mm v10, v11, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a4 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a5 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a6 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a7 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; 
CHECK-NEXT: vmseq.vx v11, v8, t0 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, t1 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, t2 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, t3 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, t4 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, t5 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v15, v8, v12 +; CHECK-NEXT: vmor.mm v12, v15, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 2 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 3 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 4 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 5 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 6 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 7 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 8 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 9 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 10 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 11 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 12 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 13 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 14 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 15 +; CHECK-NEXT: vmor.mm v10, v12, v13 +; CHECK-NEXT: vmseq.vv v11, v8, v14 ; CHECK-NEXT: vmor.mm v8, v10, v11 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -173,9 +143,10 @@ define @match_nxv16i8_v16i8( %op1, <16 x i8 define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) { ; CHECK-LABEL: match_v16i8_v1i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vrgather.vi v10, v9, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v9 -; CHECK-NEXT: vmseq.vx v8, v8, a0 +; CHECK-NEXT: vmseq.vv v8, v8, v10 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) @@ -185,14 +156,12 @@ define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mas define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) { ; CHECK-LABEL: match_v16i8_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.x.s a0, v9 -; CHECK-NEXT: vslidedown.vi v9, v9, 1 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v9 -; CHECK-NEXT: vmseq.vx v8, v8, a0 
-; CHECK-NEXT: vmor.mm v8, v10, v8 +; CHECK-NEXT: vrgather.vi v10, v9, 1 +; CHECK-NEXT: vrgather.vi v11, v9, 0 +; CHECK-NEXT: vmseq.vv v9, v8, v10 +; CHECK-NEXT: vmseq.vv v8, v8, v11 +; CHECK-NEXT: vmor.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) @@ -202,21 +171,17 @@ define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mas define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) { ; CHECK-LABEL: match_v16i8_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.x.s a0, v9 -; CHECK-NEXT: vslidedown.vi v10, v9, 1 -; CHECK-NEXT: vslidedown.vi v11, v9, 2 -; CHECK-NEXT: vslidedown.vi v9, v9, 3 -; CHECK-NEXT: vmv.x.s a1, v10 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v9 -; CHECK-NEXT: vmseq.vx v9, v8, a0 -; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vrgather.vi v10, v9, 1 +; CHECK-NEXT: vrgather.vi v11, v9, 0 +; CHECK-NEXT: vmseq.vv v10, v8, v10 +; CHECK-NEXT: vmseq.vv v11, v8, v11 +; CHECK-NEXT: vmor.mm v10, v11, v10 +; CHECK-NEXT: vrgather.vi v11, v9, 2 +; CHECK-NEXT: vrgather.vi v12, v9, 3 +; CHECK-NEXT: vmseq.vv v9, v8, v11 ; CHECK-NEXT: vmor.mm v9, v10, v9 -; CHECK-NEXT: vmseq.vx v8, v8, a1 +; CHECK-NEXT: vmseq.vv v8, v8, v12 ; CHECK-NEXT: vmor.mm v8, v9, v8 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -227,37 +192,29 @@ define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mas define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) { ; CHECK-LABEL: match_v16i8_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.x.s a0, v9 -; CHECK-NEXT: vslidedown.vi v10, v9, 1 -; CHECK-NEXT: vslidedown.vi v11, v9, 2 -; CHECK-NEXT: vmv.x.s a1, v10 -; CHECK-NEXT: vslidedown.vi v10, v9, 3 -; CHECK-NEXT: vmv.x.s a2, v11 -; CHECK-NEXT: vslidedown.vi v11, v9, 4 -; CHECK-NEXT: vmv.x.s a3, v10 -; CHECK-NEXT: vslidedown.vi v10, v9, 5 -; CHECK-NEXT: vmv.x.s a4, v11 -; CHECK-NEXT: vslidedown.vi v11, v9, 6 -; CHECK-NEXT: vslidedown.vi v9, v9, 7 -; CHECK-NEXT: vmv.x.s a5, v10 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v9 -; CHECK-NEXT: vmseq.vx v9, v8, a2 +; CHECK-NEXT: vrgather.vi v10, v9, 1 +; CHECK-NEXT: vrgather.vi v11, v9, 0 +; CHECK-NEXT: vmseq.vv v10, v8, v10 +; CHECK-NEXT: vmseq.vv v11, v8, v11 +; CHECK-NEXT: vmor.mm v10, v11, v10 +; CHECK-NEXT: vrgather.vi v11, v9, 2 +; CHECK-NEXT: vmseq.vv v11, v8, v11 ; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a3 +; CHECK-NEXT: vrgather.vi v11, v9, 3 +; CHECK-NEXT: vmseq.vv v11, v8, v11 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vrgather.vi v11, v9, 4 +; CHECK-NEXT: vmseq.vv v11, v8, v11 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vrgather.vi v11, v9, 5 +; CHECK-NEXT: vmseq.vv v11, v8, v11 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vrgather.vi v11, v9, 6 +; CHECK-NEXT: vrgather.vi v12, v9, 7 +; CHECK-NEXT: vmseq.vv v9, v8, v11 ; CHECK-NEXT: vmor.mm v9, v10, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a4 -; CHECK-NEXT: vmor.mm v9, v9, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a5 -; CHECK-NEXT: vmor.mm v9, v9, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; 
CHECK-NEXT: vmor.mm v9, v9, v11 -; CHECK-NEXT: vmor.mm v9, v9, v10 -; CHECK-NEXT: vmseq.vx v8, v8, a1 +; CHECK-NEXT: vmseq.vv v8, v8, v12 ; CHECK-NEXT: vmor.mm v8, v9, v8 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -358,37 +315,29 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) { define @match_nxv8i16_v8i16( %op1, <8 x i16> %op2, %mask) { ; CHECK-LABEL: match_nxv8i16_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vslidedown.vi v11, v10, 1 -; CHECK-NEXT: vslidedown.vi v12, v10, 2 -; CHECK-NEXT: vmv.x.s a1, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 3 -; CHECK-NEXT: vmv.x.s a2, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 4 -; CHECK-NEXT: vmv.x.s a3, v11 -; CHECK-NEXT: vslidedown.vi v11, v10, 5 -; CHECK-NEXT: vmv.x.s a4, v12 -; CHECK-NEXT: vslidedown.vi v12, v10, 6 -; CHECK-NEXT: vslidedown.vi v10, v10, 7 -; CHECK-NEXT: vmv.x.s a5, v11 -; CHECK-NEXT: vsetvli a6, zero, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a2 -; CHECK-NEXT: vmor.mm v11, v11, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a3 -; CHECK-NEXT: vmor.mm v10, v11, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a4 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a5 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmor.mm v10, v10, v12 -; CHECK-NEXT: vmor.mm v10, v10, v11 -; CHECK-NEXT: vmseq.vx v11, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v15, v8, v12 +; CHECK-NEXT: vmor.mm v12, v15, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 2 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 3 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 4 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 5 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vmor.mm v12, v12, v13 +; CHECK-NEXT: vrgather.vi v14, v10, 6 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 7 +; CHECK-NEXT: vmor.mm v10, v12, v13 +; CHECK-NEXT: vmseq.vv v11, v8, v14 ; CHECK-NEXT: vmor.mm v8, v10, v11 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -1281,21 +1230,17 @@ define <16 x i1> @match_v16i8_v32i8(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %m define @match_nxv4xi32_v4i32( %op1, <4 x i32> %op2, %mask) { ; CHECK-LABEL: match_nxv4xi32_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v10 -; CHECK-NEXT: vslidedown.vi v11, v10, 1 -; CHECK-NEXT: vslidedown.vi v12, v10, 2 -; CHECK-NEXT: vslidedown.vi v10, v10, 3 -; CHECK-NEXT: vmv.x.s a1, v11 -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v11, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v12 -; CHECK-NEXT: vmseq.vx v12, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v10 -; CHECK-NEXT: vmseq.vx v10, v8, a0 -; CHECK-NEXT: vmor.mm v11, v11, v12 -; CHECK-NEXT: vmor.mm v10, v11, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v15, v8, v12 +; CHECK-NEXT: 
vmor.mm v12, v15, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 2 +; CHECK-NEXT: vmseq.vv v13, v8, v14 +; CHECK-NEXT: vrgather.vi v14, v10, 3 +; CHECK-NEXT: vmor.mm v10, v12, v13 +; CHECK-NEXT: vmseq.vv v11, v8, v14 ; CHECK-NEXT: vmor.mm v8, v10, v11 ; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret @@ -1304,48 +1249,16 @@ define @match_nxv4xi32_v4i32( %op1, <4 x i32 } define @match_nxv2xi64_v2i64( %op1, <2 x i64> %op2, %mask) { -; RV32-LABEL: match_nxv2xi64_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vslidedown.vi v11, v10, 1 -; RV32-NEXT: addi a2, sp, 8 -; RV32-NEXT: vsrl.vx v10, v10, a1 -; RV32-NEXT: vmv.x.s a3, v11 -; RV32-NEXT: vsrl.vx v11, v11, a1 -; RV32-NEXT: vmv.x.s a1, v10 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: vmv.x.s a0, v11 -; RV32-NEXT: sw a3, 0(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a2), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmseq.vv v14, v8, v10 -; RV32-NEXT: vmseq.vv v10, v8, v12 -; RV32-NEXT: vmor.mm v8, v14, v10 -; RV32-NEXT: vmand.mm v0, v8, v0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret -; -; RV64-LABEL: match_nxv2xi64_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 -; RV64-NEXT: vslidedown.vi v10, v10, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vmseq.vx v11, v8, a0 -; RV64-NEXT: vmv.x.s a0, v10 -; RV64-NEXT: vmseq.vx v10, v8, a0 -; RV64-NEXT: vmor.mm v8, v11, v10 -; RV64-NEXT: vmand.mm v0, v8, v0 -; RV64-NEXT: ret +; CHECK-LABEL: match_nxv2xi64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v10, 1 +; CHECK-NEXT: vmseq.vv v14, v8, v12 +; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmseq.vv v10, v8, v12 +; CHECK-NEXT: vmor.mm v8, v10, v14 +; CHECK-NEXT: vmand.mm v0, v8, v0 +; CHECK-NEXT: ret %r = tail call @llvm.experimental.vector.match( %op1, <2 x i64> %op2, %mask) ret %r }
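
Note (illustrative, not part of the patch): the matchSplatAsGather change above is what turns the old per-element scalar sequence in the checks (vmv.x.s + vmseq.vx) into a vrgather.vi + vmseq.vv pair. A minimal sketch of the kind of IR that now reaches the gather path, where the splat's scalar is extracted from a vector of a different size than the splat type; the function name and exact types here are hypothetical:

; Splat an element of a fixed <8 x i8> across a scalable <vscale x 16 x i8>.
; With this patch the splat is expected to be matched as a gather of the
; source vector (a vrgather.vi with index 3) after the source has been
; inserted into the container type, instead of going through a scalar
; vmv.x.s.
define <vscale x 16 x i8> @splat_extract_example(<8 x i8> %v) {
  %elt = extractelement <8 x i8> %v, i64 3
  %head = insertelement <vscale x 16 x i8> poison, i8 %elt, i64 0
  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %splat
}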
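The same path (again an illustrative sketch, not taken from the patch's tests) now also covers fixed-length destinations whose size differs from the source; the old code bailed out unless Vec.getValueType() == VT:

; Splat an element of a <4 x i8> across a <16 x i8>.  The 32-bit source is
; smaller than the 128-bit destination, which the old same-type check
; rejected; the new TypeSize::isKnownLE check accepts it because any
; in-bounds index into the source also lies within VT.
define <16 x i8> @splat_extract_fixed_example(<4 x i8> %v) {
  %elt = extractelement <4 x i8> %v, i64 2
  %head = insertelement <16 x i8> poison, i8 %elt, i64 0
  %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}

In both sketches the INSERT_SUBVECTOR into the container type is there so the gather's source and result agree on a type; only the gathered lane of the source is read, so the undef tail it introduces does not affect the result.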