diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 10212c867fb35..7d28d52dd12e3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11632,21 +11632,24 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, // zero-extended their indices, \p narrowIndex tries to narrow the type of index // operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C < // bits(ty). -static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { +static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { + if (isIndexTypeSigned(IndexType)) + return false; + if (N.getOpcode() != ISD::SHL || !N->hasOneUse()) - return SDValue(); + return false; SDValue N0 = N.getOperand(0); if (N0.getOpcode() != ISD::ZERO_EXTEND && N0.getOpcode() != RISCVISD::VZEXT_VL) - return SDValue(); + return false; if (!N0->hasOneUse()) - return SDValue(); + return false; APInt ShAmt; SDValue N1 = N.getOperand(1); if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) - return SDValue(); + return false; SDLoc DL(N); SDValue Src = N0.getOperand(0); @@ -11658,14 +11661,15 @@ static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { // Skip if NewElen is not narrower than the original extended type. if (NewElen >= N0.getValueType().getScalarSizeInBits()) - return SDValue(); + return false; EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); - return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); + N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); + return true; } // Replace (seteq (i64 (and X, 0xffffffff)), C1) with @@ -13883,6 +13887,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), MGN->getBasePtr(), Index, ScaleOp}, MGN->getMemOperand(), IndexType, MGN->getExtensionType()); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getMaskedGather( + N->getVTList(), MGN->getMemoryVT(), DL, + {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), + MGN->getBasePtr(), Index, ScaleOp}, + MGN->getMemOperand(), IndexType, MGN->getExtensionType()); break; } case ISD::MSCATTER:{ @@ -13900,6 +13911,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), Index, ScaleOp}, MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getMaskedScatter( + N->getVTList(), MSN->getMemoryVT(), DL, + {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), + Index, ScaleOp}, + MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); break; } case ISD::VP_GATHER: { @@ -13917,6 +13935,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, ScaleOp, VPGN->getMask(), VPGN->getVectorLength()}, VPGN->getMemOperand(), IndexType); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, + {VPGN->getChain(), VPGN->getBasePtr(), Index, + ScaleOp, VPGN->getMask(), + VPGN->getVectorLength()}, + VPGN->getMemOperand(), IndexType); + break; } case ISD::VP_SCATTER: { @@ -13934,6 +13960,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, VPSN->getBasePtr(), Index, ScaleOp, VPSN->getMask(), VPSN->getVectorLength()},
VPSN->getMemOperand(), IndexType); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, + {VPSN->getChain(), VPSN->getValue(), + VPSN->getBasePtr(), Index, ScaleOp, + VPSN->getMask(), VPSN->getVectorLength()}, + VPSN->getMemOperand(), IndexType); break; } case RISCVISD::SRA_VL: @@ -14238,23 +14271,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getConstant(-1, DL, VT); return DAG.getConstant(0, DL, VT); } - case Intrinsic::riscv_vloxei: - case Intrinsic::riscv_vloxei_mask: - case Intrinsic::riscv_vluxei: - case Intrinsic::riscv_vluxei_mask: - case Intrinsic::riscv_vsoxei: - case Intrinsic::riscv_vsoxei_mask: - case Intrinsic::riscv_vsuxei: - case Intrinsic::riscv_vsuxei_mask: - if (SDValue V = narrowIndex(N->getOperand(4), DAG)) { - SmallVector<SDValue, 8> Ops(N->ops()); - Ops[4] = V; - const auto *MemSD = cast<MemIntrinsicSDNode>(N); - return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), - Ops, MemSD->getMemoryVT(), - MemSD->getMemOperand()); - } - return SDValue(); } } case ISD::BITCAST: { @@ -17692,7 +17708,11 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { - return false; + // We have indexed loads for all legal index types. Indices are always + // zero extended. + return Extend.getOpcode() == ISD::ZERO_EXTEND && + isTypeLegal(Extend.getValueType()) && + isTypeLegal(Extend.getOperand(0).getValueType()); } bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index f3af177ac0ff2..6c6ffe656f433 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -1716,21 +1716,19 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v8, v8 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; @@ -2793,20 +2791,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi
v8, v9, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 2 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -3264,11 +3263,10 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf4 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vzext.vf2 v12, v8 +; RV64V-NEXT: vsll.vi v8, v12, 2 +; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -4772,20 +4770,21 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v9, v8 +; RV32V-NEXT: vsll.vi v8, v9, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf8 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -5616,10 +5615,11 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf4 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v10, v8 +; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -7645,21 +7645,19 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, zero, 
e16, m1, ta, mu -; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v8, v8 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; @@ -8596,20 +8594,21 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v8, v9, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 2 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -9067,11 +9066,10 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf4 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vzext.vf2 v12, v8 +; RV64V-NEXT: vsll.vi v8, v12, 2 +; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -10334,20 +10332,21 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v9, v8 +; RV32V-NEXT: vsll.vi v8, v9, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf8 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei16.v v12, (a0), v8, 
v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -11001,10 +11000,11 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf4 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v10, v8 +; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 4c7b6db0d41c5..9e19bb7845831 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1309,20 +1309,18 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16: @@ -2215,19 +2213,20 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32: @@ -2631,11 +2630,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli 
zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: @@ -3957,19 +3955,20 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v13, v12 +; RV32V-NEXT: vsll.vi v12, v13, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: @@ -4710,10 +4709,11 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: @@ -6564,20 +6564,18 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16: @@ -7428,19 +7426,20 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: 
vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32: @@ -7856,11 +7855,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32: @@ -9007,19 +9005,20 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v13, v12 +; RV32V-NEXT: vsll.vi v12, v13, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64: @@ -9628,10 +9627,11 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 091c938931a77..beff4157b14bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -531,20 
+531,18 @@ define <8 x i16> @vpgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 define <8 x i16> @vpgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v9, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v9, v8, v8 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs @@ -742,20 +740,20 @@ define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v10, v9, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v10, v9, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -822,11 +820,11 @@ define <8 x i32> @vpgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v8, v10, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -982,20 +980,20 @@ define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v12, v9, 3 ; RV32-NEXT: vsetvli zero, a1, e64, 
m4, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v12, v9, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1062,11 +1060,11 @@ define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v12, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1294,20 +1292,18 @@ define <8 x half> @vpgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, < define <8 x half> @vpgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v9, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v9, v8, v8 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs @@ -1463,20 +1459,20 @@ define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v10, v9, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v10, v9, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: 
vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1543,11 +1539,11 @@ define <8 x float> @vpgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v8, v10, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1703,20 +1699,20 @@ define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v12, v9, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v12, v9, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1783,11 +1779,11 @@ define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idx ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v12, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -2056,55 +2052,53 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vzext.vf4 v16, v8 +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vzext.vf2 v12, v8 ; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsll.vi v16, v12, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB89_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB89_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 
16, e32, m8, ta, ma +; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a2, 32 +; RV64-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsll.vi v16, v12, 3 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB89_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2259,30 +2253,28 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v8 -; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a2, 32 +; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB92_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB92_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index c94c2f80ad82d..83e3422c44b95 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -376,20 +376,18 @@ define void @vpscatter_baseidx_sext_v8i8_v8i16(<8 x 
i16> %val, ptr %base, <8 x i define void @vpscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs @@ -562,20 +560,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -642,11 +640,11 @@ define void @vpscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -798,20 +796,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v13, v12 +; RV32-NEXT: vsll.vi v12, v13, 3 ; RV32-NEXT: vsetvli zero, a1, e64, 
m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -878,11 +876,11 @@ define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1102,20 +1100,18 @@ define void @vpscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x define void @vpscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs @@ -1267,20 +1263,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: 
vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1347,11 +1343,11 @@ define void @vpscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1503,20 +1499,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v13, v12 +; RV32-NEXT: vsll.vi v12, v13, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1583,11 +1579,11 @@ define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll index 1bc6f87059745..b966aea720a92 100644 --- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll +++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll @@ -10,11 +10,11 @@ declare @llvm.riscv.vloxei.nxv4i32.nxv4i64( define @test_vloxei(* %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v9, v8 -; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v8 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vloxei16.v v8, (a0), v10 +; CHECK-NEXT: 
vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -32,11 +32,11 @@ entry: define @test_vloxei2(* %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 14 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v8 +; CHECK-NEXT: vsll.vi v12, v12, 14 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vloxei32.v v8, (a0), v8 +; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -78,12 +78,12 @@ declare @llvm.vp.zext.nxvi64.nxv1i8(, @test_vloxei4(* %ptr, %offset, %m, i32 zeroext %vl) { ; CHECK-LABEL: test_vloxei4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v9, v8, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v8, v0.t +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vloxei16.v v8, (a0), v10 +; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = call @llvm.vp.zext.nxvi64.nxv1i8( %offset, %m, i32 %vl) @@ -133,11 +133,11 @@ define @test_vloxei6(* %ptr, %offset to @@ -155,12 +155,12 @@ entry: define @test_vloxei7(* %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsll.vi v10, v8, 2 +; CHECK-NEXT: vsll.vi v12, v8, 2 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vloxei8.v v8, (a0), v10 +; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -186,11 +186,11 @@ declare @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( define @test_vloxei_mask(* %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vloxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v9, v8 -; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v8 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: vloxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -215,11 +215,11 @@ declare @llvm.riscv.vluxei.nxv4i32.nxv4i64( define @test_vluxei(* %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vluxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v9, v8 -; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v8 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vluxei16.v v8, (a0), v10 +; CHECK-NEXT: vluxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -245,11 +245,11 @@ declare @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( define @test_vluxei_mask(* %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vluxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v9, v8 -; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, 
v8 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: vluxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -274,11 +274,11 @@ declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( define void @test_vsoxei( %val, * %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vsoxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v11, v10 -; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v10 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsoxei16.v v8, (a0), v10 +; CHECK-NEXT: vsoxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -303,11 +303,11 @@ declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( define void @test_vsoxei_mask( %val, * %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vsoxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v11, v10 -; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v10 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: vsoxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -332,11 +332,11 @@ declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( define void @test_vsuxei( %val, * %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vsuxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v11, v10 -; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v10 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsuxei16.v v8, (a0), v10 +; CHECK-NEXT: vsuxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to @@ -361,11 +361,11 @@ declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( define void @test_vsuxei_mask( %val, * %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vsuxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vzext.vf2 v11, v10 -; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v10 +; CHECK-NEXT: vsll.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: vsuxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index 85a0509ba662d..a5c305d5ac822 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -2468,11 +2468,9 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v24, v16, 3 -; RV64-NEXT: vzext.vf2 v16, v10 -; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 ; RV64-NEXT: sltu a4, a1, a3 @@ -2482,7 +2480,7 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; 
RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: bltu a1, a2, .LBB105_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 8ec880d255923..984ac65b1f218 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2279,20 +2279,18 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: vl4re16.v v28, (a1) -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v24, v30 -; RV64-NEXT: vsll.vi v24, v24, 3 -; RV64-NEXT: vzext.vf2 v4, v28 +; RV64-NEXT: vl4re16.v v4, (a1) +; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV64-NEXT: vzext.vf2 v24, v4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vsll.vi v28, v4, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB98_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB98_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 @@ -2301,7 +2299,7 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsoxei32.v v16, (a0), v24, v0.t +; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs
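
Note (not part of the patch): below is a minimal IR example of the pattern the new combine narrows, adapted from mgather_baseidx_zext_v8i8_v8i32 above; the function name and the exact llc invocation are illustrative. The i8 indices are zero-extended and then scaled by 4, so at most 8 + 2 = 10 bits of each index are ever set and the index vector can be narrowed to e16. With this change, an invocation along the lines of llc -mtriple=riscv64 -mattr=+v should select vzext.vf2 + vluxei16.v for the gather instead of the earlier vzext.vf8 + vluxei64.v sequence.

; Masked gather with zero-extended i8 indices. The GEP widens the indices,
; but only bits(x) + C = 8 + 2 = 10 bits are significant, so the index type
; can be narrowed to i16 before instruction selection.
define <8 x i32> @gather_zext_i8_index(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}

declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)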