251 changes: 67 additions & 184 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1502,7 +1502,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
if (Subtarget.hasVInstructions())
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
@@ -1628,12 +1628,6 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
case Intrinsic::riscv_masked_strided_load:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_masked_strided_store:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_seg2_load:
case Intrinsic::riscv_seg3_load:
case Intrinsic::riscv_seg4_load:
@@ -9313,81 +9307,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
switch (IntNo) {
default:
break;
case Intrinsic::riscv_masked_strided_load: {
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();

// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
SDValue Mask = Op.getOperand(5);
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

MVT VT = Op->getSimpleValueType(0);
MVT ContainerVT = VT;
if (VT.isFixedLengthVector())
ContainerVT = getContainerForFixedLengthVector(VT);

SDValue PassThru = Op.getOperand(2);
if (!IsUnmasked) {
MVT MaskVT = getMaskTypeFor(ContainerVT);
if (VT.isFixedLengthVector()) {
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
}

auto *Load = cast<MemIntrinsicSDNode>(Op);
SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
SDValue Ptr = Op.getOperand(3);
SDValue Stride = Op.getOperand(4);
SDValue Result, Chain;

// TODO: We restrict this to unmasked loads currently in consideration of
// the complexity of handling all falses masks.
MVT ScalarVT = ContainerVT.getVectorElementType();
if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
SDValue ScalarLoad =
DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
ScalarVT, Load->getMemOperand());
Chain = ScalarLoad.getValue(1);
Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
Subtarget);
} else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
Load->getMemOperand());
Chain = ScalarLoad.getValue(1);
Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
} else {
SDValue IntID = DAG.getTargetConstant(
IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
XLenVT);

SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
if (IsUnmasked)
Ops.push_back(DAG.getUNDEF(ContainerVT));
else
Ops.push_back(PassThru);
Ops.push_back(Ptr);
Ops.push_back(Stride);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
if (!IsUnmasked) {
SDValue Policy =
DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
Ops.push_back(Policy);
}

SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
Load->getMemoryVT(), Load->getMemOperand());
Chain = Result.getValue(1);
}
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
case Intrinsic::riscv_seg2_load:
case Intrinsic::riscv_seg3_load:
case Intrinsic::riscv_seg4_load:
@@ -9467,47 +9386,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
switch (IntNo) {
default:
break;
case Intrinsic::riscv_masked_strided_store: {
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();

// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
SDValue Mask = Op.getOperand(5);
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

SDValue Val = Op.getOperand(2);
MVT VT = Val.getSimpleValueType();
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
}
if (!IsUnmasked) {
MVT MaskVT = getMaskTypeFor(ContainerVT);
if (VT.isFixedLengthVector())
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}

SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

SDValue IntID = DAG.getTargetConstant(
IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
XLenVT);

auto *Store = cast<MemIntrinsicSDNode>(Op);
SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
Ops.push_back(Val);
Ops.push_back(Op.getOperand(3)); // Ptr
Ops.push_back(Op.getOperand(4)); // Stride
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);

return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
Ops, Store->getMemoryVT(),
Store->getMemOperand());
}
case Intrinsic::riscv_seg2_store:
case Intrinsic::riscv_seg3_store:
case Intrinsic::riscv_seg4_store:
@@ -10826,6 +10704,23 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
return DAG.getMergeValues({Result, Chain}, DL);
}

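// If the EVL operand (possibly zero-extended from i32 or masked with
// 0xffffffff) is known to equal VLMAX for VT, i.e. vscale * VT's minimum
// element count, canonicalize it to the all-ones sentinel AVL, which the
// backend interprets as VLMAX.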
static SDValue convertEVLToAVL(SDValue V, SDLoc DL, MVT VT, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector()) {
    SDValue F = V;
    if (F.getOpcode() == ISD::ZERO_EXTEND ||
        (F.getOpcode() == ISD::AND && isa<ConstantSDNode>(F.getOperand(1)) &&
         F.getConstantOperandVal(1) == UINT64_C(0xffffffff)))
      F = F.getOperand(0);
    if (F.getOpcode() == ISD::VSCALE &&
        F.getConstantOperandVal(0) == VT.getVectorMinNumElements())
      V = DAG.getConstant(-1, DL, Subtarget.getXLenVT());
  }
  return V;
}
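
A hedged illustration of what the helper above recognizes (hypothetical function and value names, not taken from this patch's tests): for <vscale x 2 x i32>, an EVL of vscale * 2 is exactly VLMAX, so the AVL becomes the sentinel and the backend can use the "vsetvli a0, zero, ..." form, as the vadd-vp.ll diff further down shows.

; The shl folds into ISD::VSCALE(2), matching nxv2i32's minimum element count.
declare i32 @llvm.vscale.i32()
declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @evl_is_vlmax(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
  %vscale = call i32 @llvm.vscale.i32()
  %evl = shl i32 %vscale, 1
  %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}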

SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -10841,7 +10736,8 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
Val = VPStore->getValue();
Mask = VPStore->getMask();
VL = VPStore->getVectorLength();
VL = convertEVLToAVL(VPStore->getVectorLength(), DL,
Val.getSimpleValueType(), DAG, Subtarget);
} else {
const auto *MStore = cast<MaskedStoreSDNode>(Op);
Val = MStore->getValue();
@@ -11177,6 +11073,9 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
}
// Pass through operands which aren't fixed-length vectors.
if (!V.getValueType().isFixedLengthVector()) {
if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
V = convertEVLToAVL(V, DL, VT, DAG, Subtarget);

Ops.push_back(V);
continue;
}
@@ -11702,7 +11601,8 @@ SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
}
Ops.push_back(Mask);
}
Ops.push_back(VPNode->getVectorLength());
Ops.push_back(
convertEVLToAVL(VPNode->getVectorLength(), DL, VT, DAG, Subtarget));
if (!IsUnmasked) {
SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
Ops.push_back(Policy);
@@ -11749,7 +11649,8 @@ SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
}
Ops.push_back(Mask);
}
Ops.push_back(VPNode->getVectorLength());
Ops.push_back(
convertEVLToAVL(VPNode->getVectorLength(), DL, VT, DAG, Subtarget));

return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
Ops, VPNode->getMemoryVT(),
@@ -16103,18 +16004,10 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
if (MustNegateStride)
Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
SDValue IntID =
DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
Subtarget.getXLenVT());

SDValue AllOneMask =
DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
DAG.getConstant(1, DL, MVT::i1));

SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
BaseLd->getBasePtr(), Stride, AllOneMask};

uint64_t MemSize;
if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
ConstStride && ConstStride->getSExtValue() >= 0)
@@ -16125,13 +16018,13 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
else
// If Stride isn't constant, then we can't know how much it will load
MemSize = MemoryLocation::UnknownSize;

MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
Align);

SDValue StridedLoad = DAG.getStridedLoadVP(
    WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
    AllOneMask,
    DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);

SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
Ops, WideVecVT, MMO);
for (SDValue Ld : N->ops())
DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

@@ -16958,15 +16851,16 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
DAG.getConstant(Addend, DL, PtrVT));

SDVTList VTs = DAG.getVTList({VT, MVT::Other});
SDValue IntID =
DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
XLenVT);
SDValue Ops[] =
{MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
Ops, VT, MGN->getMemOperand());
SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
VT.getVectorElementCount());
SDValue StridedLoad =
DAG.getStridedLoadVP(VT, DL, MGN->getChain(), BasePtr,
DAG.getConstant(StepNumerator, DL, XLenVT),
MGN->getMask(), EVL, MGN->getMemOperand());
SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
StridedLoad, MGN->getPassThru(), EVL);
return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
DL);
}
}

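A hedged sketch of the gather shape this combine now rewrites into a VP strided load plus a vp.select on the original mask and passthru (illustrative names; the real coverage lives in strided-load-store.ll, whose large diff is not rendered below).

declare <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)

define <vscale x 1 x i64> @strided_gather(ptr %p, <vscale x 1 x i1> %m, <vscale x 1 x i64> %pt) {
  ; The addresses form %p + 8*i, a constant-stride sequence, so the gather
  ; can become a stride-8 vlse64 selected against %pt under %m.
  %step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
  %gep = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %v = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %gep, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x i64> %pt)
  ret <vscale x 1 x i64> %v
}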
@@ -17099,6 +16993,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
VPSN->getMemOperand(), IndexType);
break;
}
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
if (DCI.isBeforeLegalize())
break;
auto *Load = cast<VPStridedLoadSDNode>(N);
MVT VT = N->getSimpleValueType(0);

if (!DAG.isKnownNeverZero(Load->getVectorLength()) ||
!Load->getOffset().isUndef() ||
!ISD::isConstantSplatVectorAllOnes(Load->getMask().getNode()) ||
!isNullConstant(Load->getStride()))
break;

SDValue ScalarLoad;
if (VT.isInteger())
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(),
Load->getBasePtr(), VT.getVectorElementType(),
Load->getMemOperand());
else
ScalarLoad = DAG.getLoad(VT.getVectorElementType(), DL, Load->getChain(),
Load->getBasePtr(), Load->getMemOperand());
SDValue Splat = VT.isFixedLengthVector()
? DAG.getSplatBuildVector(VT, DL, ScalarLoad)
: DAG.getSplatVector(VT, DL, ScalarLoad);
return DAG.getMergeValues({Splat, SDValue(ScalarLoad.getNode(), 1)}, DL);
}
case RISCVISD::SHL_VL:
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
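
For the EXPERIMENTAL_VP_STRIDED_LOAD case above, a hedged sketch of the pattern it targets (illustrative names): with a zero stride and an all-ones mask, every lane re-reads the same element, and with an EVL the DAG can prove non-zero (a constant here) the whole load becomes one scalar load plus a splat.

declare <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr, i64, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @zero_strided(ptr %p) {
  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr %p, i64 0, <vscale x 1 x i1> %allones, i32 4)
  ret <vscale x 1 x i64> %v
}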
@@ -17405,43 +17325,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// By default we do not combine any intrinsic.
default:
return SDValue();
case Intrinsic::riscv_masked_strided_load: {
MVT VT = N->getSimpleValueType(0);
auto *Load = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = N->getOperand(2);
SDValue Base = N->getOperand(3);
SDValue Stride = N->getOperand(4);
SDValue Mask = N->getOperand(5);

// If the stride is equal to the element size in bytes, we can use
// a masked.load.
const unsigned ElementSize = VT.getScalarStoreSize();
if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
StrideC && StrideC->getZExtValue() == ElementSize)
return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
DAG.getUNDEF(XLenVT), Mask, PassThru,
Load->getMemoryVT(), Load->getMemOperand(),
ISD::UNINDEXED, ISD::NON_EXTLOAD);
return SDValue();
}
case Intrinsic::riscv_masked_strided_store: {
auto *Store = cast<MemIntrinsicSDNode>(N);
SDValue Value = N->getOperand(2);
SDValue Base = N->getOperand(3);
SDValue Stride = N->getOperand(4);
SDValue Mask = N->getOperand(5);

// If the stride is equal to the element size in bytes, we can use
// a masked.store.
const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
StrideC && StrideC->getZExtValue() == ElementSize)
return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
DAG.getUNDEF(XLenVT), Mask,
Value.getValueType(), Store->getMemOperand(),
ISD::UNINDEXED, false);
return SDValue();
}
case Intrinsic::riscv_vcpop:
case Intrinsic::riscv_vcpop_mask:
case Intrinsic::riscv_vfirst:
16 changes: 0 additions & 16 deletions llvm/test/CodeGen/RISCV/pr89833.ll

This file was deleted.

@@ -9,9 +9,9 @@
define void @widen_2xv4i16(ptr %x, ptr %z) {
; CHECK-LABEL: widen_2xv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
%b.gep = getelementptr i8, ptr %x, i64 8
@@ -52,9 +52,9 @@ define void @widen_3xv4i16(ptr %x, ptr %z) {
define void @widen_4xv4i16(ptr %x, ptr %z) {
; CHECK-LABEL: widen_4xv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
%b.gep = getelementptr i8, ptr %x, i64 8
@@ -90,9 +90,9 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
;
; RV64-MISALIGN-LABEL: widen_4xv4i16_unaligned:
; RV64-MISALIGN: # %bb.0:
; RV64-MISALIGN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-MISALIGN-NEXT: vle16.v v8, (a0)
; RV64-MISALIGN-NEXT: vse16.v v8, (a1)
; RV64-MISALIGN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-MISALIGN-NEXT: vle64.v v8, (a0)
; RV64-MISALIGN-NEXT: vse64.v v8, (a1)
; RV64-MISALIGN-NEXT: ret
%a = load <4 x i16>, ptr %x, align 1
%b.gep = getelementptr i8, ptr %x, i64 8
106 changes: 65 additions & 41 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -116,7 +116,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vs1r.v v8, (a1)
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: ret
%step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
%gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1144,3 +1144,40 @@ define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passt
%b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
ret <vscale x 2 x double> %b
}

; FIXME: We don't currently handle vmerge with an implicit passthru if the true
; operand also has a tied dest. This could be folded into a masked vmacc with ta
; policy.
define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vmacc.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0)
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
<vscale x 2 x i32> poison,
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %a,
<vscale x 2 x i1> %m,
i64 %avl
)
ret <vscale x 2 x i32> %b
}

define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
; CHECK-LABEL: true_mask_vmerge_implicit_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0)
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
<vscale x 2 x i32> poison,
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %a,
<vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer),
i64 %avl
)
ret <vscale x 2 x i32> %b
}
133 changes: 0 additions & 133 deletions llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll

This file was deleted.

54 changes: 37 additions & 17 deletions llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1436,9 +1436,7 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <v
define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
; RV32-LABEL: vadd_vi_nxv32i32_evl_nx16:
; RV32: # %bb.0:
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT: vadd.vi v8, v8, -1, v0.t
; RV32-NEXT: ret
;
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -85,11 +85,8 @@ define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscal
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v11, v10
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v11, v8, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; ZVFH-NEXT: vmerge.vvm v10, v10, v11, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
110 changes: 44 additions & 66 deletions llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
@@ -81,9 +81,8 @@ define <vscale x 1 x i8> @vmadd_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x
define <vscale x 1 x i8> @vmadd_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -170,9 +169,8 @@ define <vscale x 2 x i8> @vmadd_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x
define <vscale x 2 x i8> @vmadd_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -259,9 +257,8 @@ define <vscale x 4 x i8> @vmadd_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x
define <vscale x 4 x i8> @vmadd_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -348,9 +345,8 @@ define <vscale x 8 x i8> @vmadd_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x
define <vscale x 8 x i8> @vmadd_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -437,9 +433,8 @@ define <vscale x 16 x i8> @vmadd_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x
define <vscale x 16 x i8> @vmadd_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vmacc.vx v10, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -526,9 +521,8 @@ define <vscale x 32 x i8> @vmadd_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x
define <vscale x 32 x i8> @vmadd_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vmacc.vx v12, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
@@ -618,9 +612,8 @@ define <vscale x 64 x i8> @vmadd_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x
define <vscale x 64 x i8> @vmadd_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv64i8_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmacc.vx v16, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
@@ -707,9 +700,8 @@ define <vscale x 1 x i16> @vmadd_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x
define <vscale x 1 x i16> @vmadd_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i16_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -796,9 +788,8 @@ define <vscale x 2 x i16> @vmadd_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x
define <vscale x 2 x i16> @vmadd_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i16_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -885,9 +876,8 @@ define <vscale x 4 x i16> @vmadd_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x
define <vscale x 4 x i16> @vmadd_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i16_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -974,9 +964,8 @@ define <vscale x 8 x i16> @vmadd_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x
define <vscale x 8 x i16> @vmadd_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i16_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vmacc.vx v10, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -1063,9 +1052,8 @@ define <vscale x 16 x i16> @vmadd_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale
define <vscale x 16 x i16> @vmadd_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i16_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vmacc.vx v12, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
@@ -1155,9 +1143,8 @@ define <vscale x 32 x i16> @vmadd_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale
define <vscale x 32 x i16> @vmadd_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i16_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vmacc.vx v16, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
@@ -1244,9 +1231,8 @@ define <vscale x 1 x i32> @vmadd_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x
define <vscale x 1 x i32> @vmadd_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i32_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
@@ -1333,9 +1319,8 @@ define <vscale x 2 x i32> @vmadd_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x
define <vscale x 2 x i32> @vmadd_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i32_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmacc.vx v9, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1422,9 +1407,8 @@ define <vscale x 4 x i32> @vmadd_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x
define <vscale x 4 x i32> @vmadd_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i32_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vmacc.vx v10, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -1511,9 +1495,8 @@ define <vscale x 8 x i32> @vmadd_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x
define <vscale x 8 x i32> @vmadd_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i32_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: vmacc.vx v12, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1603,9 +1586,8 @@ define <vscale x 16 x i32> @vmadd_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale
define <vscale x 16 x i32> @vmadd_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i32_ta:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmacc.vx v16, a0, v8
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -1739,9 +1721,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv1i64_ta:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vmacc.vx v9, a0, v8
; RV64-NEXT: vmerge.vvm v8, v8, v9, v0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; RV64-NEXT: vmadd.vx v8, a0, v9, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1875,9 +1856,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv2i64_ta:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vmacc.vx v10, a0, v8
; RV64-NEXT: vmerge.vvm v8, v8, v10, v0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; RV64-NEXT: vmadd.vx v8, a0, v10, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -2011,9 +1991,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv4i64_ta:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vmacc.vx v12, a0, v8
; RV64-NEXT: vmerge.vvm v8, v8, v12, v0
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
; RV64-NEXT: vmadd.vx v8, a0, v12, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -2150,9 +2129,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv8i64_ta:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmacc.vx v16, a0, v8
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmadd.vx v8, a0, v16, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -1073,9 +1073,7 @@ define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i3
define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
; RV32-LABEL: vmax_vx_nxv32i32_evl_nx16:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmax.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -1072,9 +1072,7 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i
define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
; RV32-LABEL: vmaxu_vx_nxv32i32_evl_nx16:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmaxu.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -1073,9 +1073,7 @@ define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i3
define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
; RV32-LABEL: vmin_vx_nxv32i32_evl_nx16:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmin.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -1072,9 +1072,7 @@ define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i
define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
; RV32-LABEL: vminu_vx_nxv32i32_evl_nx16:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vminu.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;