diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f659471253a4..b980945ac0e00 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17489,9 +17489,18 @@ struct CombineResult {
       Passthru = DAG.getUNDEF(Root->getValueType(0));
       break;
     }
-    return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
-                       LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
-                       RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
+    SDValue L = LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt);
+    SDValue R = RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt);
+    // Return SDValue() if combining would not reduce the extend instructions.
+    if (L->getOpcode() == Root->getOperand(0).getOpcode() &&
+        (R->getOpcode() == RISCVISD::VZEXT_VL ||
+         R->getOpcode() == RISCVISD::VSEXT_VL) &&
+        (R->getOperand(0).getOpcode() != ISD::SPLAT_VECTOR &&
+         R->getOperand(0).getOpcode() != RISCVISD::VMV_V_X_VL &&
+         R->getOperand(0).getOpcode() != ISD::INSERT_SUBVECTOR))
+      return SDValue();
+
+    return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), L, R,
                        Passthru, Mask, VL);
   }
 };
@@ -17740,6 +17749,30 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
   if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
     return SDValue();
 
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  unsigned Opc0 = Op0.getOpcode();
+  unsigned Opc1 = Op1.getOpcode();
+  // Do not combine to the 'vw' instructions if the number of extend
+  // instructions cannot be reduced:
+  //   vx and vi, if the vector operand is an ext_mf4/ext_mf8;
+  //   vv, if op0 is ext_mf4/ext_mf8 and op1 is ext_mf8 (except imm/scalar).
+  if ((Opc0 == RISCVISD::VZEXT_VL || Opc0 == RISCVISD::VSEXT_VL ||
+       Opc0 == ISD::ZERO_EXTEND || Opc0 == ISD::SIGN_EXTEND) &&
+      (N->getValueType(0).getScalarSizeInBits() >
+       Op0.getOperand(0)->getValueType(0).getScalarSizeInBits() * 2) &&
+      (Opc1 == ISD::SPLAT_VECTOR || Opc1 == RISCVISD::VMV_V_X_VL ||
+       Opc1 == ISD::INSERT_SUBVECTOR ||
+       ((Opc1 == RISCVISD::VZEXT_VL || Opc1 == RISCVISD::VSEXT_VL ||
+         Opc1 == ISD::ZERO_EXTEND || Opc1 == ISD::SIGN_EXTEND) &&
+        Op1.getOperand(0).getOpcode() != ISD::SPLAT_VECTOR &&
+        Op1.getOperand(0).getOpcode() != RISCVISD::VMV_V_X_VL &&
+        Op1.getOperand(0).getOpcode() != ISD::INSERT_SUBVECTOR &&
+        Op1->getValueType(0).getScalarSizeInBits() >
+            Op1.getOperand(0)->getValueType(0).getScalarSizeInBits() * 4))) {
+    return SDValue();
+  }
+
   SmallVector<SDNode *> Worklist;
   SmallPtrSet<const SDNode *, 8> Inserted;
   Worklist.push_back(N);
@@ -17817,6 +17850,9 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
   ValuesToReplace.reserve(CombinesToApply.size());
   for (CombineResult Res : CombinesToApply) {
     SDValue NewValue = Res.materialize(DAG, Subtarget);
+    if (!NewValue)
+      return SDValue();
+
     if (!InputRootReplacement) {
       assert(Res.Root == N &&
              "First element is expected to be the current node");
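For illustration, a minimal IR sketch of the case the materialize() bail-out
rejects (the function name is hypothetical; the shape mirrors the
vwadd_wv_nxv1i64_nxv1i8 test updated below). %vc needs one extend either way:
vsext.vf4 + vwadd.wv before this patch, vsext.vf8 + vadd.vv after, so forming
the vw instruction would not reduce the number of extends.

; Sketch only, not part of the patch.
define <vscale x 1 x i64> @wv_no_benefit(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
  %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
  %vd = add <vscale x 1 x i64> %va, %vc
  ret <vscale x 1 x i64> %vd
}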
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
index e89bac54a7b66..b58468c9010b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -869,12 +869,9 @@ define <4 x i64> @crash(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: crash:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vsext.vf4 v8, v11
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v11, v10
-; CHECK-NEXT:    vwaddu.wv v8, v8, v11
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = sext <4 x i16> %x to <4 x i64>
   %b = zext <4 x i16> %y to <4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
index 2c9aed6274dd8..ef048e22553b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
@@ -716,10 +716,10 @@ define <4 x i64> @vwsll_vv_v4i64_v4i8_zext(<4 x i8> %a, <4 x i8> %b) {
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf4 v11, v9
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
+; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
+; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i8> %a to <4 x i64>
   %y = zext <4 x i8> %b to <4 x i64>
@@ -917,9 +917,9 @@ define <4 x i64> @vwsll_vi_v4i64_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_v4i64_v4i8:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT:    vsll.vi v8, v10, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i8> %a to <4 x i64>
   %z = shl <4 x i64> %x, splat (i64 2)
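The vwsll changes above are a worked instance of the new "vx and vi" rule: in
vwsll_vi_v4i64_v4i8 the operand is zext i8 -> i64 and the shift amount is an
immediate splat, so (assuming the check reads as in the C++ hunk)

  result scalar size     = 64 bits
  source scalar size * 2 = 8 * 2 = 16 bits
  64 > 16 and the other operand is a splat  =>  return SDValue()

The zext is a vf8-wide extend whether or not vwsll.vi is formed, so only the
SEW of the shift changes and no extend instruction is saved.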
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
index e6ca6875e1412..48f47b68151d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
@@ -7,11 +7,10 @@
 define i32 @vqdot_vv(<16 x i8> %a, <16 x i8> %b) {
 ; NODOT-LABEL: vqdot_vv:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vsext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v8, v12, v16
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
 ; NODOT-NEXT:    vmv.x.s a0, v8
@@ -37,11 +36,10 @@ entry:
 define i32 @vqdot_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v12, a0
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -56,11 +54,10 @@ entry:
 define i32 @vqdot_vx_constant_swapped(<16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant_swapped:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v12, a0
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -105,11 +102,10 @@ entry:
 define i32 @vqdotu_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vqdotu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmulu.vx v8, v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v12, a0
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -124,11 +120,10 @@ entry:
 define i32 @vqdotsu_vv(<16 x i8> %a, <16 x i8> %b) {
 ; NODOT-LABEL: vqdotsu_vv:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vzext.vf2 v14, v9
-; NODOT-NEXT:    vwmulsu.vv v8, v12, v14
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vzext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v8, v12, v16
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
 ; NODOT-NEXT:    vmv.x.s a0, v8
@@ -184,11 +179,10 @@ entry:
 define i32 @vdotqsu_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vdotqsu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v12, a0
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -203,12 +197,10 @@ entry:
 define i32 @vdotqus_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vdotqus_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
 ; CHECK-NEXT:    li a0, -23
-; CHECK-NEXT:    vmv.v.x v14, a0
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v12, a0
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -277,13 +269,12 @@ entry:
 define i32 @vqdot_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; NODOT-LABEL: vqdot_vv_accum:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vsext.vf2 v16, v9
-; NODOT-NEXT:    vwmacc.vv v12, v10, v16
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v16, v8
+; NODOT-NEXT:    vsext.vf4 v20, v9
+; NODOT-NEXT:    vmadd.vv v20, v16, v12
 ; NODOT-NEXT:    vmv.s.x v8, zero
-; NODOT-NEXT:    vredsum.vs v8, v12, v8
+; NODOT-NEXT:    vredsum.vs v8, v20, v8
 ; NODOT-NEXT:    vmv.x.s a0, v8
 ; NODOT-NEXT:    ret
 ;
@@ -345,13 +336,12 @@ entry:
 define i32 @vqdotsu_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; NODOT-LABEL: vqdotsu_vv_accum:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vzext.vf2 v16, v9
-; NODOT-NEXT:    vwmaccsu.vv v12, v10, v16
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v16, v8
+; NODOT-NEXT:    vzext.vf4 v20, v9
+; NODOT-NEXT:    vmadd.vv v20, v16, v12
 ; NODOT-NEXT:    vmv.s.x v8, zero
-; NODOT-NEXT:    vredsum.vs v8, v12, v8
+; NODOT-NEXT:    vredsum.vs v8, v20, v8
 ; NODOT-NEXT:    vmv.x.s a0, v8
 ; NODOT-NEXT:    ret
 ;
@@ -379,11 +369,10 @@ entry:
 define i32 @vqdot_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
 ; NODOT-LABEL: vqdot_vv_scalar_add:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vsext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v8, v12, v16
 ; NODOT-NEXT:    vmv.s.x v12, a0
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
 ; NODOT-NEXT:    vmv.x.s a0, v8
@@ -441,11 +430,10 @@ entry:
 define i32 @vqdotsu_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
 ; NODOT-LABEL: vqdotsu_vv_scalar_add:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vzext.vf2 v14, v9
-; NODOT-NEXT:    vwmulsu.vv v8, v12, v14
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vzext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v8, v12, v16
 ; NODOT-NEXT:    vmv.s.x v12, a0
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
 ; NODOT-NEXT:    vmv.x.s a0, v8
@@ -472,16 +460,15 @@ entry:
 define i32 @vqdot_vv_split(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; NODOT-LABEL: vqdot_vv_split:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vsext.vf2 v16, v10
-; NODOT-NEXT:    vsext.vf2 v18, v11
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
-; NODOT-NEXT:    vwmacc.vv v8, v16, v18
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; NODOT-NEXT:    vmv.s.x v12, zero
-; NODOT-NEXT:    vredsum.vs v8, v8, v12
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vsext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v12, v12, v16
+; NODOT-NEXT:    vsext.vf4 v16, v10
+; NODOT-NEXT:    vsext.vf4 v20, v11
+; NODOT-NEXT:    vmadd.vv v20, v16, v12
+; NODOT-NEXT:    vmv.s.x v8, zero
+; NODOT-NEXT:    vredsum.vs v8, v20, v8
 ; NODOT-NEXT:    vmv.x.s a0, v8
 ; NODOT-NEXT:    ret
 ;
@@ -510,20 +497,19 @@ entry:
 define <1 x i32> @vqdot_vv_partial_reduce_v1i32_v4i8(<4 x i8> %a, <4 x i8> %b) {
 ; NODOT-LABEL: vqdot_vv_partial_reduce_v1i32_v4i8:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vsext.vf2 v8, v9
-; NODOT-NEXT:    vwmul.vv v9, v10, v8
-; NODOT-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; NODOT-NEXT:    vslidedown.vi v8, v9, 3
-; NODOT-NEXT:    vslidedown.vi v10, v9, 2
+; NODOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; NODOT-NEXT:    vsext.vf4 v10, v8
+; NODOT-NEXT:    vsext.vf4 v8, v9
+; NODOT-NEXT:    vmul.vv v8, v10, v8
+; NODOT-NEXT:    vslidedown.vi v9, v8, 3
+; NODOT-NEXT:    vslidedown.vi v10, v8, 2
 ; NODOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; NODOT-NEXT:    vadd.vv v8, v8, v9
+; NODOT-NEXT:    vadd.vv v9, v9, v8
 ; NODOT-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; NODOT-NEXT:    vslidedown.vi v9, v9, 1
+; NODOT-NEXT:    vslidedown.vi v8, v8, 1
 ; NODOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; NODOT-NEXT:    vadd.vv v9, v9, v10
-; NODOT-NEXT:    vadd.vv v8, v9, v8
+; NODOT-NEXT:    vadd.vv v8, v8, v10
+; NODOT-NEXT:    vadd.vv v8, v8, v9
 ; NODOT-NEXT:    ret
 ;
 ; DOT-LABEL: vqdot_vv_partial_reduce_v1i32_v4i8:
@@ -648,20 +634,19 @@ entry:
 define <1 x i32> @vqdotsu_vv_partial_reduce_v1i32_v4i8(<4 x i8> %a, <4 x i8> %b) {
 ; NODOT-LABEL: vqdotsu_vv_partial_reduce_v1i32_v4i8:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vzext.vf2 v8, v9
-; NODOT-NEXT:    vwmulsu.vv v9, v10, v8
-; NODOT-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; NODOT-NEXT:    vslidedown.vi v8, v9, 3
-; NODOT-NEXT:    vslidedown.vi v10, v9, 2
+; NODOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; NODOT-NEXT:    vsext.vf4 v10, v8
+; NODOT-NEXT:    vzext.vf4 v8, v9
+; NODOT-NEXT:    vmul.vv v8, v10, v8
+; NODOT-NEXT:    vslidedown.vi v9, v8, 3
+; NODOT-NEXT:    vslidedown.vi v10, v8, 2
 ; NODOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; NODOT-NEXT:    vadd.vv v8, v8, v9
+; NODOT-NEXT:    vadd.vv v9, v9, v8
 ; NODOT-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; NODOT-NEXT:    vslidedown.vi v9, v9, 1
+; NODOT-NEXT:    vslidedown.vi v8, v8, 1
 ; NODOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; NODOT-NEXT:    vadd.vv v9, v9, v10
-; NODOT-NEXT:    vadd.vv v8, v9, v8
+; NODOT-NEXT:    vadd.vv v8, v8, v10
+; NODOT-NEXT:    vadd.vv v8, v8, v9
 ; NODOT-NEXT:    ret
 ;
 ; DOT-LABEL: vqdotsu_vv_partial_reduce_v1i32_v4i8:
@@ -740,10 +725,10 @@ entry:
 define <2 x i32> @vqdot_vv_partial_reduce_v2i32_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; NODOT-LABEL: vqdot_vv_partial_reduce_v2i32_v8i8:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vsext.vf2 v11, v9
-; NODOT-NEXT:    vwmul.vv v8, v10, v11
+; NODOT-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; NODOT-NEXT:    vsext.vf4 v10, v8
+; NODOT-NEXT:    vsext.vf4 v12, v9
+; NODOT-NEXT:    vmul.vv v8, v10, v12
 ; NODOT-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
 ; NODOT-NEXT:    vslidedown.vi v10, v8, 6
 ; NODOT-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
@@ -952,10 +937,10 @@ entry:
 define <4 x i32> @vqdot_vv_partial_reduce_v4i32_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; NODOT-LABEL: vqdot_vv_partial_reduce_v4i32_v16i8:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vsext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v8, v12, v16
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m4, ta, ma
 ; NODOT-NEXT:    vslidedown.vi v12, v8, 12
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
@@ -1030,10 +1015,10 @@ entry:
 define <4 x i32> @vqdot_vv_partial_reduce_m1_accum(<16 x i8> %a, <16 x i8> %b, <4 x i32> %accum) {
 ; NODOT-LABEL: vqdot_vv_partial_reduce_m1_accum:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vsext.vf2 v18, v9
-; NODOT-NEXT:    vwmul.vv v12, v16, v18
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v12, v8
+; NODOT-NEXT:    vsext.vf4 v16, v9
+; NODOT-NEXT:    vmul.vv v12, v12, v16
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; NODOT-NEXT:    vadd.vv v16, v10, v12
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m4, ta, ma
@@ -1066,10 +1051,10 @@ entry:
 define <16 x i32> @vqdot_vv_partial_reduce3(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vqdot_vv_partial_reduce3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
 entry:
   %a.sext = sext <16 x i8> %a to <16 x i32>
@@ -1556,11 +1541,10 @@ entry:
 define i32 @vqdot_vv_accum_disjoint_or(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; NODOT-LABEL: vqdot_vv_accum_disjoint_or:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vsext.vf2 v18, v9
-; NODOT-NEXT:    vwmul.vv v8, v16, v18
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v16, v8
+; NODOT-NEXT:    vsext.vf4 v20, v9
+; NODOT-NEXT:    vmul.vv v8, v16, v20
 ; NODOT-NEXT:    vor.vv v8, v8, v12
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
@@ -1591,11 +1575,10 @@ entry:
 define i32 @vqdot_vv_accum_or(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; CHECK-LABEL: vqdot_vv_accum_or:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v18, v9
-; CHECK-NEXT:    vwmul.vv v8, v16, v18
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v20
 ; CHECK-NEXT:    vor.vv v8, v8, v12
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
@@ -1647,11 +1630,10 @@ entry:
 define i32 @vqdotsu_vv_accum_disjoint_or(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; NODOT-LABEL: vqdotsu_vv_accum_disjoint_or:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vzext.vf2 v18, v9
-; NODOT-NEXT:    vwmulsu.vv v8, v16, v18
-; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; NODOT-NEXT:    vsext.vf4 v16, v8
+; NODOT-NEXT:    vzext.vf4 v20, v9
+; NODOT-NEXT:    vmul.vv v8, v16, v20
 ; NODOT-NEXT:    vor.vv v8, v8, v12
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
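All of the dot-product tests above share one source shape (a reconstructed
sketch follows; it is not copied from the test file): extend both inputs to
i32, multiply, then reduce. Old and new code both use two extends and one
multiply; the difference is that vwmul.vv ran at e16 and needed the extra
"vsetvli zero, zero, e32, m4" before the e32 reduction, while vmul.vv keeps
the whole sequence at e32.

; Reconstructed source shape for vqdot_vv (sketch only).
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

define i32 @vqdot_vv(<16 x i8> %a, <16 x i8> %b) {
entry:
  %a.sext = sext <16 x i8> %a to <16 x i32>
  %b.sext = sext <16 x i8> %b to <16 x i32>
  %mul = mul <16 x i32> %a.sext, %b.sext
  %sum = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
  ret i32 %sum
}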
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index c8b882b92b934..18e79b026569b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -489,9 +489,9 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v9
-; CHECK-NEXT:    vwadd.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -501,9 +501,9 @@ define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va,
 define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v9
-; CHECK-NEXT:    vwaddu.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -607,9 +607,9 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v11, v10
-; CHECK-NEXT:    vwadd.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -619,9 +619,9 @@ define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va,
 define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v11, v10
-; CHECK-NEXT:    vwaddu.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -725,9 +725,9 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v14, v12
-; CHECK-NEXT:    vwadd.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -737,9 +737,9 @@ define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va,
 define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v14, v12
-; CHECK-NEXT:    vwaddu.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -843,9 +843,9 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v20, v16
-; CHECK-NEXT:    vwadd.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -855,9 +855,9 @@ define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va,
 define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v20, v16
-; CHECK-NEXT:    vwaddu.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -899,10 +899,10 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v11, v9
-; CHECK-NEXT:    vwadd.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -961,9 +961,9 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v9
-; CHECK-NEXT:    vwadd.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -973,9 +973,9 @@ define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va,
 define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vwaddu.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -1017,10 +1017,10 @@ define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %
 define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v11, v9
-; CHECK-NEXT:    vwadd.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -1079,9 +1079,9 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v11, v10
-; CHECK-NEXT:    vwadd.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -1091,9 +1091,9 @@ define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va,
 define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v11, v10
-; CHECK-NEXT:    vwaddu.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -1135,10 +1135,10 @@ define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %
 define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1197,9 +1197,9 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v14, v12
-; CHECK-NEXT:    vwadd.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -1209,9 +1209,9 @@ define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va,
 define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v14, v12
-; CHECK-NEXT:    vwaddu.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -1253,10 +1253,10 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
 define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vadd.vv v8, v16, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1315,9 +1315,9 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v20, v16
-; CHECK-NEXT:    vwadd.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -1327,9 +1327,9 @@ define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va,
 define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v20, v16
-; CHECK-NEXT:    vwaddu.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll
index 433f5d2717e48..415cdc10d6ba3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll
@@ -6,10 +6,10 @@ define <vscale x 2 x i32> @vwadd_tu(<vscale x 2 x i8> %arg, <vscale x 2 x i32> %
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    slli a0, a0, 32
 ; CHECK-NEXT:    srli a0, a0, 32
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
-; CHECK-NEXT:    vwadd.wv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 bb:
@@ -24,10 +24,10 @@ define <vscale x 2 x i32> @vwaddu_tu(<vscale x 2 x i8> %arg, <vscale x 2 x i32>
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    slli a0, a0, 32
 ; CHECK-NEXT:    srli a0, a0, 32
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
-; CHECK-NEXT:    vwaddu.wv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 bb:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
index be380544be563..712f728a3510e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -726,10 +726,10 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i1
 define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v11, v9
-; CHECK-NEXT:    vwmul.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -754,10 +754,10 @@ define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va,
 define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v11, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -820,10 +820,10 @@ define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %
 define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v11, v9
-; CHECK-NEXT:    vwmul.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -848,10 +848,10 @@ define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va,
 define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v11, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -914,10 +914,10 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %
 define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -942,10 +942,10 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va,
 define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v14, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1008,10 +1008,10 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %
 define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1036,10 +1036,10 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va,
 define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v20, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 381f57e59aa76..8e6ec17709cd9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -674,10 +674,10 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, <v
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf4 v11, v9
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
+; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
+; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
   %y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64>
@@ -875,9 +875,9 @@ define <vscale x 2 x i64> @vwsll_vi_nxv2i64_nxv2i8(<vscale x 2 x i8> %a) {
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT:    vsll.vi v8, v10, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
   %z = shl <vscale x 2 x i64> %x, splat (i64 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index 631c8c3ca48f9..2bdf828b2b1ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -489,9 +489,9 @@ define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v9
-; CHECK-NEXT:    vwsub.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -501,9 +501,9 @@ define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va,
 define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v9
-; CHECK-NEXT:    vwsubu.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -607,9 +607,9 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v11, v10
-; CHECK-NEXT:    vwsub.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -619,9 +619,9 @@ define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va,
 define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v11, v10
-; CHECK-NEXT:    vwsubu.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -725,9 +725,9 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v14, v12
-; CHECK-NEXT:    vwsub.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -737,9 +737,9 @@ define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va,
 define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v14, v12
-; CHECK-NEXT:    vwsubu.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -843,9 +843,9 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v20, v16
-; CHECK-NEXT:    vwsub.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -855,9 +855,9 @@ define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va,
 define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v20, v16
-; CHECK-NEXT:    vwsubu.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -899,10 +899,10 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 define <vscale x 1 x i64> @vwsub_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v11, v9
-; CHECK-NEXT:    vwsub.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v8
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -961,9 +961,9 @@ define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v9
-; CHECK-NEXT:    vwsub.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -973,9 +973,9 @@ define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va,
 define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vwsubu.wv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -1017,10 +1017,10 @@ define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %
 define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v11, v9
-; CHECK-NEXT:    vwsub.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -1079,9 +1079,9 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v11, v10
-; CHECK-NEXT:    vwsub.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -1091,9 +1091,9 @@ define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va,
 define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v11, v10
-; CHECK-NEXT:    vwsubu.wv v8, v8, v11
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -1135,10 +1135,10 @@ define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %
 define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1197,9 +1197,9 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v14, v12
-; CHECK-NEXT:    vwsub.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -1209,9 +1209,9 @@ define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va,
 define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v14, v12
-; CHECK-NEXT:    vwsubu.wv v8, v8, v14
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -1253,10 +1253,10 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
 define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vsub.vv v8, v16, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1315,9 +1315,9 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v20, v16
-; CHECK-NEXT:    vwsub.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -1327,9 +1327,9 @@ define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va,
 define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v20, v16
-; CHECK-NEXT:    vwsubu.wv v8, v8, v20
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll
index 772895316ebc6..066166e01b80b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll
@@ -37,11 +37,10 @@ entry:
 define i32 @vqdot_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v16, a0
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -56,11 +55,10 @@ entry:
 define i32 @vqdot_vx_constant_swapped(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant_swapped:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v16, a0
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -105,11 +103,10 @@ entry:
 define i32 @vqdotu_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vqdotu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmulu.vx v8, v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v16, a0
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -184,11 +181,10 @@ entry:
 define i32 @vdotqsu_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vdotqsu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v16, a0
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -203,12 +199,10 @@ entry:
 define i32 @vdotqus_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vdotqus_vx_constant:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
 ; CHECK-NEXT:    li a0, -23
-; CHECK-NEXT:    vmv.v.x v20, a0
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v16, a0
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
 ; CHECK-NEXT:    vmv.x.s a0, v8