diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 686350de29883..c4942f9c637bd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1295,7 +1295,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                        ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                        ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
-                       ISD::CONCAT_VECTORS});
+                       ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
   if (Subtarget.hasVendorXTHeadMemPair())
     setTargetDAGCombine({ISD::LOAD, ISD::STORE});
   if (Subtarget.useRVVForFixedLengthVectors())
@@ -13337,6 +13337,55 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
   return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
 }
 
+/// If we have a build_vector where each lane is binop X, C, where C
+/// is a constant (but not necessarily the same constant on all lanes),
+/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
+/// We assume that materializing a constant build vector will be no more
+/// expensive than performing O(n) binops.
+static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+                                          const RISCVSubtarget &Subtarget,
+                                          const RISCVTargetLowering &TLI) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  assert(!VT.isScalableVector() && "unexpected build vector");
+
+  if (VT.getVectorNumElements() == 1)
+    return SDValue();
+
+  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
+  if (!TLI.isBinOp(Opcode))
+    return SDValue();
+
+  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  SmallVector<SDValue> LHSOps;
+  SmallVector<SDValue> RHSOps;
+  for (SDValue Op : N->ops()) {
+    if (Op.isUndef()) {
+      LHSOps.push_back(Op);
+      RHSOps.push_back(Op);
+      continue;
+    }
+
+    // TODO: We can handle operations which have a neutral rhs value
+    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
+    // of profit in a more explicit manner.
+    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
+      return SDValue();
+
+    LHSOps.push_back(Op.getOperand(0));
+    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
+        !isa<ConstantFPSDNode>(Op.getOperand(1)))
+      return SDValue();
+    RHSOps.push_back(Op.getOperand(1));
+  }
+
+  return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
+                     DAG.getBuildVector(VT, DL, RHSOps));
+}
+
 // If we're concatenating a series of vector loads like
 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements @@ -14399,6 +14448,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Gather; break; } + case ISD::BUILD_VECTOR: + if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this)) + return V; + break; case ISD::CONCAT_VECTORS: if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) return V; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index 37a43c3550a52..9e4584eb17ff9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -3,33 +3,18 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: add_constant_rhs: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, 23 -; RV32-NEXT: addi a1, a1, 25 -; RV32-NEXT: addi a2, a2, 1 -; RV32-NEXT: addi a3, a3, 2047 -; RV32-NEXT: addi a3, a3, 308 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: add_constant_rhs: -; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, 23 -; RV64-NEXT: addiw a1, a1, 25 -; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addiw a3, a3, 2047 -; RV64-NEXT: addiw a3, a3, 308 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: add_constant_rhs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %e0 = add i32 %a, 23 %e1 = add i32 %b, 25 %e2 = add i32 %c, 1 @@ -42,49 +27,22 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { } define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) { -; RV32-LABEL: add_constant_rhs_8xi32: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, 23 -; RV32-NEXT: addi a1, a1, 25 -; RV32-NEXT: addi a2, a2, 1 -; RV32-NEXT: addi a3, a3, 2047 -; RV32-NEXT: addi a3, a3, 308 -; RV32-NEXT: addi a4, a4, 23 -; RV32-NEXT: addi a5, a5, 23 -; RV32-NEXT: addi a6, a6, 22 -; RV32-NEXT: addi a7, a7, 23 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: ret -; -; RV64-LABEL: add_constant_rhs_8xi32: -; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, 23 -; RV64-NEXT: addiw a1, a1, 25 -; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addiw a3, a3, 2047 -; RV64-NEXT: addiw a3, a3, 308 -; RV64-NEXT: addiw a4, a4, 23 -; RV64-NEXT: addiw a5, a5, 23 -; RV64-NEXT: addiw a6, a6, 22 -; RV64-NEXT: addiw a7, a7, 23 -; 
RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: vslide1down.vx v8, v8, a4 -; RV64-NEXT: vslide1down.vx v8, v8, a5 -; RV64-NEXT: vslide1down.vx v8, v8, a6 -; RV64-NEXT: vslide1down.vx v8, v8, a7 -; RV64-NEXT: ret +; CHECK-LABEL: add_constant_rhs_8xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vslide1down.vx v8, v8, a4 +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI1_0) +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v8, a6 +; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %e0 = add i32 %a, 23 %e1 = add i32 %b, 25 %e2 = add i32 %c, 1 @@ -106,33 +64,18 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: sub_constant_rhs: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -23 -; RV32-NEXT: addi a1, a1, -25 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: addi a3, a3, -2048 -; RV32-NEXT: addi a3, a3, -307 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: sub_constant_rhs: -; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, -23 -; RV64-NEXT: addiw a1, a1, -25 -; RV64-NEXT: addiw a2, a2, -1 -; RV64-NEXT: addiw a3, a3, -2048 -; RV64-NEXT: addiw a3, a3, -307 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: sub_constant_rhs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI2_0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret %e0 = sub i32 %a, 23 %e1 = sub i32 %b, 25 %e2 = sub i32 %c, 1 @@ -145,41 +88,18 @@ define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { } define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: mul_constant_rhs: -; RV32: # %bb.0: -; RV32-NEXT: li a4, 23 -; RV32-NEXT: mul a0, a0, a4 -; RV32-NEXT: li a4, 25 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: li a4, 27 -; RV32-NEXT: mul a2, a2, a4 -; RV32-NEXT: lui a4, 1 -; RV32-NEXT: addi a4, a4, -1741 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: mul_constant_rhs: -; RV64: # %bb.0: -; RV64-NEXT: li a4, 23 -; RV64-NEXT: mulw a0, a0, a4 -; RV64-NEXT: li a4, 25 -; RV64-NEXT: mulw a1, a1, a4 -; RV64-NEXT: li a4, 27 -; RV64-NEXT: mulw a2, a2, a4 -; RV64-NEXT: lui a4, 1 -; RV64-NEXT: addiw a4, a4, -1741 -; 
RV64-NEXT: mulw a3, a3, a4 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: mul_constant_rhs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI3_0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: ret %e0 = mul i32 %a, 23 %e1 = mul i32 %b, 25 %e2 = mul i32 %c, 27 @@ -192,60 +112,30 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { } define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: udiv_constant_rhs: -; RV32: # %bb.0: -; RV32-NEXT: lui a4, 729444 -; RV32-NEXT: addi a4, a4, 713 -; RV32-NEXT: mulhu a0, a0, a4 -; RV32-NEXT: srli a0, a0, 4 -; RV32-NEXT: lui a4, 335544 -; RV32-NEXT: addi a4, a4, 1311 -; RV32-NEXT: mulhu a1, a1, a4 -; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: lui a4, 93703 -; RV32-NEXT: addi a4, a4, -1899 -; RV32-NEXT: mulhu a4, a3, a4 -; RV32-NEXT: sub a3, a3, a4 -; RV32-NEXT: srli a3, a3, 1 -; RV32-NEXT: add a3, a3, a4 -; RV32-NEXT: srli a3, a3, 7 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: udiv_constant_rhs: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: lui a4, 729444 -; RV64-NEXT: addiw a4, a4, 713 -; RV64-NEXT: slli a4, a4, 32 -; RV64-NEXT: mulhu a0, a0, a4 -; RV64-NEXT: srli a0, a0, 36 -; RV64-NEXT: slli a1, a1, 32 -; RV64-NEXT: lui a4, 335544 -; RV64-NEXT: addiw a4, a4, 1311 -; RV64-NEXT: slli a4, a4, 32 -; RV64-NEXT: mulhu a1, a1, a4 -; RV64-NEXT: srli a1, a1, 35 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: lui a5, 93703 -; RV64-NEXT: addiw a5, a5, -1899 -; RV64-NEXT: slli a5, a5, 32 -; RV64-NEXT: mulhu a4, a4, a5 -; RV64-NEXT: srli a4, a4, 32 -; RV64-NEXT: subw a3, a3, a4 -; RV64-NEXT: srliw a3, a3, 1 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: srli a3, a3, 7 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: udiv_constant_rhs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vmulhu.vv v9, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vslide1down.vx v11, v11, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI4_1) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_1) +; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vmulhu.vv v10, v10, v11 +; CHECK-NEXT: vadd.vv v9, v10, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsrl.vv v9, v9, v12 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: ret %e0 = udiv i32 %a, 23 %e1 = udiv i32 %b, 25 %e2 = udiv i32 %c, 1 @@ -261,21 +151,15 @@ define <4 x i32> 
@udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) { ; CHECK-LABEL: fadd_constant_rhs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 269184 -; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: fadd.s fa4, fa0, fa5 -; CHECK-NEXT: lui a0, 269440 -; CHECK-NEXT: fmv.w.x fa0, a0 -; CHECK-NEXT: fadd.s fa1, fa1, fa0 -; CHECK-NEXT: lui a0, 262144 -; CHECK-NEXT: fmv.w.x fa0, a0 -; CHECK-NEXT: fadd.s fa2, fa2, fa0 -; CHECK-NEXT: fadd.s fa5, fa3, fa5 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = fadd float %a, 23.0 %e1 = fadd float %b, 25.0 @@ -291,21 +175,15 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) { define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) { ; CHECK-LABEL: fdiv_constant_rhs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 269184 -; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: fdiv.s fa4, fa0, fa5 -; CHECK-NEXT: lui a0, 269440 -; CHECK-NEXT: fmv.w.x fa0, a0 -; CHECK-NEXT: fdiv.s fa1, fa1, fa0 -; CHECK-NEXT: lui a0, 266752 -; CHECK-NEXT: fmv.w.x fa0, a0 -; CHECK-NEXT: fdiv.s fa2, fa2, fa0 -; CHECK-NEXT: fdiv.s fa5, fa3, fa5 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: lui a0, %hi(.LCPI6_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = fdiv float %a, 23.0 %e1 = fdiv float %b, 25.0 @@ -319,31 +197,16 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) { } define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: add_constant_rhs_splat: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, 23 -; RV32-NEXT: addi a1, a1, 23 -; RV32-NEXT: addi a2, a2, 23 -; RV32-NEXT: addi a3, a3, 23 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: add_constant_rhs_splat: -; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, 23 -; RV64-NEXT: addiw a1, a1, 23 -; RV64-NEXT: addiw a2, a2, 23 -; RV64-NEXT: addiw a3, a3, 23 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: add_constant_rhs_splat: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: li a0, 23 +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret %e0 = add i32 %a, 23 %e1 = add i32 %b, 23 %e2 = add i32 %c, 23 @@ -458,33 +321,18 @@ define <4 x 
i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) { } define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: add_constant_rhs_inverse: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: addi a1, a1, 25 -; RV32-NEXT: addi a2, a2, 1 -; RV32-NEXT: addi a3, a3, 2047 -; RV32-NEXT: addi a3, a3, 308 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: add_constant_rhs_inverse: -; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: addiw a1, a1, 25 -; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addiw a3, a3, 2047 -; RV64-NEXT: addiw a3, a3, 308 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: add_constant_rhs_inverse: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI11_0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %e0 = sub i32 %a, 1 %e1 = add i32 %b, 25 %e2 = add i32 %c, 1 @@ -497,33 +345,18 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) { } define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) { -; RV32-LABEL: add_constant_rhs_commute: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, 23 -; RV32-NEXT: addi a1, a1, 25 -; RV32-NEXT: addi a2, a2, 1 -; RV32-NEXT: addi a3, a3, 2047 -; RV32-NEXT: addi a3, a3, 308 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: ret -; -; RV64-LABEL: add_constant_rhs_commute: -; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, 23 -; RV64-NEXT: addiw a1, a1, 25 -; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addiw a3, a3, 2047 -; RV64-NEXT: addiw a3, a3, 308 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: ret +; CHECK-LABEL: add_constant_rhs_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %e0 = add i32 %a, 23 %e1 = add i32 %b, 25 %e2 = add i32 1, %c diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 820e875d3ee3b..d27e7799a3862 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -3,65 +3,30 @@ ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64 define void 
@vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { -; RV32-LABEL: vselect_vv_v6i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a1 -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 27 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a2 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu -; RV32-NEXT: vle32.v v8, (a0), v0.t -; RV32-NEXT: vse32.v v8, (a3) -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_vv_v6i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v8, a1 -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 59 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a2 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu -; RV64-NEXT: vle32.v v8, (a0), v0.t -; RV64-NEXT: vse32.v v8, (a3) -; RV64-NEXT: ret +; CHECK-LABEL: vselect_vv_v6i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: lbu a2, 0(a2) +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v10, v8, a2 +; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 3 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 4 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a2, a2, 5 +; CHECK-NEXT: vslide1down.vx v10, v10, a2 +; CHECK-NEXT: vslidedown.vi v10, v10, 2 +; CHECK-NEXT: vand.vi v10, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: vse32.v v8, (a3) +; CHECK-NEXT: ret %va = load <6 x i32>, ptr %a %vb = load <6 x i32>, ptr %b %vcc = load <6 x i1>, ptr %cc @@ -71,67 +36,31 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { } define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { -; RV32-LABEL: vselect_vx_v6i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a1 -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli 
a1, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 27 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a2 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vse32.v v8, (a3) -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_vx_v6i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v8, a1 -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 59 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a2 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vse32.v v8, (a3) -; RV64-NEXT: ret +; CHECK-LABEL: vselect_vx_v6i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: lbu a2, 0(a2) +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v10, v8, a2 +; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 3 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 4 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a2, a2, 5 +; CHECK-NEXT: vslide1down.vx v10, v10, a2 +; CHECK-NEXT: vslidedown.vi v10, v10, 2 +; CHECK-NEXT: vand.vi v10, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a3) +; CHECK-NEXT: ret %vb = load <6 x i32>, ptr %b %ahead = insertelement <6 x i32> poison, i32 %a, i32 0 %va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer @@ -142,67 +71,31 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { } define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { -; RV32-LABEL: vselect_vi_v6i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: andi a0, a1, 1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a0 -; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 28 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 27 -; 
RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: srli a1, a1, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vse32.v v8, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_vi_v6i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: lbu a1, 0(a1) -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: andi a0, a1, 1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v8, a0 -; RV64-NEXT: slli a0, a1, 62 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 60 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 59 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: srli a1, a1, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vse32.v v8, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: vselect_vi_v6i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v10, v8, a1 +; CHECK-NEXT: srli a0, a1, 1 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 2 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 3 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 4 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a1, a1, 5 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: vslidedown.vi v10, v10, 2 +; CHECK-NEXT: vand.vi v10, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: ret %vb = load <6 x i32>, ptr %b %a = insertelement <6 x i32> poison, i32 -1, i32 0 %va = shufflevector <6 x i32> %a, <6 x i32> poison, <6 x i32> zeroinitializer @@ -214,65 +107,30 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { -; RV32-LABEL: vselect_vv_v6f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a1 -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 27 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a2 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: 
vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu -; RV32-NEXT: vle32.v v8, (a0), v0.t -; RV32-NEXT: vse32.v v8, (a3) -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_vv_v6f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v8, a1 -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 59 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a2 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu -; RV64-NEXT: vle32.v v8, (a0), v0.t -; RV64-NEXT: vse32.v v8, (a3) -; RV64-NEXT: ret +; CHECK-LABEL: vselect_vv_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: lbu a2, 0(a2) +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v10, v8, a2 +; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 3 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a1, a2, 4 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: srli a2, a2, 5 +; CHECK-NEXT: vslide1down.vx v10, v10, a2 +; CHECK-NEXT: vslidedown.vi v10, v10, 2 +; CHECK-NEXT: vand.vi v10, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: vse32.v v8, (a3) +; CHECK-NEXT: ret %va = load <6 x float>, ptr %a %vb = load <6 x float>, ptr %b %vcc = load <6 x i1>, ptr %cc @@ -282,67 +140,31 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { } define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { -; RV32-LABEL: vselect_vx_v6f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: andi a0, a1, 1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a0 -; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 28 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 27 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: srli a1, a1, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vse32.v v8, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_vx_v6f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: lbu a1, 0(a1) -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: andi a0, a1, 1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, 
ta, ma -; RV64-NEXT: vslide1down.vx v10, v8, a0 -; RV64-NEXT: slli a0, a1, 62 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 60 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 59 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: srli a1, a1, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vse32.v v8, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: vselect_vx_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v10, v8, a1 +; CHECK-NEXT: srli a0, a1, 1 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 2 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 3 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 4 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a1, a1, 5 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: vslidedown.vi v10, v10, 2 +; CHECK-NEXT: vand.vi v10, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: ret %vb = load <6 x float>, ptr %b %ahead = insertelement <6 x float> poison, float %a, i32 0 %va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer @@ -353,67 +175,31 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { } define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { -; RV32-LABEL: vselect_vfpzero_v6f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: andi a0, a1, 1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a0 -; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 28 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 27 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: srli a1, a1, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vim v8, v8, 0, v0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vse32.v v8, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_vfpzero_v6f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: lbu a1, 0(a1) -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: andi a0, a1, 1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v8, a0 -; RV64-NEXT: slli a0, a1, 62 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 61 -; 
RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 60 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 59 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: srli a1, a1, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vim v8, v8, 0, v0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vse32.v v8, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: vselect_vfpzero_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v10, v8, a1 +; CHECK-NEXT: srli a0, a1, 1 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 2 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 3 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a0, a1, 4 +; CHECK-NEXT: vslide1down.vx v10, v10, a0 +; CHECK-NEXT: srli a1, a1, 5 +; CHECK-NEXT: vslide1down.vx v10, v10, a1 +; CHECK-NEXT: vslidedown.vi v10, v10, 2 +; CHECK-NEXT: vand.vi v10, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: ret %vb = load <6 x float>, ptr %b %a = insertelement <6 x float> poison, float 0.0, i32 0 %va = shufflevector <6 x float> %a, <6 x float> poison, <6 x i32> zeroinitializer @@ -711,3 +497,6 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) { %v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b ret <64 x i1> %v } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index ee91bae6b6e02..dcf701be76f62 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -725,79 +725,24 @@ define void @test_srem_vec(ptr %X) nounwind { ; ; RV64MV-LABEL: test_srem_vec: ; RV64MV: # %bb.0: -; RV64MV-NEXT: ld a1, 0(a0) -; RV64MV-NEXT: lwu a2, 8(a0) -; RV64MV-NEXT: srli a3, a1, 2 -; RV64MV-NEXT: lbu a4, 12(a0) -; RV64MV-NEXT: slli a5, a2, 62 -; RV64MV-NEXT: or a3, a5, a3 -; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: slli a4, a4, 32 -; RV64MV-NEXT: or a2, a2, a4 -; RV64MV-NEXT: slli a2, a2, 29 -; RV64MV-NEXT: lui a4, %hi(.LCPI3_0) -; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4) -; RV64MV-NEXT: srai a2, a2, 31 -; RV64MV-NEXT: slli a1, a1, 31 -; RV64MV-NEXT: srai a1, a1, 31 -; RV64MV-NEXT: mulh a4, a2, a4 -; RV64MV-NEXT: srli a5, a4, 63 -; RV64MV-NEXT: srai a4, a4, 1 -; RV64MV-NEXT: add a4, a4, a5 -; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) -; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: slli a4, a4, 2 -; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: mulh a4, a3, a5 -; RV64MV-NEXT: srli a5, a4, 63 -; RV64MV-NEXT: srai a4, a4, 1 -; RV64MV-NEXT: add a4, a4, a5 -; RV64MV-NEXT: lui a5, %hi(.LCPI3_2) -; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) -; RV64MV-NEXT: add a3, a3, a4 -; RV64MV-NEXT: slli a4, a4, 3 -; RV64MV-NEXT: sub a3, a3, a4 -; RV64MV-NEXT: mulh a4, a1, a5 -; RV64MV-NEXT: srli a5, a4, 63 -; RV64MV-NEXT: add a4, a4, a5 -; RV64MV-NEXT: li a5, 6 -; RV64MV-NEXT: mul a4, a4, a5 -; RV64MV-NEXT: sub a1, a1, a4 ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64MV-NEXT: vslide1down.vx v8, v8, a1 -; RV64MV-NEXT: vslide1down.vx v8, v8, a3 -; RV64MV-NEXT: vslide1down.vx v8, v8, a2 -; RV64MV-NEXT: vslidedown.vi v8, v8, 1 -; RV64MV-NEXT: li a1, -1 -; RV64MV-NEXT: srli a1, a1, 31 -; RV64MV-NEXT: vand.vx v8, v8, a1 -; RV64MV-NEXT: lui a2, 32 -; RV64MV-NEXT: addiw a2, a2, 256 -; RV64MV-NEXT: vmv.s.x v10, a2 -; RV64MV-NEXT: vsext.vf8 v12, v10 -; RV64MV-NEXT: vmsne.vv v0, v8, v12 -; RV64MV-NEXT: vmv.v.i v8, 0 -; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64MV-NEXT: vmv.v.i v8, -1 ; RV64MV-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64MV-NEXT: vslidedown.vi v10, v8, 2 -; RV64MV-NEXT: vmv.x.s a2, v10 -; RV64MV-NEXT: slli a3, a2, 31 -; RV64MV-NEXT: srli a3, a3, 61 -; RV64MV-NEXT: sb a3, 12(a0) -; RV64MV-NEXT: vmv.x.s a3, v8 -; RV64MV-NEXT: and a1, a3, a1 +; RV64MV-NEXT: vmv.x.s a1, v10 +; RV64MV-NEXT: slli a2, a1, 31 +; RV64MV-NEXT: srli a2, a2, 61 +; RV64MV-NEXT: sb a2, 12(a0) ; RV64MV-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64MV-NEXT: vslidedown.vi v8, v8, 1 -; RV64MV-NEXT: vmv.x.s a3, v8 -; RV64MV-NEXT: slli a4, a3, 33 -; RV64MV-NEXT: or a1, a1, a4 -; RV64MV-NEXT: sd a1, 0(a0) -; RV64MV-NEXT: slli a2, a2, 2 -; RV64MV-NEXT: slli a3, a3, 31 -; RV64MV-NEXT: srli a3, a3, 62 -; RV64MV-NEXT: or a2, a3, a2 -; RV64MV-NEXT: sw a2, 8(a0) +; RV64MV-NEXT: vmv.x.s a2, v8 +; RV64MV-NEXT: slli a3, a2, 33 +; RV64MV-NEXT: sd a3, 0(a0) +; RV64MV-NEXT: slli a1, a1, 2 +; RV64MV-NEXT: slli a2, a2, 31 +; RV64MV-NEXT: srli a2, a2, 62 +; RV64MV-NEXT: or a1, a2, a1 +; RV64MV-NEXT: sw a1, 8(a0) ; RV64MV-NEXT: ret %ld = load <3 x i33>, ptr %X %srem = srem <3 x i33> %ld, diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index f24b6107f15ad..456d98fd4e47f 100644 --- 
a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -521,35 +521,33 @@ define void @test_urem_vec(ptr %X) nounwind { ; ; RV32MV-LABEL: test_urem_vec: ; RV32MV: # %bb.0: -; RV32MV-NEXT: lw a1, 0(a0) -; RV32MV-NEXT: andi a2, a1, 2047 +; RV32MV-NEXT: lbu a1, 4(a0) +; RV32MV-NEXT: lw a2, 0(a0) +; RV32MV-NEXT: slli a1, a1, 10 +; RV32MV-NEXT: srli a3, a2, 22 +; RV32MV-NEXT: or a1, a3, a1 ; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32MV-NEXT: vslide1down.vx v8, v8, a2 -; RV32MV-NEXT: lbu a2, 4(a0) -; RV32MV-NEXT: slli a3, a1, 10 -; RV32MV-NEXT: srli a3, a3, 21 -; RV32MV-NEXT: vslide1down.vx v8, v8, a3 -; RV32MV-NEXT: slli a2, a2, 10 -; RV32MV-NEXT: srli a1, a1, 22 -; RV32MV-NEXT: or a1, a1, a2 -; RV32MV-NEXT: andi a1, a1, 2047 +; RV32MV-NEXT: srli a2, a2, 11 +; RV32MV-NEXT: vslide1down.vx v8, v8, a2 ; RV32MV-NEXT: vslide1down.vx v8, v8, a1 -; RV32MV-NEXT: lui a1, %hi(.LCPI4_0) -; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0) -; RV32MV-NEXT: vle16.v v9, (a1) ; RV32MV-NEXT: vslidedown.vi v8, v8, 1 +; RV32MV-NEXT: li a1, 2047 +; RV32MV-NEXT: lui a2, %hi(.LCPI4_0) +; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_0) +; RV32MV-NEXT: vle16.v v9, (a2) +; RV32MV-NEXT: vand.vx v8, v8, a1 ; RV32MV-NEXT: vid.v v10 ; RV32MV-NEXT: vsub.vv v8, v8, v10 ; RV32MV-NEXT: vmul.vv v8, v8, v9 ; RV32MV-NEXT: vadd.vv v9, v8, v8 -; RV32MV-NEXT: lui a1, 41121 -; RV32MV-NEXT: addi a1, a1, -1527 +; RV32MV-NEXT: lui a2, 41121 +; RV32MV-NEXT: addi a2, a2, -1527 ; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32MV-NEXT: vmv.s.x v10, a1 +; RV32MV-NEXT: vmv.s.x v10, a2 ; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV32MV-NEXT: vsext.vf2 v11, v10 ; RV32MV-NEXT: vsll.vv v9, v9, v11 -; RV32MV-NEXT: li a1, 2047 ; RV32MV-NEXT: vand.vx v8, v8, a1 ; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32MV-NEXT: vmv.v.i v10, 1 @@ -587,30 +585,31 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: lwu a2, 0(a0) ; RV64MV-NEXT: slli a1, a1, 32 ; RV64MV-NEXT: or a1, a2, a1 -; RV64MV-NEXT: andi a2, a1, 2047 ; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64MV-NEXT: vslide1down.vx v8, v8, a2 -; RV64MV-NEXT: slli a2, a1, 42 -; RV64MV-NEXT: srli a2, a2, 53 +; RV64MV-NEXT: vslide1down.vx v8, v8, a1 +; RV64MV-NEXT: slli a1, a1, 24 +; RV64MV-NEXT: srli a1, a1, 24 +; RV64MV-NEXT: srli a2, a1, 11 ; RV64MV-NEXT: vslide1down.vx v8, v8, a2 ; RV64MV-NEXT: srli a1, a1, 22 ; RV64MV-NEXT: vslide1down.vx v8, v8, a1 -; RV64MV-NEXT: lui a1, %hi(.LCPI4_0) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0) -; RV64MV-NEXT: vle16.v v9, (a1) ; RV64MV-NEXT: vslidedown.vi v8, v8, 1 +; RV64MV-NEXT: li a1, 2047 +; RV64MV-NEXT: lui a2, %hi(.LCPI4_0) +; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_0) +; RV64MV-NEXT: vle16.v v9, (a2) +; RV64MV-NEXT: vand.vx v8, v8, a1 ; RV64MV-NEXT: vid.v v10 ; RV64MV-NEXT: vsub.vv v8, v8, v10 ; RV64MV-NEXT: vmul.vv v8, v8, v9 ; RV64MV-NEXT: vadd.vv v9, v8, v8 -; RV64MV-NEXT: lui a1, 41121 -; RV64MV-NEXT: addiw a1, a1, -1527 +; RV64MV-NEXT: lui a2, 41121 +; RV64MV-NEXT: addiw a2, a2, -1527 ; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64MV-NEXT: vmv.s.x v10, a1 +; RV64MV-NEXT: vmv.s.x v10, a2 ; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64MV-NEXT: vsext.vf2 v11, v10 ; RV64MV-NEXT: vsll.vv v9, v9, v11 -; RV64MV-NEXT: li a1, 2047 ; RV64MV-NEXT: vand.vx v8, v8, a1 ; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64MV-NEXT: vmv.v.i v10, 1