[RISCV] Unify getDemanded between forward and backwards passes in RISCVInsertVSETVLI (#92860)

We have two rules in needVSETVLI where we can relax the demanded fields
for slides and splats when VL=1.

However, these aren't present in getDemanded, which prevents us from
coalescing some vsetvlis around slides and splats in the backwards pass.

The reason they weren't in getDemanded is that they require us to check
the value of the AVL operand, which may be stale in the backwards pass:
the actual VL or VTYPE value may differ from what was originally
requested in the pseudo's operands.

Using the original operands should actually be fine though, as we only
care about what was originally demanded by the instruction. The current
value of VL or VTYPE shouldn't influence this.

This addresses some of the regressions we are seeing in #70549 from
splats and slides getting reordered.
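To make the coalescing opportunity concrete, here is a small, self-contained C++ sketch of the idea. It is a simplified model, not the pass's actual data structures: the struct and function names below are invented for illustration, and LMUL/policy handling is elided. It shows why a VL=1 slide with an undefined merge operand only demands "VL != 0" rather than an exact VL, so a preceding vsetvli with a different non-zero VL already satisfies it — the check the backwards pass previously could not see.

```cpp
// Hedged, simplified model of the relaxation (names are illustrative only;
// this is not the LLVM API). LMUL and policy handling are deliberately elided.
#include <cstdio>

struct Demanded {
  bool VLAny = true;      // the exact VL value matters
  bool VLZeroness = true; // only "VL == 0 vs VL != 0" matters
  bool SEWEqual = true;   // SEW must match exactly (slides can't relax this)
};

// Demands of a vslidedown/vslideup with an immediate AVL of 1 and an
// undefined merge operand, mirroring the rule this patch moves from
// needVSETVLI into getDemanded.
Demanded demandedByVL1Slide() {
  Demanded D;
  D.VLAny = false; // any non-zero VL computes element 0 the same way
  return D;
}

// Is the configuration left by an earlier vsetvli good enough for an
// instruction with demands D that originally asked for (ReqVL, ReqSEW)?
bool satisfies(const Demanded &D, unsigned CurVL, unsigned CurSEW,
               unsigned ReqVL, unsigned ReqSEW) {
  if (D.VLAny && CurVL != ReqVL)
    return false;
  if (D.VLZeroness && (CurVL == 0) != (ReqVL == 0))
    return false;
  if (D.SEWEqual && CurSEW != ReqSEW)
    return false;
  return true;
}

int main() {
  // e.g. "vsetivli zero, 8, e32" is already in effect and a slide asks for
  // VL=1, e32: with the relaxed demands no second vsetvli is needed, which
  // is the coalescing opportunity the backwards pass can now see.
  Demanded D = demandedByVL1Slide();
  bool Covered = satisfies(D, /*CurVL=*/8, /*CurSEW=*/32,
                           /*ReqVL=*/1, /*ReqSEW=*/32);
  std::printf("second vsetvli needed? %s\n", Covered ? "no" : "yes");
  return 0;
}
```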
lukel97 authored May 21, 2024
1 parent 66b76fa commit e3ffc4b
Showing 16 changed files with 136 additions and 212 deletions.
78 changes: 41 additions & 37 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -378,10 +378,10 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// Warning: This function has to work on both the lowered (i.e. post
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
// be stale after lowering.
// This function works in RISCVCoalesceVSETVLI too. We can still use the value
// of a SEW, VL, or Policy operand even though it might not be the exact value
// in the VL or VTYPE, since we only care about what the instruction
// originally demanded.

// Most instructions don't use any of these subfields.
DemandedFields Res;
@@ -459,6 +459,43 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.MaskPolicy = false;
}

if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
// slideup's prefix). Notes:
// * We can't modify SEW here since the slide amount is in units of SEW.
// * VL=1 is special only because we have existing support for zero vs
// non-zero VL. We could generalize this if we had a VL > C predicate.
// * The LMUL1 restriction is for machines whose latency may depend on VL.
// * As above, this is only legal for tail "undefined" not "agnostic".
if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
hasUndefinedMergeOp(MI)) {
Res.VLAny = false;
Res.VLZeroness = true;
Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Res.TailPolicy = false;
}

// A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
// semantically the same as vmv.s.x. This is particularly useful since we
// don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
// in its place. Since a splat is non-constant time in LMUL, we do need to be
// careful not to increase the number of active vector registers (unlike for
// vmv.s.x).
if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
hasUndefinedMergeOp(MI)) {
Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Res.SEWLMULRatio = false;
Res.VLAny = false;
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
else
Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
Res.TailPolicy = false;
}
}

return Res;
}

@@ -1149,39 +1186,6 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,

DemandedFields Used = getDemanded(MI, ST);

// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
// slideup's prefix). Notes:
// * We can't modify SEW here since the slide amount is in units of SEW.
// * VL=1 is special only because we have existing support for zero vs
// non-zero VL. We could generalize this if we had a VL > C predicate.
// * The LMUL1 restriction is for machines whose latency may depend on VL.
// * As above, this is only legal for tail "undefined" not "agnostic".
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
hasUndefinedMergeOp(MI)) {
Used.VLAny = false;
Used.VLZeroness = true;
Used.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Used.TailPolicy = false;
}

// A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated semantically
// the same as vmv.s.x. This is particularly useful since we don't have an
// immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
// Since a splat is non-constant time in LMUL, we do need to be careful not to
// increase the number of active vector registers (unlike for vmv.s.x).
if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
Require.getAVLImm() == 1 && hasUndefinedMergeOp(MI)) {
Used.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Used.SEWLMULRatio = false;
Used.VLAny = false;
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
else
Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
Used.TailPolicy = false;
}

if (CurInfo.isCompatible(Used, Require, LIS))
return false;

3 changes: 0 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -78,7 +78,6 @@ define i1 @extractelt_nxv16i1(ptr %x, i64 %idx) nounwind {
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -96,7 +95,6 @@ define i1 @extractelt_nxv32i1(ptr %x, i64 %idx) nounwind {
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -114,7 +112,6 @@ define i1 @extractelt_nxv64i1(ptr %x, i64 %idx) nounwind {
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
17 changes: 5 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -73,7 +73,6 @@ define void @extract_v1i32_v8i32_4(ptr %x, ptr %y) {
; VLA: # %bb.0:
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT: vle32.v v8, (a0)
; VLA-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; VLA-NEXT: vslidedown.vi v8, v8, 4
; VLA-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; VLA-NEXT: vse32.v v8, (a1)
@@ -96,7 +95,6 @@ define void @extract_v1i32_v8i32_5(ptr %x, ptr %y) {
; VLA: # %bb.0:
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT: vle32.v v8, (a0)
; VLA-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; VLA-NEXT: vslidedown.vi v8, v8, 5
; VLA-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; VLA-NEXT: vse32.v v8, (a1)
@@ -391,19 +389,17 @@ define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) {
; VLA-NEXT: li a2, 64
; VLA-NEXT: vsetvli zero, a2, e8, m4, ta, ma
; VLA-NEXT: vlm.v v8, (a0)
; VLA-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; VLA-NEXT: vslidedown.vi v8, v8, 1
; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; VLA-NEXT: vslidedown.vi v8, v8, 1
; VLA-NEXT: vsm.v v8, (a1)
; VLA-NEXT: ret
;
; VLS-LABEL: extract_v8i1_v64i1_8:
; VLS: # %bb.0:
; VLS-NEXT: vsetvli a2, zero, e8, m4, ta, ma
; VLS-NEXT: vlm.v v8, (a0)
; VLS-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; VLS-NEXT: vslidedown.vi v8, v8, 1
; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; VLS-NEXT: vslidedown.vi v8, v8, 1
; VLS-NEXT: vsm.v v8, (a1)
; VLS-NEXT: ret
%a = load <64 x i1>, ptr %x
@@ -418,19 +414,17 @@ define void @extract_v8i1_v64i1_48(ptr %x, ptr %y) {
; VLA-NEXT: li a2, 64
; VLA-NEXT: vsetvli zero, a2, e8, m4, ta, ma
; VLA-NEXT: vlm.v v8, (a0)
; VLA-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; VLA-NEXT: vslidedown.vi v8, v8, 6
; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; VLA-NEXT: vslidedown.vi v8, v8, 6
; VLA-NEXT: vsm.v v8, (a1)
; VLA-NEXT: ret
;
; VLS-LABEL: extract_v8i1_v64i1_48:
; VLS: # %bb.0:
; VLS-NEXT: vsetvli a2, zero, e8, m4, ta, ma
; VLS-NEXT: vlm.v v8, (a0)
; VLS-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; VLS-NEXT: vslidedown.vi v8, v8, 6
; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; VLS-NEXT: vslidedown.vi v8, v8, 6
; VLS-NEXT: vsm.v v8, (a1)
; VLS-NEXT: ret
%a = load <64 x i1>, ptr %x
@@ -853,9 +847,8 @@ define void @extract_v2i1_nxv32i1_26(<vscale x 32 x i1> %x, ptr %y) {
define void @extract_v8i1_nxv32i1_16(<vscale x 32 x i1> %x, ptr %y) {
; CHECK-LABEL: extract_v8i1_nxv32i1_16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v0, 2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v0, 2
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.vector.extract.v8i1.nxv32i1(<vscale x 32 x i1> %x, i64 16)
15 changes: 2 additions & 13 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -138,7 +138,6 @@ define i32 @extractelt_v8i32(ptr %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 6
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -152,9 +151,9 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vsrl.vx v10, v8, a0
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vmv.x.s a0, v8
@@ -164,7 +163,6 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -233,7 +231,6 @@ define i64 @extractelt_v3i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -452,7 +449,6 @@ define i8 @extractelt_v32i8_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -466,7 +462,6 @@ define i16 @extractelt_v16i16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -481,7 +476,6 @@ define i32 @extractelt_v8i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -497,10 +491,10 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
@@ -510,7 +504,6 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vx v8, v8, a1
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -526,7 +519,6 @@ define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -542,7 +534,6 @@ define float @extractelt_v8f32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -558,7 +549,6 @@ define double @extractelt_v4f64_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -594,7 +584,6 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vx v8, v8, a1
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -5,9 +5,8 @@
define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: shuffle_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 11
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v0, 11
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -5,9 +5,8 @@
define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: shuffle_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 11
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v0, 11
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@@ -29,9 +28,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_xv_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 9
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v0, 9
; CHECK-NEXT: vmerge.vim v8, v8, 5, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -41,9 +39,8 @@ define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_vx_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 6
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v0, 6
; CHECK-NEXT: vmerge.vim v8, v8, 5, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
