[RISCV] Move performCombineVMergeAndVOps into RISCVFoldMasks #71764

Open · wants to merge 6 commits into main
Conversation

lukel97 (Contributor) commented Nov 9, 2023

Continuing on from 72e6c1c, this moves the vmerge fold peephole into RISCVFoldMasks.

Instead of reasoning about chain and glue nodes, this sinks the True operand to just before the vmerge pseudo so it can reuse its VL/mask/false/passthru operands. Most of the time we're able to do this without any hitches, because we check that the vmerge is the only user of True. However, there are certain cases with write-after-read dependencies on physical registers that we also need to sink, e.g. fcsr in @vmerge_vfcvt_rm in test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll

We also need to implement the masked -> unmasked peephole, since the vmerge fold peephole can produce new masked pseudos with all-ones masks that should then be unmasked. However, that peephole is still kept in RISCVISelDAGToDAG to minimize the diff for this patch; it will be switched over in a later patch.

llvmbot (Collaborator) commented Nov 9, 2023

@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Patch is 32.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71764.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVFoldMasks.cpp (+391-19)
  • (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (-305)
  • (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h (-2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll (+1-1)
  • (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll (+8-10)
diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index d1c77a6cc7756df..b4ef44cddb90107 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -9,15 +9,29 @@
 // This pass performs various peephole optimisations that fold masks into vector
 // pseudo instructions after instruction selection.
 //
-// Currently it converts
-// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// It performs the following transforms:
+//
+// %true = PseudoFOO %passthru, ..., %vl, %sew
+// %x = PseudoVMERGE_VVM %passthru, %passthru, %true, %mask, %vl, %sew
+// ->
+// %x = PseudoFOO_MASK %passthru, ..., %mask, %vl, %sew
+//
+// %x = PseudoFOO_MASK ..., %allonesmask, %vl, %sew
 // ->
-// PseudoVMV_V_V %false, %true, %vl, %sew
+// %x = PseudoFOO ..., %vl, %sew
+//
+// %x = PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// ->
+// %x = PseudoVMV_V_V %false, %true, %vl, %sew
 //
 //===---------------------------------------------------------------------===//
 
 #include "RISCV.h"
+#include "RISCVISelDAGToDAG.h"
+#include "RISCVInstrInfo.h"
 #include "RISCVSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -48,7 +62,9 @@ class RISCVFoldMasks : public MachineFunctionPass {
   StringRef getPassName() const override { return "RISC-V Fold Masks"; }
 
 private:
+  bool foldVMergeIntoOps(MachineInstr &MI, MachineInstr *MaskDef);
   bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
+  bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
 
   bool isAllOnesMask(MachineInstr *MaskCopy);
 };
@@ -59,22 +75,24 @@ char RISCVFoldMasks::ID = 0;
 
 INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
 
-bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
-  if (!MaskCopy)
-    return false;
-  assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0);
-  Register SrcReg =
-      TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI);
-  if (!SrcReg.isVirtual())
-    return false;
-  MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
-  if (!SrcDef)
+bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
+  if (!MaskDef)
     return false;
+  if (MaskDef->isCopy()) {
+    assert(MaskDef->getOperand(0).getReg() == RISCV::V0);
+    Register SrcReg =
+        TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+    if (!SrcReg.isVirtual())
+      return false;
+    MaskDef = MRI->getVRegDef(SrcReg);
+    if (!MaskDef)
+      return false;
+  }
 
   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
   // assume that it's all-ones? Same applies to its VL.
-  switch (SrcDef->getOpcode()) {
+  switch (MaskDef->getOpcode()) {
   case RISCV::PseudoVMSET_M_B1:
   case RISCV::PseudoVMSET_M_B2:
   case RISCV::PseudoVMSET_M_B4:
@@ -88,6 +106,313 @@ bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
   }
 }
 
+static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
+  switch (LMUL) {
+  case RISCVII::LMUL_F8:
+    return RISCV::PseudoVMSET_M_B1;
+  case RISCVII::LMUL_F4:
+    return RISCV::PseudoVMSET_M_B2;
+  case RISCVII::LMUL_F2:
+    return RISCV::PseudoVMSET_M_B4;
+  case RISCVII::LMUL_1:
+    return RISCV::PseudoVMSET_M_B8;
+  case RISCVII::LMUL_2:
+    return RISCV::PseudoVMSET_M_B16;
+  case RISCVII::LMUL_4:
+    return RISCV::PseudoVMSET_M_B32;
+  case RISCVII::LMUL_8:
+    return RISCV::PseudoVMSET_M_B64;
+  case RISCVII::LMUL_RESERVED:
+    llvm_unreachable("Unexpected LMUL");
+  }
+  llvm_unreachable("Unknown VLMUL enum");
+}
+
+/// Inserts an operand at Idx in MI, pushing back any operands.
+static void insertOperand(MachineInstr &MI, MachineOperand MO, unsigned Idx) {
+  SmallVector<MachineOperand> OpsToAddBack;
+  unsigned NumTailOps = MI.getNumOperands() - Idx;
+  for (unsigned I = 0; I < NumTailOps; I++) {
+    OpsToAddBack.push_back(MI.getOperand(Idx));
+    MI.removeOperand(Idx);
+  }
+  MI.addOperand(MO);
+  for (MachineOperand &TailOp : OpsToAddBack)
+    MI.addOperand(TailOp);
+}
+
+// Try to sink From to before To, also sinking any instructions between From and
+// To where there is a write-after-read dependency on a physical register.
+static bool sinkInstructionAndDeps(MachineInstr &From, MachineInstr &To) {
+  assert(From.getParent() == To.getParent());
+  SmallVector<MachineInstr *> Worklist, ToSink;
+  Worklist.push_back(&From);
+
+  // Rather than compute whether or not we saw a store for every instruction,
+  // just compute it once even if it's more conservative.
+  bool SawStore = false;
+  for (MachineBasicBlock::instr_iterator II = From.getIterator();
+       II != To.getIterator(); II++) {
+    if (II->mayStore()) {
+      SawStore = true;
+      break;
+    }
+  }
+
+  while (!Worklist.empty()) {
+    MachineInstr *MI = Worklist.pop_back_val();
+
+    if (!MI->isSafeToMove(nullptr, SawStore))
+      return false;
+
+    SmallSet<Register, 8> Defs, Uses;
+    for (MachineOperand &Def : MI->all_defs())
+      Defs.insert(Def.getReg());
+    for (MachineOperand &Use : MI->all_uses())
+      Uses.insert(Use.getReg());
+
+    // If anything from [MI, To] uses a definition of MI, we can't sink it.
+    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
+         II != To.getIterator(); II++) {
+      for (MachineOperand &Use : II->all_uses()) {
+        if (Defs.contains(Use.getReg()))
+          return false;
+      }
+    }
+
+    // If MI uses any physical registers, we need to sink any instructions after
+    // it where there might be a write-after-read dependency.
+    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
+         II != To.getIterator(); II++) {
+      bool NeedsSink = any_of(II->all_defs(), [&Uses](MachineOperand &Def) {
+        return Def.getReg().isPhysical() && Uses.contains(Def.getReg());
+      });
+      if (NeedsSink)
+        Worklist.push_back(&*II);
+    }
+
+    ToSink.push_back(MI);
+  }
+
+  for (MachineInstr *MI : ToSink)
+    MI->moveBefore(&To);
+  return true;
+}
+
+// Try to fold away VMERGE_VVM instructions. We handle these cases:
+// -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
+//  masked TU instruction. VMERGE_VVM must have its merge operand the same as
+//  its false operand.
+// -Masked TA VMERGE_VVM combined with an unmasked TA instruction folds to a
+//  masked TA instruction.
+// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to a
+//  masked TU instruction. Both instructions must have the same merge operand.
+//  VMERGE_VVM must have its merge operand the same as its false operand.
+// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied,
+// not the pseudo name.  That is, a TA VMERGE_VVM can be either the _TU pseudo
+// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
+// form.
+bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
+                                       MachineInstr *MaskDef) {
+  MachineInstr *TrueMI;
+  MachineOperand *True;
+  MachineOperand *Merge;
+  MachineOperand *False;
+
+  const unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
+  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
+  if (BaseOpc == RISCV::VMV_V_V) {
+    Merge = &MI.getOperand(1);
+    False = &MI.getOperand(1);
+    True = &MI.getOperand(2);
+    TrueMI = MRI->getVRegDef(True->getReg());
+  } else if (BaseOpc == RISCV::VMERGE_VVM) {
+    Merge = &MI.getOperand(1);
+    False = &MI.getOperand(2);
+    True = &MI.getOperand(3);
+    TrueMI = MRI->getVRegDef(True->getReg());
+  } else
+    return false;
+
+  // Returns true if LHS is the same register as RHS, or if LHS is undefined.
+  auto IsOpSameAs = [&](MachineOperand LHS, MachineOperand RHS) {
+    if (LHS.getReg() == RISCV::NoRegister)
+      return true;
+    if (RHS.getReg() == RISCV::NoRegister)
+      return false;
+    return TRI->lookThruCopyLike(LHS.getReg(), MRI) ==
+           TRI->lookThruCopyLike(RHS.getReg(), MRI);
+  };
+
+  // We require that either merge and false are the same, or that merge
+  // is undefined.
+  if (!IsOpSameAs(*Merge, *False))
+    return false;
+
+  // N must be the only user of True.
+  if (!MRI->hasOneUse(True->getReg()))
+    return false;
+
+  unsigned TrueOpc = TrueMI->getOpcode();
+  const MCInstrDesc &TrueMCID = TrueMI->getDesc();
+  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
+
+  bool IsMasked = false;
+  const RISCV::RISCVMaskedPseudoInfo *Info =
+      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
+  if (!Info && HasTiedDest) {
+    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
+    IsMasked = true;
+  }
+
+  if (!Info)
+    return false;
+
+  // When Mask is not a true mask, this transformation is illegal for some
+  // operations whose results are affected by mask, like viota.m.
+  if (Info->MaskAffectsResult && BaseOpc == RISCV::VMERGE_VVM &&
+      !isAllOnesMask(MaskDef))
+    return false;
+
+  MachineOperand &TrueMergeOp = TrueMI->getOperand(1);
+  if (HasTiedDest && TrueMergeOp.getReg() != RISCV::NoRegister) {
+    // The vmerge instruction must be TU.
+    // FIXME: This could be relaxed, but we need to handle the policy for the
+    // resulting op correctly.
+    if (Merge->getReg() == RISCV::NoRegister)
+      return false;
+    // Both the vmerge instruction and the True instruction must have the same
+    // merge operand.
+    if (!IsOpSameAs(TrueMergeOp, *False))
+      return false;
+  }
+
+  if (IsMasked) {
+    assert(HasTiedDest && "Expected tied dest");
+    // The vmerge instruction must be TU.
+    if (Merge->getReg() == RISCV::NoRegister)
+      return false;
+    // The vmerge instruction must have an all 1s mask since we're going to keep
+    // the mask from the True instruction.
+    // FIXME: Support mask agnostic True instruction which would have an
+    // undef merge operand.
+    if (BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef))
+      return false;
+  }
+
+  // Skip if True has side effect.
+  // TODO: Support vleff and vlsegff.
+  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
+    return false;
+
+  // The vector policy operand may be present for masked intrinsics
+  const MachineOperand TrueVL =
+      TrueMI->getOperand(RISCVII::getVLOpNum(TrueMCID));
+
+  auto GetMinVL = [](MachineOperand LHS,
+                     MachineOperand RHS) -> std::optional<MachineOperand> {
+    if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
+        LHS.getReg() == RHS.getReg())
+      return LHS;
+    if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
+      return RHS;
+    if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
+      return LHS;
+    if (!LHS.isImm() || !RHS.isImm())
+      return std::nullopt;
+    return LHS.getImm() <= RHS.getImm() ? LHS : RHS;
+  };
+
+  // Because MI and True must have the same merge operand (or True's operand is
+  // implicit_def), the "effective" body is the minimum of their VLs.
+  const MachineOperand VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+  auto MinVL = GetMinVL(TrueVL, VL);
+  if (!MinVL)
+    return false;
+  bool VLChanged = !MinVL->isIdenticalTo(VL);
+
+  // If we end up changing the VL or mask of True, then we need to make sure it
+  // doesn't raise any observable fp exceptions, since changing the active
+  // elements will affect how fflags is set.
+  if (VLChanged || !IsMasked)
+    if (TrueMCID.mayRaiseFPException() &&
+        !TrueMI->getFlag(MachineInstr::MIFlag::NoFPExcept))
+      return false;
+
+  unsigned MaskedOpc = Info->MaskedPseudo;
+  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
+#ifndef NDEBUG
+  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
+         "Expected instructions with mask have policy operand.");
+  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
+                                         MCOI::TIED_TO) == 0 &&
+         "Expected instructions with mask have a tied dest.");
+#endif
+
+  // Sink True down to MI so that it can access MI's operands.
+  if (!sinkInstructionAndDeps(*TrueMI, MI))
+    return false;
+
+  // Set the merge to the false operand of the merge.
+  TrueMI->getOperand(1).setReg(False->getReg());
+
+  // If we're converting it to a masked pseudo, reuse MI's mask.
+  if (!IsMasked) {
+    if (BaseOpc == RISCV::VMV_V_V) {
+      // If MI is a vmv.v.v, it won't have a mask operand. So insert an all-ones
+      // mask just before True.
+      unsigned VMSetOpc =
+          getVMSetForLMul(RISCVII::getLMul(MI.getDesc().TSFlags));
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc))
+          .addDef(RISCV::V0)
+          .add(VL)
+          .add(TrueMI->getOperand(RISCVII::getSEWOpNum(TrueMCID)));
+    }
+
+    TrueMI->setDesc(MaskedMCID);
+
+    // TODO: Increment MaskOpIdx by number of explicit defs in tablegen?
+    unsigned MaskOpIdx = Info->MaskOpIdx + TrueMI->getNumExplicitDefs();
+    insertOperand(*TrueMI, MachineOperand::CreateReg(RISCV::V0, false),
+                  MaskOpIdx);
+  }
+
+  // Update the AVL.
+  if (MinVL->isReg())
+    TrueMI->getOperand(RISCVII::getVLOpNum(MaskedMCID))
+        .ChangeToRegister(MinVL->getReg(), false);
+  else
+    TrueMI->getOperand(RISCVII::getVLOpNum(MaskedMCID))
+        .ChangeToImmediate(MinVL->getImm());
+
+  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
+  // operand is undefined.
+  //
+  // However, if the VL became smaller than what the vmerge had originally, then
+  // elements past VL that were previously in the vmerge's body will have moved
+  // to the tail. In that case we always need to use tail undisturbed to
+  // preserve them.
+  uint64_t Policy = (Merge->getReg() == RISCV::NoRegister && !VLChanged)
+                        ? RISCVII::TAIL_AGNOSTIC
+                        : RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+  TrueMI->getOperand(RISCVII::getVecPolicyOpNum(MaskedMCID)).setImm(Policy);
+
+  const TargetRegisterClass *V0RC =
+      TII->getRegClass(MaskedMCID, 0, TRI, *MI.getMF());
+
+  // The destination and passthru can no longer be in V0.
+  MRI->constrainRegClass(TrueMI->getOperand(0).getReg(), V0RC);
+  Register PassthruReg = TrueMI->getOperand(1).getReg();
+  if (PassthruReg != RISCV::NoRegister)
+    MRI->constrainRegClass(PassthruReg, V0RC);
+
+  MRI->replaceRegWith(MI.getOperand(0).getReg(),
+                      TrueMI->getOperand(0).getReg());
+  MI.eraseFromParent();
+
+  return true;
+}
+
 // Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
 // (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
 bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
@@ -98,7 +423,7 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
   unsigned NewOpc;
   switch (MI.getOpcode()) {
   default:
-    llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
+    return false;
     CASE_VMERGE_TO_VMV(MF8)
     CASE_VMERGE_TO_VMV(MF4)
     CASE_VMERGE_TO_VMV(MF2)
@@ -133,6 +458,48 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
   return true;
 }
 
+bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI,
+                                       MachineInstr *MaskDef) {
+  const RISCV::RISCVMaskedPseudoInfo *I =
+      RISCV::getMaskedPseudoInfo(MI.getOpcode());
+  if (!I)
+    return false;
+
+  // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
+  unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
+  if (!isAllOnesMask(MaskDef))
+    return false;
+
+  // There are two classes of pseudos in the table - compares and
+  // everything else.  See the comment on RISCVMaskedPseudo for details.
+  const unsigned Opc = I->UnmaskedPseudo;
+  const MCInstrDesc &MCID = TII->get(Opc);
+  const bool HasPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
+#ifndef NDEBUG
+  const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode());
+  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
+             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
+         "Masked and unmasked pseudos are inconsistent");
+  assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure");
+#endif
+
+  MI.setDesc(MCID);
+  MI.removeOperand(MaskOpIdx);
+
+  // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
+  // so try and relax it to vr.
+  MRI->recomputeRegClass(MI.getOperand(0).getReg());
+  unsigned PassthruOpIdx = MI.getNumExplicitDefs();
+  if (HasPassthru) {
+    if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+      MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
+  } else
+    MI.removeOperand(PassthruOpIdx);
+
+  return true;
+}
+
 bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -158,11 +525,16 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
   MachineInstr *CurrentV0Def;
   for (MachineBasicBlock &MBB : MF) {
     CurrentV0Def = nullptr;
-    for (MachineInstr &MI : MBB) {
-      unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
-      if (BaseOpc == RISCV::VMERGE_VVM)
-        Changed |= convertVMergeToVMv(MI, CurrentV0Def);
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
+      Changed |= foldVMergeIntoOps(MI, CurrentV0Def);
+      if (MI.definesRegister(RISCV::V0, TRI))
+        CurrentV0Def = &MI;
+    }
 
+    CurrentV0Def = nullptr;
+    for (MachineInstr &MI : MBB) {
+      Changed |= convertVMergeToVMv(MI, CurrentV0Def);
+      Changed |= convertToUnmasked(MI, CurrentV0Def);
       if (MI.definesRegister(RISCV::V0, TRI))
         CurrentV0Def = &MI;
     }
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 920657a198d9b6b..f1b96ffd37b27a8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -156,8 +156,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
 
   CurDAG->setRoot(Dummy.getValue());
 
-  MadeChange |= doPeepholeMergeVVMFold();
-
   // After we're done with everything else, convert IMPLICIT_DEF
   // passthru operands to NoRegister.  This is required to workaround
   // an optimization deficiency in MachineCSE.  This really should
@@ -3420,309 +3418,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
   return true;
 }
 
-static bool IsVMerge(SDNode *N) {
-  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
-}
-
-static bool IsVMv(SDNode *N) {
-  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
-}
-
-static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
-  switch (LMUL) {
-  case RISCVII::LMUL_F8:
-    return RISCV::PseudoVMSET_M_B1;
-  case RISCVII::LMUL_F4:
-    return RISCV::PseudoVMSET_M_B2;
-  case RISCVII::LMUL_F2:
-    return RISCV::PseudoVMSET_M_B4;
-  case RISCVII::LMUL_1:
-    return RISCV::PseudoVMSET_M_B8;
-  case RISCVII::LMUL_2:
-    return RISCV::PseudoVMSET_M_B16;
-  case RISCVII::LMUL_4:
-    return RISCV::PseudoVMSET_M_B32;
-  case RISCVII::LMUL_8:
-    return RISCV::PseudoVMSET_M_B64;
-  case RISCVII::LMUL_RESERVED:
-    llvm_unreachable("Unexpected LMUL");
-  }
-  llvm_unreachable("Unknown VLMUL enum");
-}
-
-// Try to fold away VMERGE_VVM instructions. We handle these cases:
-// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
-//  folds to a masked TU instruction. VMERGE_VVM must have have merge operand
-//  same as false operand.
-// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a
-//  masked TA instruction.
-// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to
-//  masked TU instruction. Both instructions must have the same merge operand.
-//  VMERGE_VVM must have have merge operand same as false operand.
-// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied,
-// not the pseudo name.  That is, a TA VMERGE_VVM can be either the _TU pseudo
-// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
...
[truncated]

preames added a commit to preames/llvm-project that referenced this pull request Nov 15, 2023
preames added a commit that referenced this pull request Nov 27, 2023
This change is motivated by a point of confusion on #71764. I hadn't fully understood why doPeepholeMaskedRVV needed to be part of the same change. As indicated in the fixme in this patch, the reason is that performCombineVMergeAndVOps doesn't know how to deal with the true side of the merge being an all-ones masked instruction.

This change removes one of two calls to the routine in RISCVISelDAGToDAG, and adds a clarifying comment on the precondition for the remaining call. The post-ISEL code is tested by the cases where we can form an unmasked instruction after folding the vmerge back into true.

I don't really care if we actually land this patch, or leave it rolled into #71764. I'm posting it mostly to clarify the confusion.
}

// Returns true if LHS is the same register as RHS, or if LHS is undefined.
bool RISCVFoldMasks::isOpSameAs(const MachineOperand &LHS,
Collaborator

This needs a better name. Though, honestly, I'm not sure it's helping readability as currently structured.

Maybe move the NoRegister checks out to the callers, and have this only do the peek through copies bit?
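
For illustration, a sketch of that split (the helper name and exact shape here are hypothetical, not from the patch):

// Peek through copy-like instructions only; callers decide what an
// undefined (NoRegister) operand means for them.
Register RISCVFoldMasks::lookThruCopies(Register Reg) {
  return TRI->lookThruCopyLike(Reg, MRI);
}

// At the call site: merge must be undefined, or equal to false through copies.
if (Merge->getReg() != RISCV::NoRegister &&
    (False->getReg() == RISCV::NoRegister ||
     lookThruCopies(Merge->getReg()) != lookThruCopies(False->getReg())))
  return false;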

if (TII->get(TrueOpc).hasUnmodeledSideEffects())
return false;

// The vector policy operand may be present for masked intrinsics
Collaborator

Move this below the definition of the lambda so it's easier to find/follow.

Contributor Author

That comment is also not needed any more, will remove

// Sink True down to MI so that it can access MI's operands.
assert(!TrueMI.hasImplicitDef());
bool SawStore = false;
for (MachineBasicBlock::instr_iterator II = TrueMI.getIterator();
Collaborator

I don't think we have checked before this that TrueMI and MI are in the same basic block.
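
A minimal guard would look like this (the author's follow-up revision, noted in the commit message below, adds exactly such a check):

// True must be in the same block as the vmerge before we try to sink it;
// sinking across basic block boundaries is not handled.
if (TrueMI.getParent() != MI.getParent())
  return false;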


// If we're converting it to a masked pseudo, reuse MI's mask.
if (!IsMasked) {
if (BaseOpc == RISCV::VMV_V_V) {
Collaborator

You've got this opcode check repeated a bunch, maybe use a well named variable for readability? VMergeIsMasked? Could also fold the allOnesMask check into the definition.
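
Something along these lines, using the suggested (hypothetical) name and folding in the all-ones-mask check:

// Hoisted once near the top of the fold: the vmerge is "masked" when it
// is a VMERGE_VVM whose mask is not known to be all ones.
const bool VMergeIsMasked =
    BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef);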

Dest)
.add(VL)
.add(TrueMI.getOperand(RISCVII::getSEWOpNum(TrueMCID)));
BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(RISCV::COPY),
Collaborator

I am not sure this is safe. We're inserting a clobber of V0 which didn't previously exist. Do we need to restore the prior V0 value after doing so? We've tracked the definition, so we know what it was.

lukel97 (Contributor Author) commented Nov 28, 2023

Yeah I think we should handle this. Off the top of my head, almost every instruction that uses V0 will have a copy to V0 immediately before it because of the glue in SelectionDAG. But I remember trying to add an assert for this and there were some edge cases.
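
A sketch of the direction eventually taken (see the commit notes further down), which keeps the old mask definition alive instead of leaving an unmatched clobber; MaskDef here is the tracked COPY that last defined V0:

// Re-emit the old mask COPY after MI so later readers of V0 still see the
// original value. Only sound because there are no other defs of V0
// between MaskDef and MI.
if (MaskDef && MaskDef->getOperand(0).getReg() == RISCV::V0) {
  MaskDef->removeFromParent();
  MI.getParent()->insertAfter(MI.getIterator(), MaskDef);
}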

On a separate note, I have a local branch where I was able to remove the V0 copies in SelectionDAG by adjusting the tablegen patterns to use the VMV0 register class instead, e.g.:

@@ -4235,13 +4235,13 @@ class VPatBinaryMaskTA<string intrinsic_name,
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (mask_type V0),
+                   (mask_type VMV0:$vm),
                    VLOpFrag, (XLenVT timm:$policy))),
                    (!cast<Instruction>(inst#"_MASK")
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+                   (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>;

I was then able to simplify all of this, but I'll return to it after this PR. It requires changes to this pass and also has its own test diff since registers get shuffled about.

.addReg(Dest);
}

TrueMI.setDesc(MaskedMCID);
Collaborator

This in-place mutation of the operands looks really risky. I think it would be much safer to build up the new operand list and then replace it in one go. In particular, using the index accessors on the masked MCID could clobber values before they are read. I don't want to have to reason about the potential RAWs.

Contributor Author

I gave this a try by building up a SmallVector<MachineOperand> and then creating a new MachineInstr with MaskedMCID, but then ran into the issue that it doesn't preserve any of the flags or tied operands. The only way I can think of preserving these is by operating on the operands in place, or by cloning the instruction entirely. But then we still need to insert an operand for the unmasked vmv.v.v case. Any ideas?
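
One possible shape, as a rough sketch: rebuild the instruction, then copy over what the operand list alone doesn't carry. NewMIOps is a hypothetical operand list built up earlier, and whether this covers every flag and tied-operand case is exactly the open question:

// Create the masked pseudo fresh, then restore flags and the tied dest.
Register NewDest = MRI->createVirtualRegister(
    TII->getRegClass(MaskedMCID, 0, TRI, *MI.getMF()));
MachineInstrBuilder MIB = BuildMI(*TrueMI.getParent(), &TrueMI,
                                  TrueMI.getDebugLoc(), MaskedMCID, NewDest);
for (const MachineOperand &MO : NewMIOps) // non-def operands, in new order
  MIB.add(MO);
MIB.cloneMemRefs(TrueMI);          // carry over the memory operands
MIB->setFlags(TrueMI.getFlags());  // e.g. preserve nofpexcept
MIB->tieOperands(0, 1);            // re-tie the dest to the passthru
MRI->replaceRegWith(TrueMI.getOperand(0).getReg(), NewDest);
TrueMI.eraseFromParent();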


MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg());
MI.eraseFromParent();
if (IsMasked)
Collaborator

How do you know there's not another user of the same mask elsewhere?


github-actions bot commented Nov 28, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

lukel97 added a commit that referenced this pull request Nov 28, 2023
- Instead of erasing the old mask, move it after MI. Even though in practice
  there shouldn't be any other users of V0, since SelectionDAG emits a copy for
  every user, this should preserve the existing value of V0 anyway. This is
  safe to do because we know there are no other defs of V0 between MaskDef and MI.
- Remove isOpSameAs. Turns out we don't need to check if False is NoRegister,
  because that implicit_def->noreg peephole only operates on passthru operands.
- Check that TrueMI and MI are in the same block
lukel97 added a commit to lukel97/llvm-project that referenced this pull request Mar 8, 2024
lukel97 added a commit that referenced this pull request Mar 21, 2024
Reviving some of the progress on #71764. To recap, we explored removing
the V0 register copies to simplify the pass, but hit a limitation with
the register allocator due to our use of the vmv0 singleton reg class
and early-clobber constraints.

So since we will have to continue to track the definition of V0 ourselves, this patch simplifies it by storing it in a map. It will allow us to move copies to V0 around in #71764 without having to do extra bookkeeping.
chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024