diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 7a8ff84995ead..1c815424bdfa6 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -47,6 +47,18 @@ static cl::opt DisableInsertVSETVLPHIOpt( namespace { +/// Given a virtual register \p Reg, return the corresponding VNInfo for it. +/// This should never return nullptr. +static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI, + const LiveIntervals *LIS) { + assert(Reg.isVirtual()); + auto &LI = LIS->getInterval(Reg); + SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI); + VNInfo *VNI = LI.getVNInfoBefore(SI); + assert(VNI); + return VNI; +} + static unsigned getVLOpNum(const MachineInstr &MI) { return RISCVII::getVLOpNum(MI.getDesc()); } @@ -426,7 +438,8 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { /// values of the VL and VTYPE registers after insertion. class VSETVLIInfo { struct AVLDef { - const MachineInstr *DefMI; + // Every AVLDef should have a VNInfo. + const VNInfo *ValNo; Register DefReg; }; union { @@ -465,9 +478,9 @@ class VSETVLIInfo { void setUnknown() { State = Unknown; } bool isUnknown() const { return State == Unknown; } - void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) { - assert(DefMI && AVLReg.isVirtual()); - AVLRegDef.DefMI = DefMI; + void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) { + assert(VNInfo && AVLReg.isVirtual()); + AVLRegDef.ValNo = VNInfo; AVLRegDef.DefReg = AVLReg; State = AVLIsReg; } @@ -493,9 +506,18 @@ class VSETVLIInfo { assert(hasAVLImm()); return AVLImm; } - const MachineInstr &getAVLDefMI() const { - assert(hasAVLReg() && AVLRegDef.DefMI); - return *AVLRegDef.DefMI; + const VNInfo *getAVLVNInfo() const { + assert(hasAVLReg()); + return AVLRegDef.ValNo; + } + // Most AVLIsReg infos will have a single defining MachineInstr, unless it was + // a PHI node. In that case getAVLVNInfo()->def will point to the block + // boundary slot. 
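Note: the pattern introduced above (getVNInfoFromReg plus the VNInfo-carrying AVLDef) is the core of this change: an AVL register use is resolved to the LiveIntervals value number reaching the instruction, and only to a defining MachineInstr when that value is not a PHI def. A condensed, illustrative sketch of that lookup, using only the LiveIntervals/SlotIndexes APIs already included by this file (the helper name is made up for the example):

// Sketch: resolve the value of virtual register Reg that reaches MI, and the
// defining instruction when the reaching value is not a PHI def.
static const MachineInstr *getReachingDef(Register Reg, const MachineInstr &MI,
                                          const LiveIntervals &LIS) {
  const LiveInterval &LI = LIS.getInterval(Reg);
  SlotIndex Idx = LIS.getSlotIndexes()->getInstructionIndex(MI);
  // The value live immediately before MI is the reaching definition.
  const VNInfo *VNI = LI.getVNInfoBefore(Idx);
  if (!VNI || VNI->isPHIDef())
    return nullptr; // A PHI def has no single defining instruction.
  return LIS.getInstructionFromIndex(VNI->def);
}
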
+ const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const { + assert(hasAVLReg()); + auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def); + assert(!(getAVLVNInfo()->isPHIDef() && MI)); + return MI; } void setAVL(VSETVLIInfo Info) { @@ -503,7 +525,7 @@ class VSETVLIInfo { if (Info.isUnknown()) setUnknown(); else if (Info.hasAVLReg()) - setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg()); + setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg()); else if (Info.hasAVLVLMAX()) setAVLVLMAX(); else if (Info.hasAVLIgnored()) @@ -519,11 +541,13 @@ class VSETVLIInfo { bool getTailAgnostic() const { return TailAgnostic; } bool getMaskAgnostic() const { return MaskAgnostic; } - bool hasNonZeroAVL() const { + bool hasNonZeroAVL(const LiveIntervals *LIS) const { if (hasAVLImm()) return getAVLImm() > 0; - if (hasAVLReg()) - return isNonZeroLoadImmediate(getAVLDefMI()); + if (hasAVLReg()) { + if (auto *DefMI = getAVLDefMI(LIS)) + return isNonZeroLoadImmediate(*DefMI); + } if (hasAVLVLMAX()) return true; if (hasAVLIgnored()) @@ -531,16 +555,17 @@ class VSETVLIInfo { return false; } - bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const { + bool hasEquallyZeroAVL(const VSETVLIInfo &Other, + const LiveIntervals *LIS) const { if (hasSameAVL(Other)) return true; - return (hasNonZeroAVL() && Other.hasNonZeroAVL()); + return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS)); } bool hasSameAVL(const VSETVLIInfo &Other) const { if (hasAVLReg() && Other.hasAVLReg()) - return AVLRegDef.DefMI == Other.AVLRegDef.DefMI && - AVLRegDef.DefReg == Other.AVLRegDef.DefReg; + return getAVLVNInfo()->id == Other.getAVLVNInfo()->id && + getAVLReg() == Other.getAVLReg(); if (hasAVLImm() && Other.hasAVLImm()) return getAVLImm() == Other.getAVLImm(); @@ -620,7 +645,7 @@ class VSETVLIInfo { // Require are compatible with the previous vsetvli instruction represented // by this. MI is the instruction whose requirements we're considering. bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require, - const MachineRegisterInfo &MRI) const { + const LiveIntervals *LIS) const { assert(isValid() && Require.isValid() && "Can't compare invalid VSETVLIInfos"); assert(!Require.SEWLMULRatioOnly && @@ -636,7 +661,7 @@ class VSETVLIInfo { if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require))) return false; - if (Used.VLZeroness && !hasEquallyZeroAVL(Require)) + if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS)) return false; return hasCompatibleVTYPE(Used, Require); @@ -765,6 +790,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { const RISCVSubtarget *ST; const TargetInstrInfo *TII; MachineRegisterInfo *MRI; + LiveIntervals *LIS; std::vector BlockInfo; std::queue WorkList; @@ -777,6 +803,14 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); } @@ -848,7 +882,7 @@ INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli", // Return a VSETVLIInfo representing the changes made by this VSETVLI or // VSETIVLI instruction. 
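A pass that queries LiveIntervals like this has to declare the dependency in getAnalysisUsage, as the hunk above does. A sketch of the usual shape of that declaration inside the pass class; the exact set of required/preserved analyses shown here is an assumption for illustration, not copied from this patch:

void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.setPreservesCFG();
  // LiveIntervals (and the SlotIndexes it is built on) must be computed
  // before the pass runs, and the pass keeps them up to date, so both are
  // typically marked required and preserved. Additional liveness-derived
  // analyses are often marked preserved as well.
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
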
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { + const LiveIntervals *LIS) { VSETVLIInfo NewInfo; if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { NewInfo.setAVLImm(MI.getOperand(1).getImm()); @@ -861,7 +895,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI, if (AVLReg == RISCV::X0) NewInfo.setAVLVLMAX(); else - NewInfo.setAVLRegDef(MRI.getUniqueVRegDef(AVLReg), AVLReg); + NewInfo.setAVLRegDef(getVNInfoFromReg(AVLReg, MI, LIS), AVLReg); } NewInfo.setVTYPE(MI.getOperand(2).getImm()); @@ -880,7 +914,7 @@ static unsigned computeVLMAX(unsigned VLEN, unsigned SEW, static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, const RISCVSubtarget &ST, - const MachineRegisterInfo *MRI) { + const LiveIntervals *LIS) { VSETVLIInfo InstrInfo; bool TailAgnostic = true; @@ -933,7 +967,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, else InstrInfo.setAVLImm(Imm); } else { - InstrInfo.setAVLRegDef(MRI->getUniqueVRegDef(VLOp.getReg()), + InstrInfo.setAVLRegDef(getVNInfoFromReg(VLOp.getReg(), MI, LIS), VLOp.getReg()); } } else { @@ -955,9 +989,9 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, // register AVLs to avoid extending live ranges without being sure we can // kill the original source reg entirely. if (InstrInfo.hasAVLReg()) { - const MachineInstr &DefMI = InstrInfo.getAVLDefMI(); - if (isVectorConfigInstr(DefMI)) { - VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI); + if (const MachineInstr *DefMI = InstrInfo.getAVLDefMI(LIS); + DefMI && isVectorConfigInstr(*DefMI)) { + VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI, LIS); if (DefInstrInfo.hasSameVLMAX(InstrInfo) && (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX())) InstrInfo.setAVL(DefInstrInfo); @@ -983,11 +1017,12 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same // VLMAX. if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { - BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()) + .addReg(RISCV::VL, RegState::Implicit); + LIS->InsertMachineInstrInMaps(*MI); return; } @@ -995,15 +1030,16 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, // it has the same VLMAX we want and the last VL/VTYPE we observed is the // same, we can use the X0, X0 form. 
if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) { - const MachineInstr &DefMI = Info.getAVLDefMI(); - if (isVectorConfigInstr(DefMI)) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI); + if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS); + DefMI && isVectorConfigInstr(*DefMI)) { + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS); if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) { - BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()) + .addReg(RISCV::VL, RegState::Implicit); + LIS->InsertMachineInstrInMaps(*MI); return; } } @@ -1011,10 +1047,11 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, } if (Info.hasAVLImm()) { - BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(Info.getAVLImm()) - .addImm(Info.encodeVTYPE()); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addImm(Info.getAVLImm()) + .addImm(Info.encodeVTYPE()); + LIS->InsertMachineInstrInMaps(*MI); return; } @@ -1023,36 +1060,46 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, // the previous vl to become invalid. if (PrevInfo.isValid() && !PrevInfo.isUnknown() && Info.hasSameVLMAX(PrevInfo)) { - BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()) + .addReg(RISCV::VL, RegState::Implicit); + LIS->InsertMachineInstrInMaps(*MI); return; } // Otherwise use an AVL of 1 to avoid depending on previous vl. 
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(1) - .addImm(Info.encodeVTYPE()); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addImm(1) + .addImm(Info.encodeVTYPE()); + LIS->InsertMachineInstrInMaps(*MI); return; } if (Info.hasAVLVLMAX()) { Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(DestReg, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()); + LIS->InsertMachineInstrInMaps(*MI); + LIS->createAndComputeVirtRegInterval(DestReg); return; } Register AVLReg = Info.getAVLReg(); MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass); - BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(AVLReg) - .addImm(Info.encodeVTYPE()); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(AVLReg) + .addImm(Info.encodeVTYPE()); + LIS->InsertMachineInstrInMaps(*MI); + // Normally the AVL's live range will already extend past the inserted vsetvli + // because the pseudos below will already use the AVL. But this isn't always + // the case, e.g. PseudoVMV_X_S doesn't have an AVL operand. + LIS->getInterval(AVLReg).extendInBlock( + LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot()); } static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) { @@ -1065,7 +1112,7 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) { bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo) const { - assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI)); + assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, LIS)); if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) return true; @@ -1106,7 +1153,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, Used.TailPolicy = false; } - if (CurInfo.isCompatible(Used, Require, *MRI)) + if (CurInfo.isCompatible(Used, Require, LIS)) return false; // We didn't find a compatible value. If our AVL is a virtual register, @@ -1114,9 +1161,9 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, // and the last VL/VTYPE we observed is the same, we don't need a // VSETVLI here. 
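The LIS->InsertMachineInstrInMaps / createAndComputeVirtRegInterval / extendInBlock calls added to insertVSETVLI above follow the general recipe for inserting an instruction while LiveIntervals is active: index the new instruction, build an interval for any new virtual register it defines, and make sure the live ranges of any values it reads reach the new use. A minimal sketch under those assumptions (AVLReg and VTypeImm are placeholders for the example):

static void insertVSETVLIWithLIS(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator InsertPt,
                                 const DebugLoc &DL, const TargetInstrInfo *TII,
                                 LiveIntervals *LIS, Register AVLReg,
                                 unsigned VTypeImm) {
  MachineInstr *MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                         .addReg(AVLReg)
                         .addImm(VTypeImm);
  // 1. Register the new instruction with SlotIndexes/LiveIntervals.
  SlotIndex Idx = LIS->InsertMachineInstrInMaps(*MI);
  // 2. A brand-new virtual register def would additionally need
  //    LIS->createAndComputeVirtRegInterval(NewVReg); the X0 def here doesn't.
  // 3. Extend the AVL's live range within this block so it covers the new use,
  //    in case no later instruction already read the value.
  LIS->getInterval(AVLReg).extendInBlock(LIS->getMBBStartIdx(&MBB),
                                         Idx.getRegSlot());
}
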
if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) { - const MachineInstr &DefMI = Require.getAVLDefMI(); - if (isVectorConfigInstr(DefMI)) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI); + if (const MachineInstr *DefMI = Require.getAVLDefMI(LIS); + DefMI && isVectorConfigInstr(*DefMI)) { + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS); if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo)) return false; } @@ -1152,7 +1199,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, if (!RISCVII::hasSEWOp(TSFlags)) return; - const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI); + const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, LIS); assert(NewInfo.isValid() && !NewInfo.isUnknown()); if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info)) return; @@ -1171,7 +1218,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, // variant, so we avoid the transform to prevent extending live range of an // avl register operand. // TODO: We can probably relax this for immediates. - bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) && + bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) && IncomingInfo.hasSameVLMAX(PrevInfo); if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero)) Info.setAVL(IncomingInfo); @@ -1202,14 +1249,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const { if (isVectorConfigInstr(MI)) { - Info = getInfoForVSETVLI(MI, *MRI); + Info = getInfoForVSETVLI(MI, LIS); return; } if (RISCV::isFaultFirstLoad(MI)) { // Update AVL to vl-output of the fault first load. - Info.setAVLRegDef(MRI->getUniqueVRegDef(MI.getOperand(1).getReg()), - MI.getOperand(1).getReg()); + assert(MI.getOperand(1).getReg().isVirtual()); + auto &LI = LIS->getInterval(MI.getOperand(1).getReg()); + SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); + VNInfo *VNI = LI.getVNInfoAt(SI); + Info.setAVLRegDef(VNI, MI.getOperand(1).getReg()); return; } @@ -1293,7 +1343,7 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { } // If we weren't able to prove a vsetvli was directly unneeded, it might still -// be unneeded if the AVL is a phi node where all incoming values are VL +// be unneeded if the AVL was a phi node where all incoming values are VL // outputs from the last VSETVLI in their respective basic blocks. bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB) const { @@ -1303,26 +1353,27 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, if (!Require.hasAVLReg()) return true; - // We need the AVL to be produce by a PHI node in this basic block. - const MachineInstr *PHI = &Require.getAVLDefMI(); - if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB) + // We need the AVL to have been produced by a PHI node in this basic block. 
+ const VNInfo *Valno = Require.getAVLVNInfo(); + if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB) return true; - for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; - PHIOp += 2) { - Register InReg = PHI->getOperand(PHIOp).getReg(); - MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); + const LiveRange &LR = LIS->getInterval(Require.getAVLReg()); + + for (auto *PBB : MBB.predecessors()) { const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit; // We need the PHI input to the be the output of a VSET(I)VLI. - MachineInstr *DefMI = MRI->getUniqueVRegDef(InReg); - assert(DefMI); - if (!isVectorConfigInstr(*DefMI)) + const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB)); + if (!Value) + return true; + MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def); + if (!DefMI || !isVectorConfigInstr(*DefMI)) return true; // We found a VSET(I)VLI make sure it matches the output of the // predecessor block. - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI); + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS); if (DefInfo != PBBExit) return true; @@ -1377,19 +1428,28 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (VLOp.isReg()) { Register Reg = VLOp.getReg(); - MachineInstr *VLOpDef = MRI->getUniqueVRegDef(Reg); - assert(VLOpDef); + LiveInterval &LI = LIS->getInterval(Reg); // Erase the AVL operand from the instruction. VLOp.setReg(RISCV::NoRegister); VLOp.setIsKill(false); + SmallVector DeadMIs; + LIS->shrinkToUses(&LI, &DeadMIs); + // We might have separate components that need split due to + // needVSETVLIPHI causing us to skip inserting a new VL def. + SmallVector SplitLIs; + LIS->splitSeparateComponents(LI, SplitLIs); // If the AVL was an immediate > 31, then it would have been emitted // as an ADDI. However, the ADDI might not have been used in the // vsetvli, or a vsetvli might not have been emitted, so it may be // dead now. - if (TII->isAddImmediate(*VLOpDef, Reg) && MRI->use_nodbg_empty(Reg)) - VLOpDef->eraseFromParent(); + for (MachineInstr *DeadMI : DeadMIs) { + if (!TII->isAddImmediate(*DeadMI, Reg)) + continue; + LIS->RemoveMachineInstrFromMaps(*DeadMI); + DeadMI->eraseFromParent(); + } } MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, /*isImp*/ true)); @@ -1458,14 +1518,14 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { // we need to prove the value is available at the point we're going // to insert the vsetvli at. if (AvailableInfo.hasAVLReg()) { - const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI(); + SlotIndex SI = AvailableInfo.getAVLVNInfo()->def; // This is an inline dominance check which covers the case of // UnavailablePred being the preheader of a loop. 
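Because this pass now runs after PHI elimination (and after register coalescing at O1 and above), needVSETVLIPHI can no longer walk a PHI instruction operand-by-operand; the equivalent LiveIntervals query, used in the hunk above, asks which value of the AVL interval is live out of each predecessor. An illustrative sketch of just that query, stripped of the VSETVLIInfo bookkeeping:

// Sketch: return true if, in every predecessor of MBB, the value of Reg that
// is live out of that predecessor is defined by a real instruction (rather
// than being another PHI value, or not live out at all).
static bool allPredValuesHaveDefs(Register Reg, const MachineBasicBlock &MBB,
                                  const LiveIntervals &LIS) {
  const LiveRange &LR = LIS.getInterval(Reg);
  for (const MachineBasicBlock *PBB : MBB.predecessors()) {
    const VNInfo *Value = LR.getVNInfoBefore(LIS.getMBBEndIdx(PBB));
    if (!Value || !LIS.getInstructionFromIndex(Value->def))
      return false;
  }
  return true;
}
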
- if (AVLDefMI->getParent() != UnavailablePred) + if (LIS->getMBBFromIndex(SI) != UnavailablePred) + return; + if (!UnavailablePred->terminators().empty() && + SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator())) return; - for (auto &TermMI : UnavailablePred->terminators()) - if (&TermMI == AVLDefMI) - return; } // If the AVL isn't used in its predecessors then bail, since we have no AVL @@ -1526,7 +1586,8 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { static bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI, const DemandedFields &Used, - const MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI, + const LiveIntervals *LIS) { // If the VL values aren't equal, return false if either a) the former is // demanded, or b) we can't rewrite the former to be the later for // implementation reasons. @@ -1537,8 +1598,8 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, if (Used.VLZeroness) { if (isVLPreservingConfig(PrevMI)) return false; - if (!getInfoForVSETVLI(PrevMI, MRI) - .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI))) + if (!getInfoForVSETVLI(PrevMI, LIS) + .hasEquallyZeroAVL(getInfoForVSETVLI(MI, LIS), LIS)) return false; } @@ -1588,7 +1649,7 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) { continue; } - if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) { + if (canMutatePriorConfig(MI, *NextMI, Used, *MRI, LIS)) { if (!isVLPreservingConfig(*NextMI)) { Register DefReg = NextMI->getOperand(0).getReg(); @@ -1661,9 +1722,17 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { if (RISCV::isFaultFirstLoad(MI)) { Register VLOutput = MI.getOperand(1).getReg(); assert(VLOutput.isVirtual()); - if (!MRI->use_nodbg_empty(VLOutput)) - BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL), - VLOutput); + if (!MI.getOperand(1).isDead()) { + auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(), + TII->get(RISCV::PseudoReadVL), VLOutput); + // Move the LiveInterval's definition down to PseudoReadVL. + SlotIndex NewDefSI = + LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot(); + LiveInterval &DefLI = LIS->getInterval(VLOutput); + VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex()); + DefLI.removeSegment(DefLI.beginIndex(), NewDefSI); + DefVNI->def = NewDefSI; + } // We don't use the vl output of the VLEFF/VLSEGFF anymore. MI.getOperand(1).setReg(RISCV::X0); } @@ -1680,6 +1749,7 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { TII = ST->getInstrInfo(); MRI = &MF.getRegInfo(); + LIS = &getAnalysis(); assert(BlockInfo.empty() && "Expect empty block infos"); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 7b2dcadc41917..5d598a275a008 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -541,9 +541,16 @@ void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVPreRAExpandPseudoPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) addPass(createRISCVMergeBaseOffsetOptPass()); + addPass(createRISCVInsertReadWriteCSRPass()); addPass(createRISCVInsertWriteVXRMPass()); - addPass(createRISCVInsertVSETVLIPass()); + + // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after + // register coalescing so needVSETVLIPHI doesn't need to look through COPYs. 
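The insertReadVL change above moves a live interval's definition point down to the newly created PseudoReadVL: index the new defining instruction, trim the now-dead leading segment, and repoint the value number. A sketch of that recipe, assuming the value has a single definition at the start of its interval (function and parameter names are illustrative):

// Sketch: NewDef now produces the value originally defined at the start of
// VReg's interval; move the definition down to NewDef.
static void moveDefDown(MachineInstr &NewDef, Register VReg,
                        LiveIntervals &LIS) {
  SlotIndex NewDefIdx = LIS.InsertMachineInstrInMaps(NewDef).getRegSlot();
  LiveInterval &DefLI = LIS.getInterval(VReg);
  VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
  assert(DefVNI && "expected a value defined at the start of the interval");
  // Drop the part of the live range between the old and the new definition...
  DefLI.removeSegment(DefLI.beginIndex(), NewDefIdx);
  // ...and make the value number point at the new defining slot.
  DefVNI->def = NewDefIdx;
}
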
+ if (TM->getOptLevel() == CodeGenOptLevel::None) + insertPass(&PHIEliminationID, createRISCVInsertVSETVLIPass()); + else + insertPass(&RegisterCoalescerID, createRISCVInsertVSETVLIPass()); } void RISCVPassConfig::addFastRegAlloc() { diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index c4a7f9562534c..3aaa5dc03a7dc 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -42,12 +42,14 @@ ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass -; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Slot index numbering +; CHECK-NEXT: Live Interval Analysis +; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator -; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Slot index numbering ; CHECK-NEXT: Live Interval Analysis ; CHECK-NEXT: RISC-V Coalesce VSETVLI pass diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 4a71d3276d263..52634b2a81629 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -117,7 +117,6 @@ ; CHECK-NEXT: RISC-V Merge Base Offset ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass -; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions @@ -129,6 +128,7 @@ ; CHECK-NEXT: Slot index numbering ; CHECK-NEXT: Live Interval Analysis ; CHECK-NEXT: Register Coalescer +; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: Rename Disconnected Subregister Components ; CHECK-NEXT: Machine Instruction Scheduler ; CHECK-NEXT: Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll index 682ad57686724..61acf1afa94de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll @@ -36,8 +36,8 @@ define @vadd_undef( %a, define @vadd_same_passthru( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { ; CHECK-LABEL: vadd_same_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma ; CHECK-NEXT: vmv2r.v v14, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma ; CHECK-NEXT: vadd.vv v14, v10, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma ; CHECK-NEXT: vmv.v.v v8, v14 diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll index ff35043dbd7e7..c6b84209a8753 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll @@ -149,8 +149,8 @@ define void @constant_zero_stride(ptr %s, ptr %d) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vse8.v v9, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll index dc4d28819bbbd..2b4b8e979f3d7 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll @@ -141,9 +141,9 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; SINK-NEXT: andi a4, a3, 1024 ; SINK-NEXT: xori a3, a4, 1024 ; SINK-NEXT: slli a5, a5, 1 -; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; SINK-NEXT: mv a6, a0 ; SINK-NEXT: mv a7, a3 +; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; SINK-NEXT: .LBB1_3: # %vector.body ; SINK-NEXT: # =>This Inner Loop Header: Depth=1 ; SINK-NEXT: vl2re32.v v8, (a6) @@ -183,9 +183,9 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; DEFAULT-NEXT: andi a4, a3, 1024 ; DEFAULT-NEXT: xori a3, a4, 1024 ; DEFAULT-NEXT: slli a5, a5, 1 -; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; DEFAULT-NEXT: mv a6, a0 ; DEFAULT-NEXT: mv a7, a3 +; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; DEFAULT-NEXT: .LBB1_3: # %vector.body ; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1 ; DEFAULT-NEXT: vl2re32.v v8, (a6) @@ -459,9 +459,9 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; SINK-NEXT: addi a3, a2, -1 ; SINK-NEXT: andi a4, a3, 1024 ; SINK-NEXT: xori a3, a4, 1024 -; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; SINK-NEXT: mv a5, a0 ; SINK-NEXT: mv a6, a3 +; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; SINK-NEXT: .LBB4_3: # %vector.body ; SINK-NEXT: # =>This Inner Loop Header: Depth=1 ; SINK-NEXT: vl1re32.v v8, (a5) @@ -500,9 +500,9 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; DEFAULT-NEXT: addi a3, a2, -1 ; DEFAULT-NEXT: andi a4, a3, 1024 ; DEFAULT-NEXT: xori a3, a4, 1024 -; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; DEFAULT-NEXT: mv a5, a0 ; DEFAULT-NEXT: mv a6, a3 +; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; DEFAULT-NEXT: .LBB4_3: # %vector.body ; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1 ; DEFAULT-NEXT: vl1re32.v v8, (a5) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 03e99baf91c08..635869904832c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1155,8 +1155,8 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 539a8403c9352..f42f32e246585 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12092,8 +12092,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-NEXT: vmv1r.v v12, v10 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v10, v10, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll index 175a3ee43f33d..d1fb30c7daa3e 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll @@ -369,8 +369,8 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) { define <2 x i8> @vslide1up_4xi8_neg_length_changing(<4 x i8> %v, i8 %b) { ; CHECK-LABEL: vslide1up_4xi8_neg_length_changing: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vslideup.vi v9, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index f0fcc482e2207..0e6b03bf16323 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -168,8 +168,8 @@ define void @strided_constant_0(ptr %x, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vi v9, v8, 4 ; CHECK-NEXT: vse16.v v9, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index c38406bafa8a9..64ad86db04959 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -62,8 +62,8 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: li a4, 5 ; CHECK-NEXT: .LBB1_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t ; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: vadd.vv v9, v10, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index 4f16ce28bbb7e..9fa8ab39723f7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -394,7 +394,6 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: li a5, 24 ; CHECK-NEXT: mul a4, a4, a5 @@ -402,6 +401,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v24, v8 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: li a5, 56 ; CHECK-NEXT: mul a4, a4, a5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll index 79b1e14b774a4..c8bed2de754b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll @@ -15,8 +15,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; RV32-NEXT: .LBB0_1: # %for.body ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetvli zero, a1, 
e8, mf2, tu, ma ; RV32-NEXT: vmv1r.v v10, v8 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, tu, ma ; RV32-NEXT: vslideup.vx v10, v9, a2 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; RV32-NEXT: vmv.s.x v10, a0 @@ -40,8 +40,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; RV64-NEXT: .LBB0_1: # %for.body ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma ; RV64-NEXT: vmv1r.v v10, v8 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma ; RV64-NEXT: vslideup.vx v10, v9, a2 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; RV64-NEXT: vmv.s.x v10, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index a6b2d3141f22f..bb28ff5c6dc4f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -479,11 +479,11 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 @@ -640,11 +640,11 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 @@ -811,11 +811,11 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v10, 2 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 @@ -3850,11 +3850,11 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 @@ -4009,11 +4009,11 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: 
addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 @@ -4179,11 +4179,11 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v10, 2 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index 129fbcfb88327..e73415ac0085e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -21,8 +21,8 @@ define @foo( %a, @foo( %a, @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -90,8 +90,8 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -167,8 +167,8 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -247,8 +247,8 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 2 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv4r.v v8, v12 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -327,8 +327,8 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma ; SPILL-O0-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 ; SPILL-O0-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll index 34eb58ee4d1c4..483f689cf633d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -24,8 
+24,8 @@ define @foo( %a, @foo( %a, @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -90,8 +90,8 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -167,8 +167,8 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -247,8 +247,8 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 2 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv4r.v v8, v12 ; SPILL-O0-NEXT: addi a0, sp, 16 @@ -327,8 +327,8 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 -; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2 +; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma ; SPILL-O0-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 ; SPILL-O0-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 1a3a1a6c1ee6c..743016a7cbcdd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -893,10 +893,10 @@ define void @test_dag_loop() { ; CHECK-LABEL: test_dag_loop: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmclr.m v0 +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu -; CHECK-NEXT: vmv4r.v v12, v8 ; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmseq.vv v0, v12, v8 @@ -942,8 +942,8 @@ declare @llvm.riscv.vredsum.nxv2i32.nxv2i32( define @vredsum( %passthru, %x, %y, %m, i64 %vl) { ; CHECK-LABEL: vredsum: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vredsum.vs v11, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 @@ -967,8 +967,8 @@ define @vfredusum( %passthru, This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -346,9 +346,9 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; 
CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB8_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -437,9 +437,9 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB9_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -528,9 +528,9 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB10_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -619,9 +619,9 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB11_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -710,9 +710,9 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB12_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -801,9 +801,9 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB13_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -994,9 +994,9 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB17_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -1085,9 +1085,9 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB18_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -1176,9 
+1176,9 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) { ; CHECK-NEXT: andi a3, a1, 1024 ; CHECK-NEXT: xori a1, a3, 1024 ; CHECK-NEXT: slli a4, a4, 1 -; CHECK-NEXT: vsetvli a5, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB19_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a5) @@ -1468,9 +1468,9 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a5) @@ -1558,9 +1558,9 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB27_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a5) @@ -1648,9 +1648,9 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a5) @@ -1738,9 +1738,9 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a5) @@ -1828,9 +1828,9 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB30_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a5) @@ -1918,9 +1918,9 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a5, a0 ; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a5) @@ -2084,10 +2084,10 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: addi a4, a3, -1 ; CHECK-NEXT: andi a5, a4, 1024 ; CHECK-NEXT: xori a4, a5, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a1 ; CHECK-NEXT: mv t0, a4 +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB34_3: # %vector.body ; 
CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) @@ -2184,10 +2184,10 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali ; CHECK-NEXT: addi a4, a3, -1 ; CHECK-NEXT: andi a5, a4, 1024 ; CHECK-NEXT: xori a4, a5, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a1 ; CHECK-NEXT: mv t0, a4 +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) @@ -2498,9 +2498,9 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB42_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -2589,9 +2589,9 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB43_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -2680,9 +2680,9 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB44_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) @@ -2771,9 +2771,9 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB45_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll index f41a3ec72aed7..48c30596ad518 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll @@ -161,8 +161,8 @@ declare @llvm.riscv.vrgatherei16.vv.nxv8i8.i64( %v, ptr noalias %q) { ; CHECK-LABEL: repeat_shuffle: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vse64.v v10, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll index 25e3468dcb62c..439301ff40110 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll @@ -711,8 +711,8 @@ define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( @vadd_vv_passthru( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: vadd_vv_passthru: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, 
tu, ma ; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma ; CHECK-NEXT: vadd.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vadd.vv v9, v8, v8 @@ -152,8 +152,8 @@ entry: define @vadd_vv_passthru_negative( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: vadd_vv_passthru_negative: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma ; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma ; CHECK-NEXT: vadd.vv v10, v8, v9 ; CHECK-NEXT: vadd.vv v9, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 @@ -183,8 +183,8 @@ entry: define @vadd_vv_mask( %0, %1, i32 %2, %m) nounwind { ; CHECK-LABEL: vadd_vv_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vadd.vv v9, v8, v8, v0.t @@ -218,8 +218,8 @@ entry: define @vadd_vv_mask_negative( %0, %1, i32 %2, %m, %m2) nounwind { ; CHECK-LABEL: vadd_vv_mask_negative: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vadd.vv v11, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vadd.vv v9, v8, v11, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll index fab76ac564581..78f3792dbaf06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll @@ -85,8 +85,8 @@ define @vfmacc_vv_nxv1f32_tu( %a, @vfmacc_vv_nxv1f32_masked__tu( %a, %b, %c, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmv1r.v v11, v10 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfwmacc.vv v11, v8, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; ZVFH-NEXT: vmerge.vvm v10, v10, v11, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll index 0c0a3dc9675b1..462d49991ae4f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -16,14 +16,14 @@ define internal void @foo( %v15, %0, This Inner Loop Header: Depth=1 ; NOSUBREG-NEXT: vl1r.v v9, (zero) -; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; NOSUBREG-NEXT: vmv1r.v v13, v12 +; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; NOSUBREG-NEXT: vrgatherei16.vv v13, v9, v10 ; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; NOSUBREG-NEXT: vand.vv v9, v8, v13 @@ -36,14 +36,14 @@ define internal void @foo( %v15, %0, This Inner Loop Header: Depth=1 ; SUBREG-NEXT: vl1r.v v9, (zero) -; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; SUBREG-NEXT: vmv1r.v v13, v12 +; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; SUBREG-NEXT: vrgatherei16.vv v13, v9, v10 ; SUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; SUBREG-NEXT: vand.vv v9, v8, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 088d121564bc9..25aa3a7081a16 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -91,13 +91,11 @@ define @test3(i64 %avl, i8 zeroext %cond, @test18( %a, double %b) nounwind { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli 
zero, 6, e64, m1, tu, ma -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: vsetivli zero, 6, e64, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v9, v8 +; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret entry: %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0) @@ -380,8 +378,8 @@ entry: define @test19( %a, double %b) nounwind { ; CHECK-LABEL: test19: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v9, v8