diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 69c20dda8a5bd..03b320cc966eb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3423,7 +3423,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
 bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
 #define CASE_VMERGE_TO_VMV(lmul) \
   case RISCV::PseudoVMERGE_VVM_##lmul##_TU: \
-    NewOpc = RISCV::PseudoVMV_V_V_##lmul##_TU; \
+    NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
     break;
   unsigned NewOpc;
   switch (N->getMachineOpcode()) {
@@ -3441,9 +3441,13 @@ bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
   if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
     return false;
 
+  SDLoc DL(N);
+  SDValue PolicyOp =
+      CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
   SDNode *Result = CurDAG->getMachineNode(
-      NewOpc, SDLoc(N), N->getValueType(0),
-      {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5)});
+      NewOpc, DL, N->getValueType(0),
+      {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
+       PolicyOp});
   ReplaceUses(N, Result);
   return true;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b88befe8fa690..a1f315daab478 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -453,14 +453,17 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     if (NF == 1) {
       auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+      if (UseVMV_V_V)
+        MIB.addReg(DstReg, RegState::Undef);
       if (UseVMV_V_I)
-        MIB = MIB.add(DefMBBI->getOperand(1));
+        MIB = MIB.add(DefMBBI->getOperand(2));
       else
         MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
       if (UseVMV_V_V) {
         const MCInstrDesc &Desc = DefMBBI->getDesc();
         MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
         MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+        MIB.addImm(0);                                            // tu, mu
         MIB.addReg(RISCV::VL, RegState::Implicit);
         MIB.addReg(RISCV::VTYPE, RegState::Implicit);
       }
@@ -481,8 +484,11 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       for (; I != End; I += Incr) {
         auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
                            TRI->getSubReg(DstReg, SubRegIdx + I));
+        if (UseVMV_V_V)
+          MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I),
+                     RegState::Undef);
         if (UseVMV_V_I)
-          MIB = MIB.add(DefMBBI->getOperand(1));
+          MIB = MIB.add(DefMBBI->getOperand(2));
         else
           MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
                            getKillRegState(KillSrc));
@@ -490,6 +496,7 @@
           const MCInstrDesc &Desc = DefMBBI->getDesc();
           MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
           MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+          MIB.addImm(0);                                            // tu, mu
           MIB.addReg(RISCV::VL, RegState::Implicit);
           MIB.addReg(RISCV::VTYPE, RegState::Implicit);
         }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 5a213082e9e0c..620ef905615a1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1030,21 +1030,9 @@ class VPseudoNullaryPseudoM
 class VPseudoUnaryNoMask :
-  Pseudo<(outs RetClass:$rd),
-         (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
-  RISCVVPseudo {
-  let mayLoad = 0;
-  let mayStore = 0;
-  let hasSideEffects = 0;
-  let Constraints = Constraint;
-  let HasVLOp = 1;
-  let HasSEWOp = 1;
-}
-
-class VPseudoUnaryNoMaskTU :
   Pseudo<(outs 
RetClass:$rd), - (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>, + (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, + ixlenimm:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1052,6 +1040,7 @@ class VPseudoUnaryNoMaskTU.ret; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; } class VPseudoUnaryMask : @@ -1958,10 +1947,10 @@ multiclass VPseudoVIOT_M { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>; - def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, - Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>; } } @@ -2328,12 +2317,6 @@ multiclass VPseudoUnaryVMV_V_X_I { Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; def "_I_" # mx : VPseudoUnaryNoMask, Sched<[WriteVIMovI_MX]>; - def "_V_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>; - def "_X_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; - def "_I_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVIMovI_MX]>; } } } @@ -2350,9 +2333,6 @@ multiclass VPseudoVMV_F { def "_" # f.FX # "_" # mx : VPseudoUnaryNoMask, Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>; - def "_" # f.FX # "_" # mx # "_TU": - VPseudoUnaryNoMaskTU, - Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>; } } } @@ -2367,10 +2347,10 @@ multiclass VPseudoVCLS_V { let VLMul = m.value in { def "_V_" # mx : VPseudoUnaryNoMask, Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>; - def "_V_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>; def "_V_" # mx # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>; } } @@ -2390,11 +2370,10 @@ multiclass VPseudoVSQR_V { def "_V" # suffix : VPseudoUnaryNoMask, Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E, ReadVMask]>; - def "_V" # suffix # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E, - ReadVMask]>; def "_V" # suffix # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E, ReadVMask]>; } @@ -2410,10 +2389,10 @@ multiclass VPseudoVRCP_V { let VLMul = m.value in { def "_V_" # mx : VPseudoUnaryNoMask, Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; - def "_V_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; def "_V_" # mx # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; } } @@ -2430,11 +2409,9 @@ multiclass PseudoVEXT_VF2 { let VLMul = m.value in { def "_" # mx : VPseudoUnaryNoMask, Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; - def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; def "_" # mx # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } @@ -2451,11 +2428,9 @@ multiclass PseudoVEXT_VF4 { let VLMul = m.value in { def "_" # mx : VPseudoUnaryNoMask, Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; - def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; def "_" # mx # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } @@ -2472,11 +2447,9 @@ multiclass PseudoVEXT_VF8 { let VLMul = m.value in { def "_" # 
mx : VPseudoUnaryNoMask, Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; - def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; def "_" # mx # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } @@ -3554,10 +3527,11 @@ multiclass VPseudoConversion { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMask; - def "_" # MInfo.MX # "_TU": VPseudoUnaryNoMaskTU; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask, - RISCVMaskedPseudo; + RISCVMaskedPseudo; } } @@ -3933,40 +3907,20 @@ class VPatUnaryNoMask : Pat<(result_type (!cast(intrinsic_name) - (result_type undef), + (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), VLOpFrag)), (!cast( !if(isSEWAware, inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew), inst#"_"#kind#"_"#vlmul.MX)) - (op2_type op2_reg_class:$rs2), - GPR:$vl, log2sew)>; - -class VPatUnaryNoMaskTU : - Pat<(result_type (!cast(intrinsic_name) (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), - VLOpFrag)), - (!cast( - !if(isSEWAware, - inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_TU", - inst#"_"#kind#"_"#vlmul.MX#"_TU")) - (result_type result_reg_class:$merge), - (op2_type op2_reg_class:$rs2), - GPR:$vl, log2sew)>; + GPR:$vl, log2sew, TU_MU)>; class VPatUnaryMask(inst#"_M_"#mti.BX) + (mti.Mask (IMPLICIT_DEF)), (mti.Mask VR:$rs2), - GPR:$vl, mti.Log2SEW)>; + GPR:$vl, mti.Log2SEW, TU_MU)>; class VPatMaskUnaryMask foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { def : VPatUnaryNoMask; - def : VPatUnaryNoMaskTU; + vti.Log2SEW, vti.LMul, vti.RegClass, VR>; def : VPatUnaryMask; } @@ -4497,10 +4450,7 @@ multiclass VPatUnaryV_VF.Predicates) in { def : VPatUnaryNoMask; - def : VPatUnaryNoMaskTU; + vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>; def : VPatUnaryMask; @@ -4514,10 +4464,7 @@ multiclass VPatUnaryV_V.Predicates in { def : VPatUnaryNoMask; - def : VPatUnaryNoMaskTU; + vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>; def : VPatUnaryMask; @@ -4720,9 +4667,7 @@ multiclass VPatConversionTA { def : VPatUnaryNoMask; - def : VPatUnaryNoMaskTU; + sew, vlmul, result_reg_class, op1_reg_class>; def : VPatUnaryMask; } @@ -6561,16 +6506,11 @@ defm : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">; //===----------------------------------------------------------------------===// foreach vti = AllVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector undef), - (vti.Vector vti.RegClass:$rs1), - VLOpFrag)), - (!cast("PseudoVMV_V_V_"#vti.LMul.MX) - $rs1, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$passthru), (vti.Vector vti.RegClass:$rs1), VLOpFrag)), - (!cast("PseudoVMV_V_V_"#vti.LMul.MX#"_TU") - $passthru, $rs1, GPR:$vl, vti.Log2SEW)>; + (!cast("PseudoVMV_V_V_"#vti.LMul.MX) + $passthru, $rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; // vmv.v.x/vmv.v.i are handled in RISCInstrVInstrInfoVVLPatterns.td } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index cb5b2d3945d4f..bb77e019ce364 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -316,7 +316,8 @@ multiclass VPatExtendSDNode_V ops, string inst_name, string suffix, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))), 
(!cast(inst_name#"_"#suffix#"_"#vti.LMul.MX) - fti.RegClass:$rs2, fti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TU_MU)>; } } @@ -328,7 +329,8 @@ multiclass VPatConvertI2FPSDNode_V.Predicates) in def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))), (!cast(instruction_name#"_"#fvti.LMul.MX) - ivti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; + (fvti.Vector (IMPLICIT_DEF)), + ivti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>; } } @@ -340,7 +342,8 @@ multiclass VPatConvertFP2ISDNode_V.Predicates) in def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))), (!cast(instruction_name#"_"#ivti.LMul.MX) - fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW)>; + (ivti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TU_MU)>; } } @@ -353,7 +356,8 @@ multiclass VPatWConvertI2FPSDNode_V.Predicates) in def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))), (!cast(instruction_name#"_"#ivti.LMul.MX) - ivti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW)>; + (fwti.Vector (IMPLICIT_DEF)), + ivti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TU_MU)>; } } @@ -366,7 +370,8 @@ multiclass VPatWConvertFP2ISDNode_V.Predicates) in def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))), (!cast(instruction_name#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; + (iwti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>; } } @@ -379,7 +384,8 @@ multiclass VPatNConvertI2FPSDNode_W.Predicates) in def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))), (!cast(instruction_name#"_"#fvti.LMul.MX) - iwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; + (fvti.Vector (IMPLICIT_DEF)), + iwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>; } } @@ -392,7 +398,8 @@ multiclass VPatNConvertFP2ISDNode_W.Predicates) in def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))), (!cast(instruction_name#"_"#vti.LMul.MX) - fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; } } @@ -1039,7 +1046,8 @@ foreach vti = AllFloatVectors in { // 13.8. Vector Floating-Point Square-Root Instruction def : Pat<(any_fsqrt (vti.Vector vti.RegClass:$rs2)), (!cast("PseudoVFSQRT_V_"# vti.LMul.MX#"_E"#vti.SEW) - vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; // 13.12. 
Vector Floating-Point Sign-Injection Instructions def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), @@ -1141,7 +1149,8 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX) - fwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; + (fvti.Vector (IMPLICIT_DEF)), + fwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>; } //===----------------------------------------------------------------------===// @@ -1152,12 +1161,14 @@ foreach fvti = AllFloatVectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)), (!cast("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), (fvti.Scalar fvti.ScalarRegClass:$rs1), - fvti.AVL, fvti.Log2SEW)>; + fvti.AVL, fvti.Log2SEW, TU_MU)>; def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))), (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) - 0, fvti.AVL, fvti.Log2SEW)>; + (fvti.Vector (IMPLICIT_DEF)), + 0, fvti.AVL, fvti.Log2SEW, TU_MU)>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 96b6f8e2c1ac8..ffa785ab2dc3f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1788,32 +1788,21 @@ foreach vti = AllIntegerVectors in { // 11.16. Vector Integer Move Instructions foreach vti = AllVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_vmv_v_v_vl (vti.Vector undef), - vti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMV_V_V_"#vti.LMul.MX) - vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vmv_v_v_vl vti.RegClass:$passthru, vti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMV_V_V_"#vti.LMul.MX#"_TU") - vti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + (!cast("PseudoVMV_V_V_"#vti.LMul.MX) + vti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; } foreach vti = AllIntegerVectors in { - def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), GPR:$rs2, VLOpFrag)), - (!cast("PseudoVMV_V_X_"#vti.LMul.MX) - GPR:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, GPR:$rs2, VLOpFrag)), - (!cast("PseudoVMV_V_X_"#vti.LMul.MX#"_TU") - vti.RegClass:$passthru, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + (!cast("PseudoVMV_V_X_"#vti.LMul.MX) + vti.RegClass:$passthru, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; defvar ImmPat = !cast("sew"#vti.SEW#"simm5"); - def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), (ImmPat simm5:$imm5), - VLOpFrag)), - (!cast("PseudoVMV_V_I_"#vti.LMul.MX) - simm5:$imm5, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, (ImmPat simm5:$imm5), VLOpFrag)), - (!cast("PseudoVMV_V_I_"#vti.LMul.MX#"_TU") - vti.RegClass:$passthru, simm5:$imm5, GPR:$vl, vti.Log2SEW)>; + (!cast("PseudoVMV_V_I_"#vti.LMul.MX) + vti.RegClass:$passthru, simm5:$imm5, GPR:$vl, vti.Log2SEW, TU_MU)>; } } @@ -1947,7 +1936,7 @@ foreach vti = AllFloatVectors in { def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), VLOpFrag), (!cast("PseudoVFCLASS_V_"# vti.LMul.MX) - vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; } } @@ -2020,35 +2009,21 @@ foreach fvti = AllFloatVectors in { // 13.16. 
Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - (fvti.Vector undef), (fvti.Scalar (fpimm0)), VLOpFrag)), - (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) - 0, GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)), - (!cast("PseudoVMV_V_I_"#fvti.LMul.MX#"_TU") - $passthru, 0, GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - (fvti.Vector undef), (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)), - (!cast("PseudoVMV_V_X_"#fvti.LMul.MX) - GPR:$imm, GPR:$vl, fvti.Log2SEW)>; + (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) + $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>; def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)), - (!cast("PseudoVMV_V_X_"#fvti.LMul.MX#"_TU") - $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW)>; + (!cast("PseudoVMV_V_X_"#fvti.LMul.MX) + $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>; - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - (fvti.Vector undef), (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), - (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # - fvti.LMul.MX) - (fvti.Scalar fvti.ScalarRegClass:$rs2), - GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # - fvti.LMul.MX # "_TU") + fvti.LMul.MX) $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), - GPR:$vl, fvti.Log2SEW)>; + GPR:$vl, fvti.Log2SEW, TU_MU)>; } } diff --git a/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll b/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll index 3e2af11365297..776ad183f3d90 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll @@ -17,11 +17,11 @@ define void @bar() nounwind { ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: mv s1, sp ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: addi a0, s1, 64 -; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: sd a0, 0(sp) +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: li a2, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index 72d30cbb69327..5294214ec6630 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -88,7 +88,7 @@ body: | ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128 ; CHECK-NEXT: dead renamable $x15 = PseudoVSETIVLI 1, 72 /* e16, m1, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: renamable $v25 = PseudoVMV_V_X_M1 killed renamable $x12, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: renamable $v25 = PseudoVMV_V_X_M1 undef $v25, killed renamable $x12, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $x10 = PseudoReadVLENB ; CHECK-NEXT: $x11 = ADDI killed $x0, 50 ; CHECK-NEXT: $x10 = MUL killed $x10, killed $x11 @@ -172,7 +172,7 @@ body: | liveins: $x12 dead renamable $x15 = PseudoVSETIVLI 1, 72, implicit-def $vl, implicit-def $vtype - renamable $v25 = PseudoVMV_V_X_M1 killed renamable $x12, $noreg, 4, implicit $vl, implicit $vtype + renamable $v25 
= PseudoVMV_V_X_M1 undef $v25, killed renamable $x12, $noreg, 4, 0, implicit $vl, implicit $vtype VS1R_V killed renamable $v25, %stack.1 :: (store unknown-size into %stack.1, align 8) renamable $x1 = ADDI $x0, 255 renamable $x5 = nuw ADDI %stack.0, 256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll index 4f29bc48228d2..2b6e66b8c0d26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll @@ -42,12 +42,12 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vid.v v11 ; RV32-NEXT: vrgather.vv v10, v8, v11 -; RV32-NEXT: vadd.vi v8, v11, -1 ; RV32-NEXT: lui a0, 11 ; RV32-NEXT: addi a0, a0, -1366 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-NEXT: vadd.vi v8, v11, -1 ; RV32-NEXT: vrgather.vv v10, v9, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret @@ -57,12 +57,12 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vid.v v11 ; RV64-NEXT: vrgather.vv v10, v8, v11 -; RV64-NEXT: vadd.vi v8, v11, -1 ; RV64-NEXT: lui a0, 11 ; RV64-NEXT: addiw a0, a0, -1366 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64-NEXT: vadd.vi v8, v11, -1 ; RV64-NEXT: vrgather.vv v10, v9, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 16e0114511fd4..e38bc4434c5c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1396,10 +1396,9 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1528,10 +1527,9 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1665,31 +1663,31 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t -; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v14, v14, a4, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; 
RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: vmv.v.i v18, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v14, v14, v16, v0.t -; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v18, v18, a4, v0.t -; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vv v16, v16, v18, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v18, v8, a3, v0.t -; RV32-NEXT: vsll.vx v18, v18, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v18, v0.t -; RV32-NEXT: vand.vx v18, v8, a4, v0.t -; RV32-NEXT: vsll.vi v18, v18, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vx v16, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v16, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v18, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v18, v8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t @@ -1799,31 +1797,31 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: li a4, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 +; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v14, v8, 8 -; RV32-NEXT: vand.vv v14, v14, v12 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vor.vv v14, v14, v16 -; RV32-NEXT: vor.vv v10, v14, v10 -; RV32-NEXT: vsll.vx v14, v8, a1 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a1 ; RV32-NEXT: vand.vx v16, v8, a3 ; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v14, v14, v16 -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1936,35 +1934,35 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v20, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: lui a4, 5 -; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vor.vv v16, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v24, v20, a4, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t +; RV32-NEXT: lui a5, 5 +; RV32-NEXT: addi a5, a5, 
1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: vmv.v.i v20, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v28, v28, a4, v0.t -; RV32-NEXT: vor.vv v24, v24, v28, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t +; RV32-NEXT: vand.vv v28, v28, v20, v0.t +; RV32-NEXT: vor.vv v24, v28, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t ; RV32-NEXT: vand.vx v28, v8, a4, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v20, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -2072,6 +2070,7 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: lui a4, 5 ; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -2081,7 +2080,6 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: vand.vv v20, v20, v16 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 @@ -2092,11 +2090,11 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: vand.vx v24, v8, a3 ; RV32-NEXT: vsll.vx v24, v24, a2 ; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vand.vx v24, v8, a4 +; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 @@ -2204,54 +2202,36 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v24, v8, a3, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 
16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: li a4, 40 +; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a4, a1, -256 -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: li a5, 40 -; RV32-NEXT: vsll.vx v24, v24, a5, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: addi a5, a1, -256 +; RV32-NEXT: vand.vx v24, v24, a5, v0.t ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a6, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2260,189 +2240,103 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: li t0, 24 +; RV32-NEXT: mul a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 16 +; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: li t0, 24 +; RV32-NEXT: mul a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 16 +; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v16, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; 
RV32-NEXT: vsll.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v8, a5, v0.t +; RV32-NEXT: vsll.vx v24, v24, a4, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t -; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb 
-; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2536,70 +2430,74 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 
+; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v24, v16, 24 +; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: lui a6, 349525 +; RV32-NEXT: addi a6, a6, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: lui a3, 1044480 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a3, v0 +; RV32-NEXT: lui a7, 1044480 +; RV32-NEXT: vmv.v.x v0, a6 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vsll.vi v0, v0, 8 ; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: li a4, 56 -; RV32-NEXT: li a5, 40 -; RV32-NEXT: vsrl.vx v0, v8, a5 -; RV32-NEXT: lui a6, 16 -; RV32-NEXT: addi a6, a6, -256 -; RV32-NEXT: vand.vx v0, v0, a6 -; RV32-NEXT: vsrl.vx v24, v8, a4 -; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vsll.vx v0, v8, a4 -; RV32-NEXT: vand.vx v8, v8, a6 -; RV32-NEXT: vsll.vx v8, v8, a5 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: vsetvli 
zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a6 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -2684,54 +2582,36 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v24, v8, a3, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: li a4, 40 +; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a4, a1, -256 -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: li a5, 40 -; RV32-NEXT: vsll.vx v24, v24, a5, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: addi a5, a1, -256 +; RV32-NEXT: vand.vx v24, v24, a5, v0.t ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a6, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2740,189 +2620,103 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: li t0, 24 +; RV32-NEXT: mul a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 16 +; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: 
vl8r.v v24, (a7) # Unknown-size Folded Reload +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: li t0, 24 +; RV32-NEXT: mul a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 16 +; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v16, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v8, a5, v0.t +; RV32-NEXT: vsll.vx v24, v24, a4, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t -; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: 
vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v8, 
v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -3016,70 +2810,74 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v24, v16, 24 +; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: lui a6, 349525 +; RV32-NEXT: addi a6, a6, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: lui a3, 1044480 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a3, v0 +; RV32-NEXT: lui a7, 1044480 +; RV32-NEXT: vmv.v.x v0, a6 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vsll.vi v0, v0, 8 ; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: li a4, 56 -; RV32-NEXT: li a5, 40 -; RV32-NEXT: vsrl.vx v0, v8, a5 -; RV32-NEXT: lui a6, 16 -; RV32-NEXT: addi a6, a6, -256 -; RV32-NEXT: vand.vx v0, v0, a6 -; RV32-NEXT: vsrl.vx v24, v8, a4 -; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vsll.vx v0, v8, a4 -; RV32-NEXT: vand.vx v8, v8, a6 -; RV32-NEXT: vsll.vx v8, v8, a5 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, 
-241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a6 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 6fb287d440655..52399b3e776b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -165,9 +165,9 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -696,40 +696,40 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 85 +; LMULMAX2-RV32-NEXT: li a1, 56 +; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 +; LMULMAX2-RV32-NEXT: li a2, 40 +; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2 +; LMULMAX2-RV32-NEXT: lui a3, 16 +; LMULMAX2-RV32-NEXT: addi a3, a3, -256 +; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 +; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 +; LMULMAX2-RV32-NEXT: lui a4, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 +; LMULMAX2-RV32-NEXT: li a5, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a5 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32-NEXT: lui a1, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: lui a5, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 24 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v14, v14, a1 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: li a2, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v14, v8, a2 -; LMULMAX2-RV32-NEXT: li a3, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v16, v8, a3 -; LMULMAX2-RV32-NEXT: lui a4, 16 -; LMULMAX2-RV32-NEXT: addi a4, 
a4, -256 -; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 -; LMULMAX2-RV32-NEXT: vor.vv v14, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: vsll.vi v10, v10, 8 -; LMULMAX2-RV32-NEXT: vand.vx v14, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 24 -; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a3 -; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 +; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 +; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 +; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 +; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 @@ -831,9 +831,9 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a1) -; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: lui a2, 1044480 ; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 7c40108e340ca..7f4304c55f6cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -436,10 +436,9 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -511,10 +510,9 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -591,31 +589,31 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t -; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v14, v14, a4, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: 
li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: vmv.v.i v18, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v14, v14, v16, v0.t -; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v18, v18, a0, v0.t -; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vv v16, v16, v18, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v18, v8, a3, v0.t -; RV32-NEXT: vsll.vx v18, v18, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v18, v0.t -; RV32-NEXT: vand.vx v18, v8, a0, v0.t -; RV32-NEXT: vsll.vi v18, v18, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vx v16, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v16, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v18, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v18, v8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: ret @@ -668,31 +666,31 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: li a4, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 +; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v14, v8, 8 -; RV32-NEXT: vand.vv v14, v14, v12 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vor.vv v14, v14, v16 -; RV32-NEXT: vor.vv v10, v14, v10 -; RV32-NEXT: vsll.vx v14, v8, a1 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a1 ; RV32-NEXT: vand.vx v16, v8, a3 ; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v14, v14, v16 -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: ret ; @@ -748,35 +746,35 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v20, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: lui a4, 5 -; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vor.vv v16, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v24, v20, a4, 
v0.t +; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t +; RV32-NEXT: lui a5, 5 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: vmv.v.i v20, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v28, v28, a0, v0.t -; RV32-NEXT: vor.vv v24, v24, v28, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t +; RV32-NEXT: vand.vv v28, v28, v20, v0.t +; RV32-NEXT: vor.vv v24, v28, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t -; RV32-NEXT: vand.vx v28, v8, a0, v0.t +; RV32-NEXT: vand.vx v28, v8, a4, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v20, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v8i64: @@ -827,6 +825,7 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: lui a4, 5 ; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -836,7 +835,6 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: vand.vv v20, v20, v16 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a0, 4080 @@ -847,11 +845,11 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vand.vx v24, v8, a3 ; RV32-NEXT: vsll.vx v24, v24, a2 ; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vand.vx v24, v8, a0 +; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: ret @@ -902,54 +900,36 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v24, v8, a1, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 
-; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3, v0.t ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: addi a6, sp, 16 +; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: lui a6, 349525 ; RV32-NEXT: addi a6, a6, 1365 @@ -958,71 +938,75 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a6 ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 24 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v16, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, 
(a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v8, a3, v0.t +; RV32-NEXT: vsll.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1089,47 +1073,50 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v24, v16, 24 +; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: lui a6, 349525 +; RV32-NEXT: addi a6, a6, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: lui a2, 1044480 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a2, v0 +; RV32-NEXT: lui a7, 1044480 +; RV32-NEXT: vmv.v.x v0, a6 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 8 -; 
RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vsll.vi v0, v0, 8 ; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 56 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v0, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v0, v0, a3 -; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vand.vx v0, v8, a0 -; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vsll.vx v0, v8, a1 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vsll.vx v8, v8, a2 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1182,54 +1169,36 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v24, v8, a1, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3, v0.t ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; 
RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: addi a6, sp, 16 +; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: lui a6, 349525 ; RV32-NEXT: addi a6, a6, 1365 @@ -1238,71 +1207,75 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a6 ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 24 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v16, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v8, a3, v0.t +; RV32-NEXT: vsll.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; 
RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1369,47 +1342,50 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v24, v16, 24 +; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: lui a6, 349525 +; RV32-NEXT: addi a6, a6, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: lui a2, 1044480 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a2, v0 +; RV32-NEXT: lui a7, 1044480 +; RV32-NEXT: vmv.v.x v0, a6 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vsll.vi v0, v0, 8 ; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 56 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v0, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v0, v0, a3 -; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vand.vx v0, v8, a0 -; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vsll.vx v0, v8, a1 -; 
RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vsll.vx v8, v8, a2 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index 8f4ad2a7c86e3..7b2486edcac3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -71,9 +71,9 @@ define void @bswap_v2i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -317,40 +317,40 @@ define void @bswap_v4i64(ptr %x, ptr %y) { ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 85 +; LMULMAX2-RV32-NEXT: li a1, 56 +; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 +; LMULMAX2-RV32-NEXT: li a2, 40 +; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2 +; LMULMAX2-RV32-NEXT: lui a3, 16 +; LMULMAX2-RV32-NEXT: addi a3, a3, -256 +; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 +; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 +; LMULMAX2-RV32-NEXT: lui a4, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 +; LMULMAX2-RV32-NEXT: li a5, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a5 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32-NEXT: lui a1, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: lui a5, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 24 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v14, v14, a1 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: li a2, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v14, v8, a2 -; LMULMAX2-RV32-NEXT: li a3, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v16, v8, a3 -; LMULMAX2-RV32-NEXT: lui a4, 16 -; LMULMAX2-RV32-NEXT: addi a4, a4, -256 -; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 -; LMULMAX2-RV32-NEXT: vor.vv v14, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: vsll.vi v10, v10, 8 -; LMULMAX2-RV32-NEXT: vand.vx v14, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 24 -; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a3 -; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 +; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 +; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 +; 
LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 +; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 +; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -395,9 +395,9 @@ define void @bswap_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: lui a2, 1044480 ; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll index 768a00e39ba7a..1c2efcaae560f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -412,9 +412,9 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3 ; LMULMAX4-NEXT: addi s0, sp, 256 ; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 ; LMULMAX4-NEXT: andi sp, sp, -128 -; LMULMAX4-NEXT: addi a0, sp, 64 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vmv.v.i v8, 0 +; LMULMAX4-NEXT: addi a0, sp, 64 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: mv a0, sp ; LMULMAX4-NEXT: li a1, 1 @@ -516,9 +516,9 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> ; LMULMAX4-NEXT: sd a0, 136(sp) ; LMULMAX4-NEXT: li a0, 13 ; LMULMAX4-NEXT: sd a0, 0(sp) -; LMULMAX4-NEXT: addi a0, sp, 72 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vmv.v.i v8, 0 +; LMULMAX4-NEXT: addi a0, sp, 72 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: addi a0, sp, 8 ; LMULMAX4-NEXT: li a1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 08184026c88c5..497cafd62800d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1618,15 +1618,14 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 @@ -1776,15 +1775,14 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v16i64_unmasked: ; RV32: # %bb.0: 
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 @@ -1871,104 +1869,110 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: li a2, 16 ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vmv.v.x v8, a2 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a2, a2, a4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a2, a2, a4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a2, a1, 1365 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a2, a2, a4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a3, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; 
RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: vadd.vv v16, v16, v8, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a2 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, 
m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t @@ -1993,7 +1997,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2009,8 +2013,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2023,8 +2026,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2039,7 +2041,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2153,72 +2156,65 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB35_2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a2, a1, 1365 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a3, .LBB35_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB35_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 -; RV32-NEXT: csrr a2, vlenb -; 
RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a2, a2, a4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a2 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v24, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 @@ -2229,20 +2225,15 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: 
vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vsrl.vi v24, v16, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v24, v8, v24 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vsub.vv v24, v16, v24 ; RV32-NEXT: vand.vv v8, v24, v0 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v24, v24, v0 @@ -2265,8 +2256,7 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index c5ed48ffdffe9..cf4727aaee3d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -264,13 +264,13 @@ define void @ctpop_v2i64(ptr %x, ptr %y) { ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10 +; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV32-NEXT: vand.vv v9, v10, v9 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 @@ -340,13 +340,13 @@ define void @ctpop_v2i64(ptr %x, ptr %y) { ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-RV32-NEXT: vand.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: addi a1, a1, 819 @@ -772,13 +772,13 @@ define void @ctpop_v4i64(ptr %x, ptr %y) { ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 1 +; LMULMAX2-RV32-NEXT: vand.vv v10, v12, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 @@ -847,56 +847,56 @@ define void @ctpop_v4i64(ptr %x, ptr %y) { ; 
LMULMAX1-RV32-LABEL: ctpop_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 1 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 +; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v12, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v12, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v12 +; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v12, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 4 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 ; LMULMAX1-RV32-NEXT: lui a2, 4112 ; LMULMAX1-RV32-NEXT: addi a2, a2, 257 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v14, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vv v11, v14, v11 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v11, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 ; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v13 +; LMULMAX1-RV32-NEXT: li a2, 56 ; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) +; LMULMAX1-RV32-NEXT: vsrl.vi v14, v8, 1 +; LMULMAX1-RV32-NEXT: vand.vv v10, v14, v10 +; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 +; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v13 +; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: 
ctpop_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index a08e678c21755..689b037ced8d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -2133,47 +2133,32 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb +; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 56 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a1 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: li a1, 1 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vxor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsub.vx v16, v16, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -2191,27 +2176,27 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 56 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, 
(a4) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -2222,12 +2207,6 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 @@ -2248,6 +2227,12 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -2261,36 +2246,33 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: bltu a0, a3, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB34_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb @@ -2299,26 +2281,23 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vxor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsub.vx v16, v16, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2330,31 +2309,50 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 @@ -2373,8 +2371,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2385,12 +2382,9 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v8, 
v8, a2, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 @@ -2508,24 +2502,23 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v16, v8, v16 +; RV32-NEXT: li a3, 1 +; RV32-NEXT: vsub.vx v8, v8, a3 +; RV32-NEXT: vand.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 @@ -2538,7 +2531,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 @@ -2549,7 +2542,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -2560,23 +2553,23 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 
-; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -2584,7 +2577,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a2 +; RV32-NEXT: vsub.vx v0, v0, a3 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: csrr a0, vlenb @@ -4785,47 +4778,32 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb +; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB70_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB70_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 56 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a1 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: li a1, 1 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vxor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsub.vx v16, v16, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -4843,27 +4821,27 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 56 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr 
a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -4874,12 +4852,6 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 @@ -4900,6 +4872,12 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -4913,36 +4891,33 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: bltu a0, a3, .LBB70_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB70_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb @@ -4951,26 +4926,23 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vxor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsub.vx v16, v16, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -4982,31 +4954,50 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 @@ -5025,8 +5016,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5037,12 +5027,9 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; 
RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 @@ -5160,24 +5147,23 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v16, v8, v16 +; RV32-NEXT: li a3, 1 +; RV32-NEXT: vsub.vx v8, v8, a3 +; RV32-NEXT: vand.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 @@ -5190,7 +5176,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 @@ -5201,7 +5187,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -5212,23 +5198,23 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: csrr 
a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -5236,7 +5222,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a2 +; RV32-NEXT: vsub.vx v0, v0, a3 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index fd3c65da1a2e8..c83845af6aac7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -141,12 +141,13 @@ define <4 x i64> @sextload_v4i8_v4i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v4i8_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf8 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v9, v8 -; LMULMAX1-NEXT: vsext.vf8 v8, v10 +; LMULMAX1-NEXT: vsext.vf8 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v4i8_v4i64: @@ -164,12 +165,13 @@ define <4 x i64> @zextload_v4i8_v4i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v4i8_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf8 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v9, v8 -; LMULMAX1-NEXT: vzext.vf8 v8, v10 +; LMULMAX1-NEXT: vzext.vf8 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v4i8_v4i64: @@ -211,12 +213,13 @@ define <8 x i32> @sextload_v8i8_v8i32(ptr %x) { ; LMULMAX1-LABEL: sextload_v8i8_v8i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v10 +; LMULMAX1-NEXT: vsext.vf4 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v8i8_v8i32: @@ -234,12 +237,13 @@ define <8 x i32> @zextload_v8i8_v8i32(ptr %x) { ; LMULMAX1-LABEL: zextload_v8i8_v8i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf4 v8, v9 ; LMULMAX1-NEXT: 
vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v10 +; LMULMAX1-NEXT: vzext.vf4 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v8i8_v8i32: @@ -257,20 +261,21 @@ define <8 x i64> @sextload_v8i8_v8i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v8i8_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v12, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf8 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v10, v8 +; LMULMAX1-NEXT: vsext.vf8 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v9, v11 +; LMULMAX1-NEXT: vsext.vf8 v9, v12 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v11, v8 -; LMULMAX1-NEXT: vsext.vf8 v8, v12 +; LMULMAX1-NEXT: vsext.vf8 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v8i8_v8i64: @@ -288,20 +293,21 @@ define <8 x i64> @zextload_v8i8_v8i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v8i8_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v12, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf8 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v10, v8 +; LMULMAX1-NEXT: vzext.vf8 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v9, v11 +; LMULMAX1-NEXT: vzext.vf8 v9, v12 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v11, v8 -; LMULMAX1-NEXT: vzext.vf8 v8, v12 +; LMULMAX1-NEXT: vzext.vf8 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v8i8_v8i64: @@ -319,12 +325,13 @@ define <16 x i16> @sextload_v16i8_v16i16(ptr %x) { ; LMULMAX1-LABEL: sextload_v16i8_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 8 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 8 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 +; LMULMAX1-NEXT: vsext.vf2 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v16i8_v16i16: @@ -342,12 +349,13 @@ define <16 x i16> @zextload_v16i8_v16i16(ptr %x) { ; 
LMULMAX1-LABEL: zextload_v16i8_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 8 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 8 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 +; LMULMAX1-NEXT: vzext.vf2 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v16i8_v16i16: @@ -365,20 +373,21 @@ define <16 x i32> @sextload_v16i8_v16i32(ptr %x) { ; LMULMAX1-LABEL: sextload_v16i8_v16i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v12, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 8 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 8 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v8 +; LMULMAX1-NEXT: vsext.vf4 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v12, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v11 +; LMULMAX1-NEXT: vsext.vf4 v9, v12 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v12 +; LMULMAX1-NEXT: vsext.vf4 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v16i8_v16i32: @@ -396,20 +405,21 @@ define <16 x i32> @zextload_v16i8_v16i32(ptr %x) { ; LMULMAX1-LABEL: zextload_v16i8_v16i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v12, (a0) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 8 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 8 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v10, v8 +; LMULMAX1-NEXT: vzext.vf4 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v12, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v11 +; LMULMAX1-NEXT: vzext.vf4 v9, v12 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v11, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v12 +; LMULMAX1-NEXT: vzext.vf4 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v16i8_v16i32: @@ -427,47 +437,49 @@ define <16 x i64> @sextload_v16i8_v16i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v16i8_v16i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v16, (a0) +; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf8 v8, v10 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX1-NEXT: vslidedown.vi v11, v10, 8 
; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v12, v8 +; LMULMAX1-NEXT: vsext.vf8 v12, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v16, 2 +; LMULMAX1-NEXT: vslidedown.vi v13, v10, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v9, v10 +; LMULMAX1-NEXT: vsext.vf8 v9, v13 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v16, 4 +; LMULMAX1-NEXT: vslidedown.vi v15, v10, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v10, v11 +; LMULMAX1-NEXT: vsext.vf8 v10, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v14, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v14, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vsext.vf8 v13, v14 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 +; LMULMAX1-NEXT: vslidedown.vi v16, v11, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v14, v8 +; LMULMAX1-NEXT: vsext.vf8 v14, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v15, v11, 2 +; LMULMAX1-NEXT: vslidedown.vi v15, v15, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vsext.vf8 v11, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v16, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v15, v8 -; LMULMAX1-NEXT: vsext.vf8 v8, v16 +; LMULMAX1-NEXT: vsext.vf8 v15, v16 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v16i8_v16i64: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX4-NEXT: vle8.v v16, (a0) +; LMULMAX4-NEXT: vle8.v v12, (a0) +; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; LMULMAX4-NEXT: vsext.vf8 v8, v12 ; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX4-NEXT: vslidedown.vi v16, v12, 8 ; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vsext.vf8 v12, v8 -; LMULMAX4-NEXT: vsext.vf8 v8, v16 +; LMULMAX4-NEXT: vsext.vf8 v12, v16 ; LMULMAX4-NEXT: ret %y = load <16 x i8>, ptr %x %z = sext <16 x i8> %y to <16 x i64> @@ -478,47 +490,49 @@ define <16 x i64> @zextload_v16i8_v16i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v16i8_v16i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v16, (a0) +; LMULMAX1-NEXT: vle8.v v10, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf8 v8, v10 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX1-NEXT: vslidedown.vi v11, v10, 8 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v12, v8 +; LMULMAX1-NEXT: vzext.vf8 v12, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v16, 2 +; LMULMAX1-NEXT: vslidedown.vi v13, v10, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v9, v10 +; LMULMAX1-NEXT: vzext.vf8 v9, v13 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v16, 4 +; LMULMAX1-NEXT: vslidedown.vi v15, v10, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v10, v11 +; LMULMAX1-NEXT: vzext.vf8 v10, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v14, 
v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v14, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vzext.vf8 v13, v14 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 +; LMULMAX1-NEXT: vslidedown.vi v16, v11, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v14, v8 +; LMULMAX1-NEXT: vzext.vf8 v14, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v15, v11, 2 +; LMULMAX1-NEXT: vslidedown.vi v15, v15, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vzext.vf8 v11, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v16, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v15, v8 -; LMULMAX1-NEXT: vzext.vf8 v8, v16 +; LMULMAX1-NEXT: vzext.vf8 v15, v16 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v16i8_v16i64: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX4-NEXT: vle8.v v16, (a0) +; LMULMAX4-NEXT: vle8.v v12, (a0) +; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; LMULMAX4-NEXT: vzext.vf8 v8, v12 ; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX4-NEXT: vslidedown.vi v16, v12, 8 ; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vzext.vf8 v12, v8 -; LMULMAX4-NEXT: vzext.vf8 v8, v16 +; LMULMAX4-NEXT: vzext.vf8 v12, v16 ; LMULMAX4-NEXT: ret %y = load <16 x i8>, ptr %x %z = zext <16 x i8> %y to <16 x i64> @@ -646,12 +660,13 @@ define <4 x i64> @sextload_v4i16_v4i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v4i16_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v10 +; LMULMAX1-NEXT: vsext.vf4 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v4i16_v4i64: @@ -669,12 +684,13 @@ define <4 x i64> @zextload_v4i16_v4i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v4i16_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v10 +; LMULMAX1-NEXT: vzext.vf4 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v4i16_v4i64: @@ -704,12 +720,13 @@ define <8 x i32> @sextload_v8i16_v8i32(ptr %x) { ; LMULMAX1-LABEL: sextload_v8i16_v8i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: 
vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 +; LMULMAX1-NEXT: vsext.vf2 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v8i16_v8i32: @@ -727,12 +744,13 @@ define <8 x i32> @zextload_v8i16_v8i32(ptr %x) { ; LMULMAX1-LABEL: zextload_v8i16_v8i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 +; LMULMAX1-NEXT: vzext.vf2 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v8i16_v8i32: @@ -750,20 +768,21 @@ define <8 x i64> @sextload_v8i16_v8i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v8i16_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v12, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v8 +; LMULMAX1-NEXT: vsext.vf4 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v11 +; LMULMAX1-NEXT: vsext.vf4 v9, v12 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v12 +; LMULMAX1-NEXT: vsext.vf4 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v8i16_v8i64: @@ -781,20 +800,21 @@ define <8 x i64> @zextload_v8i16_v8i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v8i16_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v12, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v10, v8 +; LMULMAX1-NEXT: vzext.vf4 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v11 +; LMULMAX1-NEXT: vzext.vf4 v9, v12 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v11, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v12 +; LMULMAX1-NEXT: vzext.vf4 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v8i16_v8i64: @@ -834,19 +854,20 @@ define <16 x i32> @sextload_v16i16_v16i32(ptr %x) { ; LMULMAX1-LABEL: sextload_v16i16_v16i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) +; LMULMAX1-NEXT: vle16.v v9, 
(a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v12, (a0) +; LMULMAX1-NEXT: vle16.v v11, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 +; LMULMAX1-NEXT: vsext.vf2 v9, v10 +; LMULMAX1-NEXT: vsext.vf2 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v11, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v12 +; LMULMAX1-NEXT: vsext.vf2 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v16i16_v16i32: @@ -864,19 +885,20 @@ define <16 x i32> @zextload_v16i16_v16i32(ptr %x) { ; LMULMAX1-LABEL: zextload_v16i16_v16i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v12, (a0) +; LMULMAX1-NEXT: vle16.v v11, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 +; LMULMAX1-NEXT: vzext.vf2 v9, v10 +; LMULMAX1-NEXT: vzext.vf2 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v11, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v12 +; LMULMAX1-NEXT: vzext.vf2 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v16i16_v16i32: @@ -894,46 +916,48 @@ define <16 x i64> @sextload_v16i16_v16i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v16i16_v16i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v12, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v16, (a0) +; LMULMAX1-NEXT: vle16.v v13, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v8 +; LMULMAX1-NEXT: vsext.vf4 v10, v11 +; LMULMAX1-NEXT: vsext.vf4 v12, v13 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v15, v16, 4 +; LMULMAX1-NEXT: vslidedown.vi v15, v13, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vsext.vf4 v14, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v11 +; LMULMAX1-NEXT: vsext.vf4 v9, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v8 +; LMULMAX1-NEXT: vsext.vf4 v11, v16 ; 
LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v16, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v13, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v13, v8 +; LMULMAX1-NEXT: vsext.vf4 v13, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v15, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v15, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v15, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v12 -; LMULMAX1-NEXT: vsext.vf4 v12, v16 +; LMULMAX1-NEXT: vsext.vf4 v15, v16 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v16i16_v16i64: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v16, (a0) +; LMULMAX4-NEXT: vle16.v v12, (a0) +; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; LMULMAX4-NEXT: vsext.vf4 v8, v12 ; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX4-NEXT: vslidedown.vi v16, v12, 8 ; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vsext.vf4 v12, v8 -; LMULMAX4-NEXT: vsext.vf4 v8, v16 +; LMULMAX4-NEXT: vsext.vf4 v12, v16 ; LMULMAX4-NEXT: ret %y = load <16 x i16>, ptr %x %z = sext <16 x i16> %y to <16 x i64> @@ -944,46 +968,48 @@ define <16 x i64> @zextload_v16i16_v16i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v16i16_v16i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v12, (a0) +; LMULMAX1-NEXT: vle16.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v16, (a0) +; LMULMAX1-NEXT: vle16.v v13, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf4 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 4 +; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v10, v8 +; LMULMAX1-NEXT: vzext.vf4 v10, v11 +; LMULMAX1-NEXT: vzext.vf4 v12, v13 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v15, v16, 4 +; LMULMAX1-NEXT: vslidedown.vi v15, v13, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vzext.vf4 v14, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v11 +; LMULMAX1-NEXT: vzext.vf4 v9, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v11, v8 +; LMULMAX1-NEXT: vzext.vf4 v11, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v16, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v13, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v13, v8 +; LMULMAX1-NEXT: vzext.vf4 v13, v16 ; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v15, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v15, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v15, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v12 -; LMULMAX1-NEXT: vzext.vf4 v12, v16 +; LMULMAX1-NEXT: vzext.vf4 v15, v16 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v16i16_v16i64: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v16, (a0) 
+; LMULMAX4-NEXT: vle16.v v12, (a0) +; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; LMULMAX4-NEXT: vzext.vf4 v8, v12 ; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX4-NEXT: vslidedown.vi v16, v12, 8 ; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vzext.vf4 v12, v8 -; LMULMAX4-NEXT: vzext.vf4 v8, v16 +; LMULMAX4-NEXT: vzext.vf4 v12, v16 ; LMULMAX4-NEXT: ret %y = load <16 x i16>, ptr %x %z = zext <16 x i16> %y to <16 x i64> @@ -1070,12 +1096,13 @@ define <4 x i64> @sextload_v4i32_v4i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v4i32_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) +; LMULMAX1-NEXT: vle32.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 +; LMULMAX1-NEXT: vsext.vf2 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v4i32_v4i64: @@ -1093,12 +1120,13 @@ define <4 x i64> @zextload_v4i32_v4i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v4i32_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) +; LMULMAX1-NEXT: vle32.v v9, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 +; LMULMAX1-NEXT: vzext.vf2 v9, v10 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v4i32_v4i64: @@ -1167,19 +1195,20 @@ define <8 x i64> @sextload_v8i32_v8i64(ptr %x) { ; LMULMAX1-LABEL: sextload_v8i32_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) +; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) +; LMULMAX1-NEXT: vle32.v v11, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 +; LMULMAX1-NEXT: vsext.vf2 v9, v10 +; LMULMAX1-NEXT: vsext.vf2 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v11, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v12 +; LMULMAX1-NEXT: vsext.vf2 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v8i32_v8i64: @@ -1197,19 +1226,20 @@ define <8 x i64> @zextload_v8i32_v8i64(ptr %x) { ; LMULMAX1-LABEL: zextload_v8i32_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) +; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) +; LMULMAX1-NEXT: vle32.v v11, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma 
-; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 +; LMULMAX1-NEXT: vzext.vf2 v9, v10 +; LMULMAX1-NEXT: vzext.vf2 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v11, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v12 +; LMULMAX1-NEXT: vzext.vf2 v11, v12 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v8i32_v8i64: @@ -1298,43 +1328,45 @@ define <16 x i64> @sextload_v16i32_v16i64(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: addi a1, a0, 48 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v16, (a1) +; LMULMAX1-NEXT: vle32.v v15, (a1) ; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v14, (a1) -; LMULMAX1-NEXT: vle32.v v10, (a0) +; LMULMAX1-NEXT: vle32.v v13, (a1) +; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) +; LMULMAX1-NEXT: vle32.v v11, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 +; LMULMAX1-NEXT: vsext.vf2 v9, v10 +; LMULMAX1-NEXT: vsext.vf2 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v11, v8 +; LMULMAX1-NEXT: vsext.vf2 v11, v12 +; LMULMAX1-NEXT: vsext.vf2 v12, v13 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v14, 2 +; LMULMAX1-NEXT: vslidedown.vi v14, v13, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v13, v8 +; LMULMAX1-NEXT: vsext.vf2 v13, v14 +; LMULMAX1-NEXT: vsext.vf2 v14, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v16, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v15, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v15, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v12 -; LMULMAX1-NEXT: vsext.vf2 v12, v14 -; LMULMAX1-NEXT: vsext.vf2 v14, v16 +; LMULMAX1-NEXT: vsext.vf2 v15, v16 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: sextload_v16i32_v16i64: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (a0) +; LMULMAX4-NEXT: vle32.v v12, (a0) +; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; LMULMAX4-NEXT: vsext.vf2 v8, v12 ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX4-NEXT: vslidedown.vi v16, v12, 8 ; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vsext.vf2 v12, v8 -; LMULMAX4-NEXT: vsext.vf2 v8, v16 +; LMULMAX4-NEXT: vsext.vf2 v12, v16 ; LMULMAX4-NEXT: ret %y = load <16 x i32>, ptr %x %z = sext <16 x i32> %y to <16 x i64> @@ -1346,43 +1378,45 @@ define <16 x i64> @zextload_v16i32_v16i64(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: addi a1, a0, 48 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v16, (a1) +; LMULMAX1-NEXT: vle32.v v15, (a1) ; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: 
vle32.v v14, (a1) -; LMULMAX1-NEXT: vle32.v v10, (a0) +; LMULMAX1-NEXT: vle32.v v13, (a1) +; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) +; LMULMAX1-NEXT: vle32.v v11, (a0) +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: vzext.vf2 v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 +; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 +; LMULMAX1-NEXT: vzext.vf2 v9, v10 +; LMULMAX1-NEXT: vzext.vf2 v10, v11 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v12, 2 +; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v11, v8 +; LMULMAX1-NEXT: vzext.vf2 v11, v12 +; LMULMAX1-NEXT: vzext.vf2 v12, v13 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v14, 2 +; LMULMAX1-NEXT: vslidedown.vi v14, v13, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v13, v8 +; LMULMAX1-NEXT: vzext.vf2 v13, v14 +; LMULMAX1-NEXT: vzext.vf2 v14, v15 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v16, 2 +; LMULMAX1-NEXT: vslidedown.vi v16, v15, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v15, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v12 -; LMULMAX1-NEXT: vzext.vf2 v12, v14 -; LMULMAX1-NEXT: vzext.vf2 v14, v16 +; LMULMAX1-NEXT: vzext.vf2 v15, v16 ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: zextload_v16i32_v16i64: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (a0) +; LMULMAX4-NEXT: vle32.v v12, (a0) +; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; LMULMAX4-NEXT: vzext.vf2 v8, v12 ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 +; LMULMAX4-NEXT: vslidedown.vi v16, v12, 8 ; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vzext.vf2 v12, v8 -; LMULMAX4-NEXT: vzext.vf2 v8, v16 +; LMULMAX4-NEXT: vzext.vf2 v12, v16 ; LMULMAX4-NEXT: ret %y = load <16 x i32>, ptr %x %z = zext <16 x i32> %y to <16 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 740ad95532316..71f60d636b6ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -95,14 +95,14 @@ define void @buildvec_dominant1_v2f32(<2 x float>* %x) { define void @buildvec_dominant0_v4f32(<4 x float>* %x) { ; CHECK-LABEL: buildvec_dominant0_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: lui a1, 262144 -; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret store <4 x float> , <4 x float>* %x ret void @@ -112,12 +112,12 @@ define void @buildvec_dominant1_v4f32(<4 x float>* %x, float %f) { ; CHECK-LABEL: buildvec_dominant1_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: 
vmv.s.x v8, zero -; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %v0 = insertelement <4 x float> poison, float %f, i32 0 %v1 = insertelement <4 x float> %v0, float 0.0, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll index b3e590c48607b..ffd35f0ea1921 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll @@ -47,11 +47,12 @@ define void @fpext_v8f16_v8f32(ptr %x, ptr %y) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v8, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v9 ; LMULMAX1-NEXT: vfwcvt.f.f.v v9, v8 +; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 +; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v8 ; LMULMAX1-NEXT: addi a0, a1, 16 ; LMULMAX1-NEXT: vse32.v v10, (a0) ; LMULMAX1-NEXT: vse32.v v9, (a1) @@ -92,13 +93,13 @@ define void @fpext_v8f16_v8f64(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; LMULMAX1-NEXT: vfwcvt.f.f.v v11, v12 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v12, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v12 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vfwcvt.f.f.v v12, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; LMULMAX1-NEXT: vfwcvt.f.f.v v8, v12 +; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; LMULMAX1-NEXT: vfwcvt.f.f.v v12, v10 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v12 ; LMULMAX1-NEXT: addi a0, a1, 32 ; LMULMAX1-NEXT: vse64.v v10, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll index 9761196f6e42c..72ff4f4efe6ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -160,8 +160,8 @@ define void @splat_zero_16f16(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x half> poison, half 0.0, i32 0 @@ -182,8 +182,8 @@ define void @splat_zero_v8f32(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x float> poison, float 0.0, i32 0 @@ -204,8 +204,8 @@ define void @splat_zero_v4f64(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; 
LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse64.v v8, (a1) ; LMULMAX1-NEXT: vse64.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <4 x double> poison, double 0.0, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index 9b46d4441a21f..62a0c2cc696f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -77,8 +77,8 @@ define void @fp2si_v2f32_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -95,8 +95,8 @@ define void @fp2ui_v2f32_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -114,8 +114,8 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -133,8 +133,8 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -151,12 +151,13 @@ define void @fp2si_v2f16_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, ptr %x @@ -171,12 +172,13 @@ define void @fp2ui_v2f16_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, ptr %x @@ -661,11 +663,12 @@ declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> %a) define void 
@fp2si_v2f64_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2si_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret @@ -679,11 +682,12 @@ declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>) define void @fp2ui_v2f64_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2ui_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index bb39feeb1d067..8be3d42671495 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -491,16 +491,17 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) { ; LMULMAX1-NEXT: addi a2, a0, 16 ; LMULMAX1-NEXT: vle32.v v8, (a2) ; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v8, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v11, v10 +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 +; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v12, v10 -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v11, v8 ; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; LMULMAX1-NEXT: vslidedown.vi v9, v9, 2 +; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v12, v9 ; LMULMAX1-NEXT: addi a0, a1, 16 ; LMULMAX1-NEXT: vse64.v v12, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) @@ -530,16 +531,17 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) { ; LMULMAX1-NEXT: addi a2, a0, 16 ; LMULMAX1-NEXT: vle32.v v8, (a2) ; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v8, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v11, v10 +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v9, 2 +; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v12, v10 -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v11, v8 ; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; LMULMAX1-NEXT: vslidedown.vi v9, v9, 2 +; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v12, v9 ; LMULMAX1-NEXT: addi a0, a1, 16 ; LMULMAX1-NEXT: vse64.v v12, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll index 5f1cc2d4b4e96..08f27bd50a504 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -799,9 +799,9 @@ define <16 x i64> @fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) ; RV32-NEXT: addi a0, sp, 16 @@ -809,20 +809,30 @@ define <16 x i64> @fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vx v8, v24, a0, v0.t -; RV32-NEXT: vsrl.vv v16, v16, v8, v0.t +; RV32-NEXT: vsrl.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsll.vi v16, v8, 1, v0.t ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vxor.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsll.vi v24, v24, 1, v0.t -; RV32-NEXT: vsll.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsll.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -866,9 +876,9 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) ; RV32-NEXT: addi a0, sp, 16 @@ -878,19 +888,29 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vx v8, v24, a0, v0.t ; RV32-NEXT: vsll.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v8, 1, 
v0.t ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v24, v16, v0.t -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v24, 1, v0.t -; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vxor.vv v8, v24, v8, v0.t +; RV32-NEXT: vand.vx v8, v8, a0, v0.t +; RV32-NEXT: vsrl.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index b3cda0a4ac342..8019d3f3c75d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -456,10 +456,10 @@ define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vsext.vf2 v12, v11 ; LMULMAX1-NEXT: vfwcvt.f.x.v v11, v12 -; LMULMAX1-NEXT: vsext.vf2 v12, v10 -; LMULMAX1-NEXT: vfwcvt.f.x.v v10, v12 ; LMULMAX1-NEXT: vsext.vf2 v12, v8 ; LMULMAX1-NEXT: vfwcvt.f.x.v v8, v12 +; LMULMAX1-NEXT: vsext.vf2 v12, v10 +; LMULMAX1-NEXT: vfwcvt.f.x.v v10, v12 ; LMULMAX1-NEXT: addi a0, a1, 32 ; LMULMAX1-NEXT: vse64.v v10, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) @@ -500,10 +500,10 @@ define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vzext.vf2 v12, v11 ; LMULMAX1-NEXT: vfwcvt.f.xu.v v11, v12 -; LMULMAX1-NEXT: vzext.vf2 v12, v10 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v10, v12 ; LMULMAX1-NEXT: vzext.vf2 v12, v8 ; LMULMAX1-NEXT: vfwcvt.f.xu.v v8, v12 +; LMULMAX1-NEXT: vzext.vf2 v12, v10 +; LMULMAX1-NEXT: vfwcvt.f.xu.v v10, v12 ; LMULMAX1-NEXT: addi a0, a1, 32 ; LMULMAX1-NEXT: vse64.v v10, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index b823814630db4..1732e19b883f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -106,14 +106,15 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind { ; CHECK-LABEL: insertelt_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: vslideup.vi v12, v8, 1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vand.vi v8, v8, 1 +; CHECK-NEXT: vand.vi v8, v12, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i64 1 @@ -124,15 +125,16 @@ define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) n ; CHECK-LABEL: insertelt_idx_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: 
vsetvli zero, a2, e8, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma -; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: vslideup.vx v12, v8, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vand.vi v8, v8, 1 +; CHECK-NEXT: vand.vi v8, v12, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 6ad736f0dbaf9..8395c092cadb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -458,14 +458,15 @@ define @insert_nxv2i1_v4i1_0( %v, ptr %svp) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vlm.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma ; CHECK-NEXT: vmv.v.v v9, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 2d4d798f164c2..4e756448663ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -154,10 +154,10 @@ define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() { ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: li a0, -3 -; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: vmadd.vx v8, a0, v9 ; CHECK-NEXT: ret ret <4 x i8> } @@ -166,10 +166,10 @@ define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) ; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vmv.v.i v9, -3 +; CHECK-NEXT: vmv.v.i v8, -3 +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a4, -3 -; CHECK-NEXT: vmacc.vx v9, a4, v8 +; CHECK-NEXT: vmadd.vx v9, a4, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: vse32.v v9, (a1) ; CHECK-NEXT: vse32.v v9, (a2) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll index d3c843c304f5e..4686870754cea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll @@ -55,11 +55,12 @@ define void @sext_v8i8_v8i32(ptr %x, ptr %z) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vle8.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; 
LMULMAX1-NEXT: vslidedown.vi v9, v8, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v9 ; LMULMAX1-NEXT: vsext.vf4 v9, v8 +; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsext.vf4 v10, v8 ; LMULMAX1-NEXT: addi a0, a1, 16 ; LMULMAX1-NEXT: vse32.v v10, (a0) ; LMULMAX1-NEXT: vse32.v v9, (a1) @@ -132,17 +133,17 @@ define void @sext_v32i8_v32i32(ptr %x, ptr %z) { ; LMULMAX1-NEXT: vslidedown.vi v15, v12, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vsext.vf4 v16, v15 -; LMULMAX1-NEXT: vsext.vf4 v15, v10 -; LMULMAX1-NEXT: vsext.vf4 v10, v12 -; LMULMAX1-NEXT: vsext.vf4 v12, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v9 +; LMULMAX1-NEXT: vsext.vf4 v15, v8 +; LMULMAX1-NEXT: vsext.vf4 v8, v10 +; LMULMAX1-NEXT: vsext.vf4 v10, v9 +; LMULMAX1-NEXT: vsext.vf4 v9, v12 ; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v9, (a0) +; LMULMAX1-NEXT: vse32.v v10, (a1) ; LMULMAX1-NEXT: addi a0, a1, 96 -; LMULMAX1-NEXT: vse32.v v15, (a0) +; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: addi a0, a1, 64 -; LMULMAX1-NEXT: vse32.v v12, (a0) +; LMULMAX1-NEXT: vse32.v v15, (a0) ; LMULMAX1-NEXT: addi a0, a1, 48 ; LMULMAX1-NEXT: vse32.v v16, (a0) ; LMULMAX1-NEXT: addi a0, a1, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 40412ae8e322f..adbb69a36ac09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -99,10 +99,10 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vi v10, v9, 4 -; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vrsub.vi v10, v10, 4 ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret @@ -219,33 +219,32 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-LABEL: vrgather_shuffle_xv_v8i64: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: lui a0, %hi(.LCPI12_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 ; RV32-NEXT: vrgatherei16.vv v12, v20, v16 -; RV32-NEXT: lui a0, %hi(.LCPI12_1) -; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: lui a0, %hi(.LCPI12_1) +; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_xv_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI12_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: li a0, 113 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: lui a0, 
%hi(.LCPI12_0) +; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0) +; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -262,13 +261,13 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) { ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vrgatherei16.vv v12, v8, v16 -; RV32-NEXT: lui a0, %hi(.LCPI13_1) -; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) -; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a0, 140 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: lui a0, %hi(.LCPI13_1) +; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) +; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: vmv.v.i v16, 5 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v12 @@ -276,14 +275,13 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) { ; ; RV64-LABEL: vrgather_shuffle_vx_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI13_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: li a0, 115 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI13_0) +; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmv.v.i v12, 5 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -388,9 +386,10 @@ define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) { define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -421,12 +420,13 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i0we4: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 -; CHECK-NEXT: li a0, 67 -; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -475,9 +475,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.v.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll index c8c2aea4b4ebb..7e092ae0a7574 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -339,8 +339,8 @@ define void @splat_zero_v32i8(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, 
ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse8.v v8, (a1) ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <32 x i8> poison, i8 0, i32 0 @@ -368,8 +368,8 @@ define void @splat_zero_v16i16(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x i16> poison, i16 0, i32 0 @@ -397,8 +397,8 @@ define void @splat_zero_v8i32(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x i32> poison, i32 0, i32 0 @@ -426,8 +426,8 @@ define void @splat_zero_v4i64(ptr %x) { ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a0, 16 +; LMULMAX1-RV32-NEXT: addi a1, a0, 16 +; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -435,8 +435,8 @@ define void @splat_zero_v4i64(ptr %x) { ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 0, i32 0 @@ -632,8 +632,8 @@ define void @splat_allones_v32i8(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse8.v v8, (a1) ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <32 x i8> poison, i8 -1, i32 0 @@ -661,8 +661,8 @@ define void @splat_allones_v16i16(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x i16> poison, i16 -1, i32 0 @@ -690,8 +690,8 @@ define void @splat_allones_v8i32(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x i32> poison, i32 -1, i32 0 @@ -719,8 +719,8 @@ define void @splat_allones_v4i64(ptr %x) { ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a0, 16 +; LMULMAX1-RV32-NEXT: addi a1, a0, 16 +; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 
; LMULMAX1-RV32-NEXT: ret ; @@ -728,8 +728,8 @@ define void @splat_allones_v4i64(ptr %x) { ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 -1, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 7dc7d7c49b4dc..98f43e1c0af51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1105,45 +1105,45 @@ define void @mulhu_v16i8(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: lui a1, 3 -; RV32-NEXT: addi a1, a1, -2044 +; RV32-NEXT: li a1, 513 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a1, -128 -; RV32-NEXT: vmerge.vxm v10, v9, a1, v0 +; RV32-NEXT: vmv.v.i v9, 4 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 ; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a2, a1, 32 +; RV32-NEXT: addi a2, a1, 78 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: lui a2, %hi(.LCPI65_0) -; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV32-NEXT: vle8.v v11, (a2) -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsrl.vv v9, v8, v9 -; RV32-NEXT: vmulhu.vv v9, v9, v11 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vmulhu.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: li a2, 513 +; RV32-NEXT: vmerge.vim v9, v9, 3, v0 +; RV32-NEXT: lui a2, 8 +; RV32-NEXT: addi a2, a2, 304 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 4 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a1, a1, 78 +; RV32-NEXT: vmerge.vim v9, v9, 2, v0 +; RV32-NEXT: lui a2, 3 +; RV32-NEXT: addi a2, a2, -2044 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a1 +; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 3, v0 -; RV32-NEXT: lui a1, 8 -; RV32-NEXT: addi a1, a1, 304 +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: li a2, -128 +; RV32-NEXT: vmerge.vxm v11, v10, a2, v0 +; RV32-NEXT: addi a1, a1, 32 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 +; RV32-NEXT: lui a1, %hi(.LCPI65_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI65_0) +; RV32-NEXT: vle8.v v12, (a1) +; RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; RV32-NEXT: vsrl.vv v10, v8, v10 +; RV32-NEXT: vmulhu.vv v10, v10, v12 +; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: vmulhu.vv v8, v8, v11 +; RV32-NEXT: vadd.vv v8, v8, v10 ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret @@ -1152,45 +1152,45 @@ define void @mulhu_v16i8(ptr %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: lui a1, 3 -; RV64-NEXT: addiw a1, a1, -2044 +; RV64-NEXT: li a1, 513 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 
0 -; RV64-NEXT: li a1, -128 -; RV64-NEXT: vmerge.vxm v10, v9, a1, v0 +; RV64-NEXT: vmv.v.i v9, 4 +; RV64-NEXT: vmerge.vim v9, v9, 1, v0 ; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a2, a1, 32 +; RV64-NEXT: addiw a2, a1, 78 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: lui a2, %hi(.LCPI65_0) -; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV64-NEXT: vle8.v v11, (a2) -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vsrl.vv v9, v8, v9 -; RV64-NEXT: vmulhu.vv v9, v9, v11 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vmulhu.vv v8, v8, v10 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: li a2, 513 +; RV64-NEXT: vmerge.vim v9, v9, 3, v0 +; RV64-NEXT: lui a2, 8 +; RV64-NEXT: addiw a2, a2, 304 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 4 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: addiw a1, a1, 78 +; RV64-NEXT: vmerge.vim v9, v9, 2, v0 +; RV64-NEXT: lui a2, 3 +; RV64-NEXT: addiw a2, a2, -2044 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 3, v0 -; RV64-NEXT: lui a1, 8 -; RV64-NEXT: addiw a1, a1, 304 +; RV64-NEXT: vmv.v.i v10, 0 +; RV64-NEXT: li a2, -128 +; RV64-NEXT: vmerge.vxm v11, v10, a2, v0 +; RV64-NEXT: addiw a1, a1, 32 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 2, v0 +; RV64-NEXT: lui a1, %hi(.LCPI65_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI65_0) +; RV64-NEXT: vle8.v v12, (a1) +; RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; RV64-NEXT: vsrl.vv v10, v8, v10 +; RV64-NEXT: vmulhu.vv v10, v10, v12 +; RV64-NEXT: vsub.vv v8, v8, v10 +; RV64-NEXT: vmulhu.vv v8, v8, v11 +; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret @@ -1205,30 +1205,31 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a1, 33 -; CHECK-NEXT: vmv.v.x v0, a1 -; CHECK-NEXT: vmv.v.i v9, 3 -; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 -; CHECK-NEXT: vmv.v.i v10, 1 -; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v10, 6 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: lui a1, 1048568 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmv.s.x v12, a1 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 6 +; CHECK-NEXT: vslideup.vi v9, v11, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) -; CHECK-NEXT: vle16.v v10, (a1) -; CHECK-NEXT: vsrl.vv v11, v8, v11 -; CHECK-NEXT: vmulhu.vv v10, v11, v10 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: vmulhu.vv v8, v8, v12 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vle16.v v12, (a1) +; CHECK-NEXT: vsrl.vv v9, v8, v9 +; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: li a1, 33 +; CHECK-NEXT: 
vmv.v.x v0, a1 +; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 +; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -1438,16 +1439,16 @@ define void @mulhs_v6i16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: vmv.v.i v9, -7 ; CHECK-NEXT: vmerge.vim v9, v9, 7, v0 ; CHECK-NEXT: vdiv.vv v9, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vmv.v.i v11, 7 +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: li a1, -14 -; CHECK-NEXT: vmacc.vx v11, a1, v10 +; CHECK-NEXT: vmadd.vx v11, a1, v10 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma @@ -5072,38 +5073,38 @@ define void @mulhu_v16i16(ptr %x) { ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV32-NEXT: li a1, 257 +; LMULMAX2-RV32-NEXT: lui a1, 2 +; LMULMAX2-RV32-NEXT: addi a1, a1, 289 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV32-NEXT: lui a1, 1048568 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v12, a1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 3 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v8, 2, v0 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v12, v10, v12 -; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: lui a1, 2 -; LMULMAX2-RV32-NEXT: addi a1, a1, 289 +; LMULMAX2-RV32-NEXT: li a1, 257 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) +; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) +; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) +; LMULMAX2-RV32-NEXT: lui a1, 1048568 +; LMULMAX2-RV32-NEXT: vmerge.vxm v18, v14, a1, v0 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v12, 1, v0 +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v14, 1, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v8 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v16 +; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v18 +; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -5111,38 +5112,38 @@ define void @mulhu_v16i16(ptr %x) { ; LMULMAX2-RV64: # 
%bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV64-NEXT: li a1, 257 +; LMULMAX2-RV64-NEXT: lui a1, 2 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV64-NEXT: lui a1, 1048568 -; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v12, a1, v0 +; LMULMAX2-RV64-NEXT: vmv.v.i v8, 3 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v8, 2, v0 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v12, v10, v12 -; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v12, v16 -; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-RV64-NEXT: vadd.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: lui a1, 2 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 +; LMULMAX2-RV64-NEXT: li a1, 257 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0 +; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) +; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) +; LMULMAX2-RV64-NEXT: lui a1, 1048568 +; LMULMAX2-RV64-NEXT: vmerge.vxm v18, v14, a1, v0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v8, v12, 1, v0 +; LMULMAX2-RV64-NEXT: vmerge.vim v8, v14, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v16 +; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v18 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5656,10 +5657,10 @@ define void @mulhs_v4i64(ptr %x) { ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5 ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV64-NEXT: lui a1, 349525 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index a491b26b45d10..80abefb2639ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12359,24 +12359,26 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; ; RV64V-LABEL: mgather_baseidx_v32i8: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64V-NEXT: vmv1r.v v12, v10 -; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t +; 
RV64V-NEXT: vmv1r.v v12, v0 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v10, 16 -; RV64V-NEXT: vslidedown.vi v8, v8, 16 +; RV64V-NEXT: vslidedown.vi v14, v8, 16 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsext.vf8 v16, v14 +; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64V-NEXT: vslidedown.vi v14, v10, 16 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-NEXT: vluxei64.v v14, (a0), v16, v0.t +; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vmv1r.v v0, v12 ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64V-NEXT: vslideup.vi v12, v10, 16 -; RV64V-NEXT: vmv.v.v v8, v12 +; RV64V-NEXT: vslideup.vi v10, v14, 16 +; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 74eeb01f57ae5..56377658a1ca2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -10854,10 +10854,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vslidedown.vi v10, v10, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 369a7ad175a5a..1769caabf173a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -968,10 +968,10 @@ define i64 @vwreduce_add_v1i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vsext.vf2 v9, v8 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v8, v9, a0 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vwreduce_add_v1i64: @@ -993,10 +993,10 @@ define i64 @vwreduce_uadd_v1i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v8, v9, a0 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vwreduce_uadd_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 6bdb1a7e85e84..236ae793dc521 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -246,8 +246,8 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 
zer ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -262,8 +262,8 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -512,8 +512,8 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -528,8 +528,8 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -854,9 +854,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v12, fa0, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -870,9 +870,9 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v12, fa0, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -1123,9 +1123,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v12, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v8 ; CHECK-NEXT: ret %elt.head = 
insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -1139,9 +1139,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v12, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll index ea14e40d08751..e6b3c25b5d935 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -139,10 +139,10 @@ define void @store_constant_v2i32(ptr %p) { ; CHECK-LABEL: store_constant_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a1, 3 -; CHECK-NEXT: vmacc.vx v9, a1, v8 +; CHECK-NEXT: vmadd.vx v9, a1, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: ret store <2 x i32> , ptr %p @@ -215,10 +215,10 @@ define void @store_constant_v2i8_align1(ptr %p) { ; CHECK-LABEL: store_constant_v2i8_align1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a1, 3 -; CHECK-NEXT: vmacc.vx v9, a1, v8 +; CHECK-NEXT: vmadd.vx v9, a1, v8 ; CHECK-NEXT: vse8.v v9, (a0) ; CHECK-NEXT: ret store <2 x i8> , ptr %p, align 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index b435aed268a2d..d0b2cabb8a649 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1649,16 +1649,17 @@ define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-LABEL: vadd_vx_v32i64_evl27: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64_evl27: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll index 2046c51967037..7b41aeaca9661 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll @@ -3227,10 +3227,10 @@ define <32 x i1> @fcmps_uno_fv_v32f16(<32 x half> %va, half %b) nounwind 
strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmfle.vf v8, v12, fa0 -; CHECK-NEXT: vmnot.m v8, v8 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v16, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v16 +; CHECK-NEXT: vmfle.vv v13, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v13 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 30be917939e9d..7ebfca70481bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -1941,41 +1941,41 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB87_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB87_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB87_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB87_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v10, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2001,29 +2001,28 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li 
a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB88_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB88_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB88_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB88_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: @@ -2034,8 +2033,8 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2062,29 +2061,28 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vzext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB89_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB89_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB89_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB89_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: @@ -2095,8 +2093,8 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vzext.vf8 v16, v12 ; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsll.vi v24, v24, 3 +; 
RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2123,41 +2121,41 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB90_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB90_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB90_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB90_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2183,29 +2181,28 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB91_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB91_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, 
e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB91_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB91_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: @@ -2214,10 +2211,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v24, v16 -; RV64-NEXT: vsext.vf4 v0, v8 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vsext.vf4 v0, v16 +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v16, v0, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2244,29 +2241,28 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vzext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB92_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB92_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB92_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB92_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: @@ -2275,10 +2271,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v16 -; RV64-NEXT: vzext.vf4 v0, v8 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vzext.vf4 v0, v16 +; RV64-NEXT: vzext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v16, v0, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2331,22 +2327,31 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV64: # %bb.0: +; RV64-NEXT: addi 
sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v0, v16 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: vsext.vf2 v0, v8 -; RV64-NEXT: vsll.vi v8, v0, 3 +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 ; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: li a2, 16 ; RV64-NEXT: bltu a1, a2, .LBB93_2 ; RV64-NEXT: # %bb.1: @@ -2354,7 +2359,13 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-NEXT: .LBB93_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2390,22 +2401,14 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v0, v16 -; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vl1r.v v24, (a2) # Unknown-size Folded Reload ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2422,10 +2425,6 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2462,22 +2461,14 @@ define <32 x double> 
@vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v0, v16 -; RV64-NEXT: vzext.vf2 v24, v8 +; RV64-NEXT: vzext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vl1r.v v24, (a2) # Unknown-size Folded Reload ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a3, a1, a2 ; RV64-NEXT: addi a3, a3, -1 @@ -2494,10 +2485,6 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll index 15d4e707ee5d8..2c917fc09fa8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -53,10 +53,11 @@ define @test_signed_v8f32_v8i32( %f) { define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) @@ -66,10 +67,11 @@ define @test_signed_v4f32_v4i16( %f) { define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) @@ -80,8 +82,8 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -93,8 +95,8 @@ define @test_signed_v4f32_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, 
zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -115,10 +117,11 @@ declare @llvm.fptosi.sat.nxv4f64.nxv4i64( @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) @@ -128,10 +131,11 @@ define @test_signed_v2f64_v2i32( %f) { define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) @@ -141,10 +145,11 @@ define @test_signed_v4f64_v4i32( %f) { define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i32( %f) @@ -236,8 +241,8 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: ret @@ -249,8 +254,8 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -262,8 +267,8 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -299,10 +304,11 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -314,10 +320,11 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: 
test_signed_v4f16_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll index dc068ef7a63af..368a5524aaad3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll @@ -53,10 +53,11 @@ define @test_signed_v8f32_v8i32( %f) { define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) @@ -66,10 +67,11 @@ define @test_signed_v4f32_v4i16( %f) { define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) @@ -80,8 +82,8 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -93,8 +95,8 @@ define @test_signed_v4f32_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -115,10 +117,11 @@ declare @llvm.fptoui.sat.nxv4f64.nxv4i64( @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) @@ -128,10 +131,11 @@ define @test_signed_v2f64_v2i32( %f) { define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) @@ -141,10 +145,11 @@ define @test_signed_v4f64_v4i32( %f) { define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) @@ -254,8 +259,8 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: ret @@ -267,8 +272,8 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -280,8 +285,8 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -317,10 +322,11 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -332,10 +338,11 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index d6ec784144be6..fd6cf687d5d3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -2176,10 +2176,7 @@ define @mgather_baseidx_nxv16i8(ptr %base, ; ; RV64-LABEL: mgather_baseidx_nxv16i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv1r.v v12, v0 ; 
RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma @@ -2188,6 +2185,11 @@ define @mgather_baseidx_nxv16i8(ptr %base, ; RV64-NEXT: vsext.vf8 v16, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: vmv2r.v v8, v10 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs @@ -2200,45 +2202,49 @@ declare @llvm.masked.gather.nxv32i8.nxv32p0( @mgather_baseidx_nxv32i8(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv32i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32-NEXT: vmv1r.v v16, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v16, v10 +; RV32-NEXT: vsext.vf4 v24, v10 +; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; RV32-NEXT: vluxei32.v v14, (a0), v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; RV32-NEXT: vsext.vf4 v24, v8 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; RV32-NEXT: vluxei32.v v14, (a0), v16, v0.t +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vluxei32.v v12, (a0), v24, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_baseidx_nxv32i8: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v16, v0 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV64-NEXT: srli a2, a1, 2 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v17, v0, a2 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v10 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vmv1r.v v0, v17 +; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v16, a1 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t -; RV64-NEXT: srli a1, a1, 2 -; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v0, v16, a1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v17, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll 
b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index c4c849dfed449..a3f74ea3058e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -905,8 +905,8 @@ define void @test_dag_loop() { ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (zero) ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu ; CHECK-NEXT: vmv4r.v v20, v16 ; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index b86f36336081f..941c3bbb4ab3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -234,8 +234,8 @@ define @fcmp_ord_vf_nxv1f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -250,8 +250,8 @@ define @fcmp_ord_vf_swap_nxv1f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -500,8 +500,8 @@ define @fcmp_uno_vf_nxv1f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -516,8 +516,8 @@ define @fcmp_uno_vf_swap_nxv1f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -783,9 +783,9 @@ define @fcmp_ord_vf_nxv8f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t +; CHECK-NEXT: vmand.mm v0, v12, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -799,9 +799,9 @@ define @fcmp_ord_vf_swap_nxv8f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement 
poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1052,9 +1052,9 @@ define @fcmp_uno_vf_nxv8f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v12, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1068,9 +1068,9 @@ define @fcmp_uno_vf_swap_nxv8f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1369,8 +1369,8 @@ define @fcmp_ord_vf_nxv1f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1385,8 +1385,8 @@ define @fcmp_ord_vf_swap_nxv1f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1635,8 +1635,8 @@ define @fcmp_uno_vf_nxv1f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1651,8 +1651,8 @@ define @fcmp_uno_vf_swap_nxv1f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1919,9 +1919,9 @@ define @fcmp_ord_vf_nxv8f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t +; CHECK-NEXT: vmand.mm v0, v24, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1935,9 +1935,9 @@ define @fcmp_ord_vf_swap_nxv8f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f 
v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2188,9 +2188,9 @@ define @fcmp_uno_vf_nxv8f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v24, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2204,9 +2204,9 @@ define @fcmp_uno_vf_swap_nxv8f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 30ec0899da8e4..5b4018fe8189e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -78,11 +78,11 @@ define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -224,11 +224,11 @@ define <16 x i16> @v8i16_2(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v16 -; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -341,10 +341,10 @@ define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: vrsub.vi v8, v14, 3 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vrsub.vi v8, v14, 3 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -373,11 +373,11 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK-NEXT: vid.v v20 ; CHECK-NEXT: vrsub.vi v24, v20, 15 ; CHECK-NEXT: vrgather.vv v12, v8, v24 -; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -501,10 +501,9 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) { ; RV32-NEXT: vrsub.vi v19, v18, 7 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v12, v8, v19 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.i v0, 15 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vrsub.vi v8, v18, 3 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v12 @@ -517,10 +516,10 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) { ; RV64-NEXT: vid.v v20 ; RV64-NEXT: vrsub.vi v24, v20, 7 ; RV64-NEXT: vrgather.vv v12, v8, v24 +; RV64-NEXT: vrsub.vi v8, v20, 3 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrsub.vi v8, v20, 3 ; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret @@ -605,11 +604,11 @@ define <16 x half> @v8f16_2(<8 x half> %a, <8 x half> %b) { ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v16 -; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -693,10 +692,10 @@ define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: vrsub.vi v8, v14, 3 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vrsub.vi v8, v14, 3 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -725,11 +724,11 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK-NEXT: vid.v v20 ; CHECK-NEXT: vrsub.vi v24, v20, 15 ; CHECK-NEXT: vrgather.vv v12, v8, v24 -; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -796,10 +795,9 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { ; RV32-NEXT: vrsub.vi v19, v18, 7 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v12, v8, v19 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.i v0, 15 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vrsub.vi v8, v18, 3 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v12 @@ -812,10 +810,10 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { ; RV64-NEXT: vid.v v20 ; RV64-NEXT: vrsub.vi v24, v20, 7 ; RV64-NEXT: vrgather.vv v12, v8, v24 +; RV64-NEXT: vrsub.vi v8, v20, 3 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vsetivli zero, 
8, e64, m4, ta, mu -; RV64-NEXT: vrsub.vi v8, v20, 3 ; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index 443fe93a618c5..8fb00364e285d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -9,18 +9,18 @@ declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>) define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-LABEL: vec_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vxm v8, v10, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) ret <2 x i64> %tmp @@ -29,19 +29,19 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addiw a0, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: slli a0, a0, 31 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vxm v8, v10, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -50,10 +50,10 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-LABEL: vec_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 @@ -69,17 +69,17 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-LABEL: vec_v16i8: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v9, v8, v9 ; CHECK-NEXT: vmsle.vi v0, v8, -1 -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; 
CHECK-NEXT: vsra.vv v9, v10, v9
-; CHECK-NEXT: vmsne.vv v8, v8, v9
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: li a0, 128
-; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
; CHECK-NEXT: ret
%tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %tmp
@@ -94,15 +94,15 @@ define @vec_nxv2i64( %x,
; CHECK-LABEL: vec_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vmsle.vi v0, v8, -1
-; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
-; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a1, a0, 1
+; CHECK-NEXT: vmv.v.x v14, a1
+; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: slli a0, a0, 63
-; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: ret
@@ -114,16 +114,16 @@ define @vec_nxv4i32( %x,
; CHECK-LABEL: vec_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmsle.vi v0, v8, -1
-; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vmv.v.x v14, a0
+; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 31
-; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: ret
@@ -135,12 +135,12 @@ define @vec_nxv8i16( %x,
; CHECK-LABEL: vec_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmsle.vi v0, v8, -1
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a1, a0, -1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addiw a1, a0, -1
+; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -154,14 +154,14 @@ define @vec_nxv16i8( %x,
; CHECK-LABEL: vec_nxv16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmsle.vi v0, v8, -1
-; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vmv.v.x v14, a0
+; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: li a0, 128
-; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 8d353ddcb060d..5e1e0fb5f6d29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -17,12 +17,12 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: vadd.vi v12, v11, -16
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -256
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32-NEXT: vmv.v.x v0, a0
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vadd.vi v12, v11, -16
; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t
; RV32-NEXT: vmsne.vi v9, v9, 0
; RV32-NEXT: vadd.vi v12, v11, 1
@@ -45,12 +45,12 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: vadd.vi v12, v11, -16
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -256
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT: vmv.v.x v0, a0
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vadd.vi v12, v11, -16
; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t
; RV64-NEXT: vmsne.vi v9, v9, 0
; RV64-NEXT: vadd.vi v12, v11, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 18ced7052fc2d..1ec638257e55e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -7,12 +7,13 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; CHECK-LABEL: vector_interleave_v32i1_v16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
-; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
index 116d8d56eb2a2..46f3b0975b73b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
@@ -31,7 +31,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v12m4 = PseudoVMV_V_V_M4 $v28m4, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v12m4 = PseudoVMV_V_V_M4 undef $v12m4, $v28m4, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
$v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
$v12m4 = COPY $v28m4
@@ -47,10 +47,10 @@ body: |
; CHECK: liveins: $x14
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v12m4 = PseudoVMV_V_I_M4 0, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v28m4 = PseudoVMV_V_I_M4 undef $v28m4, 0, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v12m4 = PseudoVMV_V_I_M4 undef $v12m4, 0, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
- $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+ $v28m4 = PseudoVMV_V_I_M4 undef $v28m4, 0, $noreg, 5, 0, implicit $vl, implicit $vtype
$v12m4 = COPY $v28m4
...
---
@@ -81,11 +81,11 @@ body: |
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v28m4 = PseudoVMV_V_I_M4 undef $v28m4, 0, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v4m4, $x0 = PseudoVLE32FF_V_M4 $x16, $noreg, 5 /* e32 */, implicit-def $vl
; CHECK-NEXT: $v12m4 = VMV4R_V $v28m4
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
- $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+ $v28m4 = PseudoVMV_V_I_M4 undef $v28m4, 0, $noreg, 5, 0, implicit $vl, implicit $vtype
$v4m4,$x0 = PseudoVLE32FF_V_M4 $x16, $noreg, 5, implicit-def $vl
$v12m4 = COPY $v28m4
...
@@ -132,7 +132,7 @@ body: |
; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETVLIX0 $x0, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v12m4 = PseudoVMV_V_V_M4 $v28m4, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v12m4 = PseudoVMV_V_V_M4 undef $v12m4, $v28m4, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
$v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
$x0 = PseudoVSETVLIX0 $x0, 73, implicit-def $vl, implicit-def $vtype
@@ -253,8 +253,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v10 = PseudoVMV_V_V_M1 $v8, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v11 = PseudoVMV_V_V_M1 $v9, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v10 = PseudoVMV_V_V_M1 undef $v10, $v8, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v11 = PseudoVMV_V_V_M1 undef $v11, $v9, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
$v8_v9 = PseudoVLSEG2E32_V_M1 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
$v10_v11 = COPY $v8_v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index be927fcbda9c7..e51bc9c4daff8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -978,14 +978,14 @@ declare half @llvm.vector.reduce.fadd.nxv3f16(half, )
define half @vreduce_ord_fadd_nxv3f16( %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv3f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: lui a2, 1048568
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a2
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
@@ -1002,16 +1002,16 @@ declare half @llvm.vector.reduce.fadd.nxv6f16(half, )
define half @vreduce_ord_fadd_nxv6f16( %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v11, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v9, v10, a0
+; CHECK-NEXT: vslideup.vx v9, v11, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1029,13 +1029,12 @@ define half @vreduce_ord_fadd_nxv10f16( %v, half %s) {
; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmv.v.v v11, v12
+; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
+; CHECK-NEXT: vslideup.vx v10, v12, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v12
@@ -1066,14 +1065,14 @@ define half @vreduce_ord_fadd_nxv12f16( %v, half %s) {
define half @vreduce_fadd_nxv3f16( %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv3f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: lui a2, 1048568
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a2
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
@@ -1088,16 +1087,16 @@ define half @vreduce_fadd_nxv3f16( %v, half %s) {
define half @vreduce_fadd_nxv6f16( %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv6f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v11, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v9, v10, a0
+; CHECK-NEXT: vslideup.vx v9, v11, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1110,19 +1109,18 @@ declare half @llvm.vector.reduce.fmin.nxv10f16()
define half @vreduce_fmin_nxv10f16( %v) {
; CHECK-LABEL: vreduce_fmin_nxv10f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI73_0)
+; CHECK-NEXT: flh fa5, %lo(.LCPI73_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v12, fa5
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: lui a1, %hi(.LCPI73_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI73_0)(a1)
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v12, fa5
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmv.v.v v11, v12
+; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
+; CHECK-NEXT: vslideup.vx v10, v12, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa5
; CHECK-NEXT: vfredmin.vs v8, v8, v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index d29b6306f581a..0cbb7df2a339f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -284,15 +284,17 @@ body: |
; CHECK-NEXT: bb.1.if.then:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %dead1:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 %dead1, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoBR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.else:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %dead2:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: early-clobber %2:vr = PseudoVSEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %2:vr = PseudoVSEXT_VF2_M1 %dead2, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI %1, %bb.1, %2, %bb.2
@@ -312,11 +314,13 @@ body: |
PseudoBR %bb.1
bb.1.if.then:
- early-clobber %1:vr = PseudoVZEXT_VF2_M1 %0, %7, 6
+ %dead1:vr = IMPLICIT_DEF
+ early-clobber %1:vr = PseudoVZEXT_VF2_M1 %dead1, %0, %7, 6, 0
PseudoBR %bb.3
bb.2.if.else:
- early-clobber %2:vr = PseudoVSEXT_VF2_M1 %0, %7, 6
+ %dead2:vr = IMPLICIT_DEF
+ early-clobber %2:vr = PseudoVSEXT_VF2_M1 %dead2, %0, %7, 6, 0
bb.3.if.end:
%3:vr = PHI %1, %bb.1, %2, %bb.2
@@ -510,8 +514,9 @@ body: |
; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 223 /* e64, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVID_V_MF2_:%[0-9]+]]:vr = PseudoVID_V_MF2 %pt, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
; CHECK-NEXT: dead [[PseudoVSETVLIX0_1:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vrnov0 = PseudoVMV_V_I_MF2 0, -1, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vrnov0 = PseudoVMV_V_I_MF2 %pt2, 0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -546,7 +551,8 @@ body: |
%2:gpr = IMPLICIT_DEF
%pt:vr = IMPLICIT_DEF
%3:vr = PseudoVID_V_MF2 %pt, -1, 6, 0
- %4:vrnov0 = PseudoVMV_V_I_MF2 0, -1, 5
+ %pt2:vr = IMPLICIT_DEF
+ %4:vrnov0 = PseudoVMV_V_I_MF2 %pt2, 0, -1, 5, 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -761,8 +767,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x12
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %dead:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 4, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, 4, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 %dead, 0, 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY [[PseudoVMV_V_I_M1_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vr = COPY [[COPY2]]
; CHECK-NEXT: [[LUI:%[0-9]+]]:gpr = LUI 1
@@ -796,7 +803,8 @@ body: |
%8:gpr = COPY $x12
%6:gpr = COPY $x10
- %11:vr = PseudoVMV_V_I_M1 0, 4, 5
+ %dead:vr = IMPLICIT_DEF
+ %11:vr = PseudoVMV_V_I_M1 %dead, 0, 4, 5, 0
%12:vr = COPY %11
%10:vr = COPY %12
%13:gpr = LUI 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index 247b83581c3e3..80665cd11fc64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -200,13 +200,15 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY1]], $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: %dead:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY %3
; CHECK-NEXT: PseudoRET implicit $v8
%1:gprnox0 = COPY $x11
%0:gpr = COPY $x10
%2:vr = PseudoVLE32_V_MF2 %0, %1, 5
- early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed %2, %1, 6
+ %dead:vr = IMPLICIT_DEF
+ early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed %2, %1, 6, 0
$v8 = COPY %3
PseudoRET implicit $v8
@@ -307,8 +309,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
- ; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, -1, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 undef $v2, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
@@ -317,7 +319,7 @@ body: |
; CHECK-NEXT: PseudoRET implicit $x10
%0:gpr = COPY $x10
%1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load (s128) from %ir.x)
- %2:vr = PseudoVMV_V_I_M1 0, -1, 6
+ %2:vr = PseudoVMV_V_I_M1 undef $v2, 0, -1, 6, 0
%4:vr = IMPLICIT_DEF
%3:vr = PseudoVREDSUM_VS_M1_E8 %4, killed %1, killed %2, 2, 6, 1
%5:gpr = PseudoVMV_X_S_M1 killed %3, 6
@@ -422,11 +424,11 @@ body: |
; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 4, 217 /* e64, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVID_V_M2_:%[0-9]+]]:vrm2 = PseudoVID_V_M2 %pt, 4, 6 /* e64 */, 3 /* ta, ma */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 198 /* e8, mf4, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: [[PseudoVMV_V_I_MF4_:%[0-9]+]]:vr = PseudoVMV_V_I_MF4 0, 4, 3 /* e8 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 134 /* e8, mf4, tu, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
+ ; CHECK-NEXT: [[PseudoVMV_V_I_MF4_:%[0-9]+]]:vr = PseudoVMV_V_I_MF4 undef [[PseudoVMV_V_I_MF4_]], 0, 4, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoRET
%pt:vrm2 = IMPLICIT_DEF
%0:vrm2 = PseudoVID_V_M2 %pt, 4, 6, 3
- %4:vr = PseudoVMV_V_I_MF4 0, 4, 3
+ %4:vr = PseudoVMV_V_I_MF4 undef %4, 0, 4, 3, 0
PseudoRET
...
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
index 7dfc79ab70a0e..6d05a8e62ea11 100644
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
@@ -169,7 +169,7 @@ body: |
; CHECK-NEXT: $x10 = ADD $x2, killed $x10
; CHECK-NEXT: SD killed renamable $x16, killed $x10, 64 :: (store (s64) into %fixed-stack.1, align 16)
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: renamable $v8 = PseudoVMV_V_I_MF8 0, 2, 3 /* e8 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: renamable $v8 = PseudoVMV_V_I_MF8 undef $v8, 0, 2, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x10 = ADDI $x2, 32
; CHECK-NEXT: VS1R_V killed renamable $v8, killed $x10 :: (store unknown-size into %stack.1, align 8)
; CHECK-NEXT: {{ $}}
@@ -200,7 +200,7 @@ body: |
SD killed renamable $x17, %fixed-stack.0, 0 :: (store (s64))
SD killed renamable $x16, %fixed-stack.1, 0 :: (store (s64) into %fixed-stack.1, align 16)
dead $x0 = PseudoVSETIVLI 2, 69, implicit-def $vl, implicit-def $vtype
- renamable $v8 = PseudoVMV_V_I_MF8 0, 2, 3, implicit $vl, implicit $vtype
+ renamable $v8 = PseudoVMV_V_I_MF8 undef $v8, 0, 2, 3, 0, implicit $vl, implicit $vtype
VS1R_V killed renamable $v8, %stack.1 :: (store unknown-size into %stack.1, align 8)
bb.1.while.cond: