From 59aa5312bd4270cf76b6ba64a9646b26167ae41d Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Mon, 1 Dec 2025 11:32:06 +0800 Subject: [PATCH 1/7] [RISCV] Sources of vmerge shouldn't overlap V0 According to the spec: > A vector register cannot be used to provide source operands with more > than one EEW for a single instruction. A mask register source is > considered to have EEW=1 for this constraint. `vmerge` variants always take a mask operand in `V0`, so their vector sources must use register classes that exclude `V0`. This fixes #169905. --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index eb3c9b0defccb..e2103e6f342de 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2984,17 +2984,17 @@ multiclass VPseudoVMRG_VM_XM_IM { defvar mx = m.MX; def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, m.vrclass, m>, + GetVRegNoV0.R, GetVRegNoV0.R, m>, SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx, forcePassthruRead=true>; def "_VXM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, GPR, m>, + GetVRegNoV0.R, GPR, m>, SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx, forcePassthruRead=true>; def "_VIM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, simm5, m>, + GetVRegNoV0.R, simm5, m>, SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx, forcePassthruRead=true>; } From 546261c3241345b028b5f28a2e502a1e2fe8992d Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Mon, 1 Dec 2025 11:36:12 +0800 Subject: [PATCH 2/7] clang-format --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index e2103e6f342de..e36204c536c0d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2982,21 +2982,21 @@ multiclass VPseudoVFWALU_WV_WF_RM { multiclass VPseudoVMRG_VM_XM_IM { foreach m = MxList in { defvar mx = m.MX; - def "_VVM" # "_" # m.MX: - VPseudoTiedBinaryCarryIn.R, - GetVRegNoV0.R, GetVRegNoV0.R, m>, - SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx, - forcePassthruRead=true>; - def "_VXM" # "_" # m.MX: - VPseudoTiedBinaryCarryIn.R, - GetVRegNoV0.R, GPR, m>, - SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx, - forcePassthruRead=true>; - def "_VIM" # "_" # m.MX: - VPseudoTiedBinaryCarryIn.R, - GetVRegNoV0.R, simm5, m>, - SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx, - forcePassthruRead=true>; + def "_VVM"#"_"#m.MX : VPseudoTiedBinaryCarryIn.R, + GetVRegNoV0.R, + GetVRegNoV0.R, m>, + SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx, + forcePassthruRead = true>; + def "_VXM"#"_"#m.MX + : VPseudoTiedBinaryCarryIn.R, + GetVRegNoV0.R, GPR, m>, + SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx, + forcePassthruRead = true>; + def "_VIM"#"_"#m.MX + : VPseudoTiedBinaryCarryIn.R, + GetVRegNoV0.R, simm5, m>, + SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx, + forcePassthruRead = true>; } } From 026669e779b1ab43f7bd03c6661ae6b899f4046a Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Mon, 1 Dec 2025 12:01:06 +0800 Subject: [PATCH 3/7] Update tests --- .../instruction-select/rvv/select.mir | 44 +- .../RISCV/rvv/combine-reduce-add-to-vcpop.ll |
246 ++++-- llvm/test/CodeGen/RISCV/rvv/copyprop.mir | 2 +- .../rvv/fixed-vector-i8-index-cornercase.ll | 55 +- .../rvv/fixed-vectors-interleaved-access.ll | 822 +++++++++--------- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 434 +++++---- .../RISCV/rvv/fixed-vectors-select-addsub.ll | 39 +- .../fixed-vectors-shuffle-int-interleave.ll | 79 +- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 33 +- .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 261 ++---- .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 261 ++---- .../test/CodeGen/RISCV/rvv/mask-reg-alloc.mir | 4 +- llvm/test/CodeGen/RISCV/rvv/pr88576.ll | 2 +- .../RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir | 36 +- .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll | 144 +-- llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll | 144 +-- llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll | 144 +-- llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll | 144 +-- .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 36 +- .../RISCV/rvv/vl-optimizer-subreg-assert.mir | 8 +- llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll | 158 ++-- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 149 ++-- .../CodeGen/RISCV/rvv/vmerge-peephole.mir | 14 +- llvm/test/CodeGen/RISCV/rvv/vmerge.ll | 5 +- llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll | 9 +- .../CodeGen/RISCV/rvv/vmv.v.v-peephole.mir | 2 +- llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll | 158 ++-- llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 33 +- llvm/test/CodeGen/RISCV/rvv/vselect-int.ll | 5 +- 32 files changed, 1901 insertions(+), 1620 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir index f8061462c6220..ada76a43639d7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir @@ -11,7 +11,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv1i8 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] @@ -19,7 +19,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv1i8 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] @@ -40,7 +40,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv4i8 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] @@ -48,7 +48,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv4i8 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = 
IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] @@ -69,7 +69,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv16i8 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] @@ -77,7 +77,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv16i8 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] @@ -98,7 +98,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv64i8 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] @@ -106,7 +106,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv64i8 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] @@ -127,7 +127,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv2i16 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] @@ -135,7 +135,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv2i16 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] @@ -156,7 +156,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv8i16 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] @@ -164,7 +164,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv8i16 ; RV64I: 
[[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] @@ -185,7 +185,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv32i16 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]] @@ -193,7 +193,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv32i16 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]] @@ -214,7 +214,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv2i32 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] @@ -222,7 +222,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv2i32 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] @@ -243,7 +243,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv8i32 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] @@ -251,7 +251,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv8i32 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] @@ -272,7 +272,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv1i64 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], 
[[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] @@ -280,7 +280,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv1i64 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] @@ -301,7 +301,7 @@ body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv4i64 ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] @@ -309,7 +309,7 @@ body: | ; ; RV64I-LABEL: name: select_nxv4i64 ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll index 2d4fce68f9545..27b53befbf4a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll @@ -288,54 +288,89 @@ define i32 @test_nxv128i1( %x) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; CHECK-NEXT: srli a1, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v6, a0 +; CHECK-NEXT: 
vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a0 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v24, v16, 1 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v7, a1 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v7, a0 -; CHECK-NEXT: vslidedown.vx v5, v6, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vx v0, v6, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v24, v8, 1 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v16, v8, 1 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v24, v24, 1 ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vadd.vv v16, v24, v16 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -353,12 +388,14 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v6, v10 ; CHECK-NEXT: vmv1r.v v7, v9 @@ -368,9 +405,9 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: 
slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -378,7 +415,10 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill @@ -391,127 +431,169 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v3, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v2, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v24, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v4, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v5, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v6, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v24, v16, 1 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v7, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vadd.vv v8, v16, v8 -; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v24, v8, 1 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 
16 ; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v5, v7, a0 -; CHECK-NEXT: vslidedown.vx v4, v6, a0 +; CHECK-NEXT: vslidedown.vx v5, v6, a0 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v5, a1 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, 1 +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v4, v7, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v4, a1 -; CHECK-NEXT: vslidedown.vx v3, v5, a1 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v3 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v24, v8, 1 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vadd.vv v8, v24, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v24, v8, 1 ; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v8, v16, 1 +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vadd.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v8, v16, 1 ; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t -; CHECK-NEXT: vadd.vv v24, v24, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vv v8, v8, v0 -; CHECK-NEXT: vadd.vv v16, v24, v16 -; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: .cfi_def_cfa sp, 16 @@ -541,16 +623,16 @@ define i16 @test_narrow_nxv64i1( %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v0, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vredsum.vs v8, v16, v8 +; CHECK-NEXT: vslidedown.vx v0, v0, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, 1 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir index 31e79e58f44c5..aba75ffe29d33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir +++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir @@ -43,7 +43,7 @@ body: | %2:gpr = COPY $x11 %1:gpr = COPY $x10 %3:vr = COPY $v8 - %17:vr = PseudoVSLL_VI_M1 undef $noreg, %3, 5, 1, 6 /* e64 */, 0 + %17:vrnov0 = PseudoVSLL_VI_M1 undef $noreg, %3, 5, 1, 6 /* e64 */, 0 %22:vr = PseudoVMSNE_VI_M1 %3, 0, 1, 6 /* e64 */ %23:vmv0 = COPY %22 %25:vrnov0 = PseudoVMERGE_VIM_M1 undef $noreg, %17, -1, %23, 1, 6 /* e64 */ diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index 7a337aa253805..fd467dd0b8912 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -104,55 +104,54 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: addi s0, sp, 1536 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -512 -; CHECK-NEXT: addi a0, sp, 1520 -; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a0, 512 ; CHECK-NEXT: addi a1, sp, 512 -; CHECK-NEXT: vslidedown.vi v0, v24, 5 -; CHECK-NEXT: vmv.x.s a2, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 5 +; CHECK-NEXT: vmv.x.s a2, v8 ; CHECK-NEXT: li a3, 432 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: vmv.v.x v24, a2 ; CHECK-NEXT: li a2, 431 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v0, a2 +; CHECK-NEXT: vslideup.vx v24, v0, a2 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v24, 4 +; CHECK-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-NEXT: li a2, 466 ; CHECK-NEXT: li a3, 465 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vse8.v v24, (a1) +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: lbu a1, 985(sp) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v0, a3 +; CHECK-NEXT: vslideup.vx v24, v0, a3 ; CHECK-NEXT: li a2, 478 ; CHECK-NEXT: lbu a3, 1012(sp) -; CHECK-NEXT: vmv.s.x v24, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: li a1, 477 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a1 +; CHECK-NEXT: vslideup.vx v24, v8, a1 ; CHECK-NEXT: li a1, 501 -; CHECK-NEXT: lui a2, %hi(.LCPI2_1) -; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_1) -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v0, (a2) -; CHECK-NEXT: li a2, 500 -; CHECK-NEXT: vmv.s.x v24, a3 -; CHECK-NEXT: lui a3, %hi(.LCPI2_0) -; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_0) +; CHECK-NEXT: lui a2, %hi(.LCPI2_0) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_0) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v16, (a3) +; CHECK-NEXT: vle8.v v0, (a2) +; CHECK-NEXT: li a2, 500 +; CHECK-NEXT: vmv.s.x v8, a3 +; CHECK-NEXT: lui a3, %hi(.LCPI2_1) +; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_1) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v9, (a3) +; CHECK-NEXT: addi a3, sp, 1520 +; CHECK-NEXT: vs1r.v v9, (a3) # vscale x 8-byte Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: addi a1, sp, 1520 -; CHECK-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-NEXT: vslideup.vx v24, v8, a2 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vrgather.vv v8, v16, v0 +; CHECK-NEXT: addi a0, sp, 1520 +; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: addi sp, s0, -1536 ; CHECK-NEXT: .cfi_def_cfa sp, 1536 ; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 5567310bb2a61..9b35860904f11 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll 
@@ -530,290 +530,267 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 100 +; RV32-NEXT: li a3, 84 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 100 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 84 * vlenb ; RV32-NEXT: addi a4, a1, 128 ; RV32-NEXT: addi a5, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: lui a3, 12 -; RV32-NEXT: lui a6, 12291 -; RV32-NEXT: lui a7, %hi(.LCPI27_0) -; RV32-NEXT: addi a7, a7, %lo(.LCPI27_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a5) -; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: lui a5, 12291 +; RV32-NEXT: vmv.s.x v3, a3 ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a6, 76 +; RV32-NEXT: mul a1, a1, a6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a6, a6, 3 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v16, v24, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a5, 76 -; RV32-NEXT: mul a1, a1, a5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a5, 92 -; RV32-NEXT: mul a1, a1, a5 +; RV32-NEXT: li a6, 60 +; RV32-NEXT: mul a1, a1, a6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill -; RV32-NEXT: vmv1r.v v30, v0 +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v16, v8, 10, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a5, 72 -; RV32-NEXT: mul a1, a1, a5 +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a1, a1, a6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v8, (a4) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 84 +; RV32-NEXT: li a4, 68 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a5, a5, 3 +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle16.v v28, (a7) -; RV32-NEXT: vmv.s.x v0, a6 +; RV32-NEXT: vslideup.vi v28, v24, 2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a4, 76 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 84 +; RV32-NEXT: li a4, 68 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v0, v16, v28 +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 52 +; RV32-NEXT: li a4, 44 ; 
RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v0, (a1) # vscale x 64-byte Folded Spill -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vslideup.vi v8, v24, 2 -; RV32-NEXT: vmv1r.v v0, v30 +; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 92 +; RV32-NEXT: li a4, 60 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload -; RV32-NEXT: vslideup.vi v8, v16, 8, v0.t +; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vslideup.vi v28, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 60 +; RV32-NEXT: li a4, 52 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a7, 49164 -; RV32-NEXT: lui a1, %hi(.LCPI27_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI27_1) -; RV32-NEXT: lui t2, 3 -; RV32-NEXT: lui t1, 196656 -; RV32-NEXT: lui a4, %hi(.LCPI27_3) -; RV32-NEXT: addi a4, a4, %lo(.LCPI27_3) -; RV32-NEXT: lui t0, 786624 -; RV32-NEXT: li a5, 48 -; RV32-NEXT: lui a6, 768 -; RV32-NEXT: addi a7, a7, 12 -; RV32-NEXT: vmv.s.x v0, a7 -; RV32-NEXT: addi t2, t2, 3 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t3, 84 -; RV32-NEXT: mul a7, a7, t3 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # vscale x 64-byte Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: slli a7, a7, 6 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: slli a7, a7, 5 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill -; RV32-NEXT: vmv.s.x v0, t2 -; RV32-NEXT: addi a7, t1, 48 -; RV32-NEXT: csrr t1, vlenb -; RV32-NEXT: li t2, 92 -; RV32-NEXT: mul t1, t1, t2 -; RV32-NEXT: add t1, sp, t1 -; RV32-NEXT: addi t1, t1, 16 -; RV32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload -; RV32-NEXT: csrr t1, vlenb +; RV32-NEXT: vs4r.v v28, (a1) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI27_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI27_0) +; RV32-NEXT: lui a6, 49164 +; RV32-NEXT: lui t1, 3 +; RV32-NEXT: lui t0, 196656 +; RV32-NEXT: lui a7, 786624 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: lui a5, 768 +; RV32-NEXT: addi a6, a6, 12 +; RV32-NEXT: vmv.s.x v0, a6 +; RV32-NEXT: addi t1, t1, 3 +; RV32-NEXT: csrr a6, vlenb ; RV32-NEXT: li t2, 76 -; RV32-NEXT: mul t1, t1, t2 -; RV32-NEXT: add t1, sp, t1 -; RV32-NEXT: addi t1, t1, 16 -; RV32-NEXT: vl8r.v v8, (t1) # vscale x 64-byte Folded Reload +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v16, (a6) # vscale x 64-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li t2, 68 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li t2, 28 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v8, 
(a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv.s.x v0, t1 +; RV32-NEXT: addi a6, t0, 48 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: li t1, 60 +; RV32-NEXT: mul t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 16 +; RV32-NEXT: vl8r.v v8, (t0) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 -; RV32-NEXT: csrr t1, vlenb -; RV32-NEXT: li t2, 44 -; RV32-NEXT: mul t1, t1, t2 -; RV32-NEXT: add t1, sp, t1 -; RV32-NEXT: addi t1, t1, 16 -; RV32-NEXT: vs4r.v v8, (t1) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmv.s.x v0, a7 +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: li t1, 36 +; RV32-NEXT: mul t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 16 +; RV32-NEXT: vs4r.v v8, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmv.s.x v0, a6 ; RV32-NEXT: addi a3, a3, 12 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: slli a7, a7, 6 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: slli a7, a7, 4 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill -; RV32-NEXT: vmv8r.v v16, v24 -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: addi a3, t0, 192 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 92 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload -; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: csrr a6, vlenb ; RV32-NEXT: li t0, 76 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 48 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs4r.v v8, (a7) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: li a3, 192 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 84 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload +; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v16, (a6) # vscale x 64-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li t0, 68 +; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: addi a5, a6, 768 ; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: li a7, 92 -; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: li t0, 20 +; RV32-NEXT: mul a6, a6, t0 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload +; RV32-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: addi a3, a7, 192 ; RV32-NEXT: csrr 
a6, vlenb -; RV32-NEXT: li a7, 76 +; RV32-NEXT: li a7, 60 ; RV32-NEXT: mul a6, a6, a7 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 ; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV32-NEXT: csrr a6, vlenb ; RV32-NEXT: li a7, 40 ; RV32-NEXT: mul a6, a6, a7 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 ; RV32-NEXT: vs4r.v v8, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vle16.v v6, (a1) -; RV32-NEXT: vle16.v v2, (a4) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 84 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: li a3, 192 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li a7, 76 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v16, (a6) # vscale x 64-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li a7, 68 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv.s.x v0, a4 +; RV32-NEXT: addi a4, a5, 768 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 60 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v8, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmv.s.x v0, a4 +; RV32-NEXT: vle16.v v2, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a4, 76 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill -; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a4, 68 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v8, v6 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 92 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 76 +; RV32-NEXT: li a3, 60 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: 
vmerge.vvm v8, v24, v8, v0 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 92 +; RV32-NEXT: li a3, 60 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a3, 44 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v24, v8, v2 -; RV32-NEXT: lui a1, %hi(.LCPI27_2) -; RV32-NEXT: addi a1, a1, %lo(.LCPI27_2) +; RV32-NEXT: lui a1, %hi(.LCPI27_1) +; RV32-NEXT: addi a1, a1, %lo(.LCPI27_1) ; RV32-NEXT: lui a3, 3073 ; RV32-NEXT: addi a3, a3, -1024 ; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v3, (a1) +; RV32-NEXT: vle16.v v30, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 84 +; RV32-NEXT: li a3, 76 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 68 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -821,179 +798,191 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v28, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI27_3) +; RV32-NEXT: addi a1, a1, %lo(.LCPI27_3) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle16.v v28, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 52 +; RV32-NEXT: li a2, 28 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload -; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v28, v16 +; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vrgatherei16.vv v16, v8, v30 +; RV32-NEXT: lui a1, %hi(.LCPI27_2) +; RV32-NEXT: addi a1, a1, %lo(.LCPI27_2) +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vle16.v v20, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 20 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v28, (a1) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v8, v0, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 60 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v20, (a1) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmv.v.v v20, v16 +; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v12, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 60 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul 
a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 44 +; RV32-NEXT: li a2, 52 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # vscale x 32-byte Folded Reload -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v20, v16, v3 -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v24 +; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmv.v.v v12, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 52 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 36 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v24, v12, v20 +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v24, v8 ; RV32-NEXT: lui a1, %hi(.LCPI27_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI27_4) ; RV32-NEXT: lui a2, %hi(.LCPI27_5) ; RV32-NEXT: addi a2, a2, %lo(.LCPI27_5) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v24, (a2) +; RV32-NEXT: vle16.v v28, (a2) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 84 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v16, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vle16.v v1, (a1) ; RV32-NEXT: lui a1, %hi(.LCPI27_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI27_7) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle16.v v16, (a1) +; RV32-NEXT: vle16.v v2, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 76 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload -; RV32-NEXT: vrgatherei16.vv v16, v0, v24 +; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v28, v12, v1 +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v28, v8 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v16, v8, v2 +; RV32-NEXT: lui a1, %hi(.LCPI27_6) +; RV32-NEXT: addi a1, a1, %lo(.LCPI27_6) +; RV32-NEXT: lui a2, %hi(.LCPI27_8) +; RV32-NEXT: addi a2, a2, %lo(.LCPI27_8) +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: lui a1, %hi(.LCPI27_9) +; RV32-NEXT: addi a1, a1, %lo(.LCPI27_9) +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vle16.v v10, (a1) ; RV32-NEXT: 
csrr a1, vlenb -; RV32-NEXT: li a2, 48 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: li a3, 44 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v20, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vs2r.v v10, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle16.v v9, (a2) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 84 +; RV32-NEXT: li a2, 68 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v7, (a1) # vscale x 8-byte Folded Reload -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v20, v7 +; RV32-NEXT: vs1r.v v9, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vrgatherei16.vv v20, v12, v8 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v16 +; RV32-NEXT: vmv.v.v v20, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a2, 76 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 76 +; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl2r.v v28, (a1) # vscale x 16-byte Folded Reload +; RV32-NEXT: vl2r.v v16, (a1) # vscale x 16-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v0, v28 -; RV32-NEXT: lui a1, %hi(.LCPI27_6) -; RV32-NEXT: addi a1, a1, %lo(.LCPI27_6) -; RV32-NEXT: lui a2, %hi(.LCPI27_8) -; RV32-NEXT: addi a2, a2, %lo(.LCPI27_8) -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI27_9) -; RV32-NEXT: addi a1, a1, %lo(.LCPI27_9) -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v6, (a1) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vle16.v v5, (a2) +; RV32-NEXT: vrgatherei16.vv v8, v0, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 60 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v20, (a1) # vscale x 32-byte Folded Reload -; RV32-NEXT: vrgatherei16.vv v0, v20, v4 -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v0, v16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v6 +; RV32-NEXT: vl4r.v v16, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 92 +; RV32-NEXT: li a2, 68 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV32-NEXT: vl1r.v v7, (a1) # vscale x 8-byte Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v12, v5 +; RV32-NEXT: vrgatherei16.vv v12, v16, v7 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v12, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v0, (a1) +; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 192 -; RV32-NEXT: vse32.v v24, (a1) +; 
RV32-NEXT: vse32.v v28, (a1) ; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 6 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 60 +; RV32-NEXT: li a3, 52 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 100 +; RV32-NEXT: li a1, 84 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 @@ -1013,60 +1002,48 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: addi a3, a1, 256 -; RV64-NEXT: li a4, 128 +; RV64-NEXT: addi a1, a1, 256 +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: li a3, 128 ; RV64-NEXT: lui a1, 1 -; RV64-NEXT: vle64.v v8, (a3) -; RV64-NEXT: lui a3, %hi(.LCPI27_0) -; RV64-NEXT: addi a3, a3, %lo(.LCPI27_0) -; RV64-NEXT: vmv.s.x v0, a4 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a5, 61 -; RV64-NEXT: mul a4, a4, a5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vs1r.v v0, (a4) # vscale x 8-byte Folded Spill -; RV64-NEXT: addi a4, a1, 65 +; RV64-NEXT: vmv.s.x v3, a3 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vslideup.vi v24, v8, 2 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 8 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 77 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 16 -; RV64-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 77 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 16 -; RV64-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 45 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v24, v16, 5, v0.t -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 73 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 16 -; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 73 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs4r.v v24, (a3) # vscale x 32-byte Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a2) +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a5, 85 -; RV64-NEXT: mul a2, a2, a5 +; RV64-NEXT: li a3, 77 +; RV64-NEXT: mul a2, a2, 
a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: vle16.v v12, (a3) -; RV64-NEXT: vmv.s.x v0, a4 +; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill +; RV64-NEXT: addi a2, a1, 65 +; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vslideup.vi v12, v8, 1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 @@ -1074,35 +1051,28 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmerge.vvm v24, v24, v16, v0 -; RV64-NEXT: vrgatherei16.vv v0, v24, v12 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 37 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v0, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v12, v8, 1 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 61 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl1r.v v7, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 45 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vslideup.vi v12, v24, 4, v0.t +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vslideup.vi v12, v16, 4, v0.t ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 69 ; RV64-NEXT: mul a2, a2, a3 @@ -1115,17 +1085,23 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a2, a2, 130 ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: addi a2, a3, 260 -; RV64-NEXT: vmv8r.v v24, v16 ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: li a5, 85 ; RV64-NEXT: mul a3, a3, a5 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a5, 77 +; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v16, (a3) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: li a5, 21 +; RV64-NEXT: mul a3, a3, a5 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill @@ -1137,6 +1113,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 77 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # 
vscale x 64-byte Folded Reload ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a2, vlenb @@ -1147,21 +1129,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 45 +; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v12, v8, 5, v0.t -; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 45 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a3, a2, 6 ; RV64-NEXT: add a2, a3, a2 @@ -1171,84 +1153,65 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vslidedown.vi v12, v8, 1 ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: vslideup.vi v12, v8, 4, v0.t -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vrgather.vi v12, v24, 5, v0.t +; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 25 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill ; RV64-NEXT: lui a2, 8 ; RV64-NEXT: addi a2, a2, 520 ; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vslideup.vi v12, v24, 6 +; RV64-NEXT: vslideup.vi v4, v16, 6 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: slli a3, a2, 3 ; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t -; RV64-NEXT: lui a2, %hi(.LCPI27_1) -; RV64-NEXT: addi a2, a2, %lo(.LCPI27_1) -; RV64-NEXT: li a3, 192 -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v6, (a2) -; RV64-NEXT: vmv.s.x v0, a3 +; RV64-NEXT: vslideup.vi v4, v8, 1, v0.t +; RV64-NEXT: li a2, 192 +; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: slli a2, a2, 3 
; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs1r.v v0, (a2) # vscale x 8-byte Folded Spill ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 45 +; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v28, v16, 2 -; RV64-NEXT: vmerge.vvm v16, v28, v12, v0 +; RV64-NEXT: vrgather.vi v12, v16, 2 +; RV64-NEXT: vmerge.vvm v12, v12, v4, v0 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 61 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v16, v6 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui a2, %hi(.LCPI27_2) -; RV64-NEXT: addi a2, a2, %lo(.LCPI27_2) +; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui a2, %hi(.LCPI27_0) +; RV64-NEXT: addi a2, a2, %lo(.LCPI27_0) ; RV64-NEXT: li a3, 1040 ; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: addi a1, a1, -2016 @@ -1259,41 +1222,77 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 53 +; RV64-NEXT: li a4, 77 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v16, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 16 -; RV64-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vle16.v v6, (a2) -; RV64-NEXT: li a1, 64 -; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vle16.v v12, (a2) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 85 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload -; RV64-NEXT: vrgatherei16.vv v24, v16, v6 +; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 85 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui a1, %hi(.LCPI27_1) +; RV64-NEXT: addi a1, a1, %lo(.LCPI27_1) +; RV64-NEXT: vle16.v v24, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; 
RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v16, v12 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vs8r.v v0, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui a1, %hi(.LCPI27_2) +; RV64-NEXT: addi a1, a1, %lo(.LCPI27_2) +; RV64-NEXT: vle16.v v12, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v16, v24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v0, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li a1, 64 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vrgatherei16.vv v24, v16, v12 ; RV64-NEXT: vmv4r.v v28, v8 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v28, v8, 5, v0.t @@ -1304,13 +1303,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v0 +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 @@ -1323,7 +1322,11 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload -; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb @@ -1335,62 +1338,59 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: lui a1, %hi(.LCPI27_3) ; RV64-NEXT: addi a1, a1, %lo(.LCPI27_3) ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v20, (a1) +; RV64-NEXT: vle16.v v8, (a1) ; RV64-NEXT: lui a1, %hi(.LCPI27_4) ; RV64-NEXT: addi a1, a1, %lo(.LCPI27_4) -; RV64-NEXT: vle16.v v8, (a1) +; RV64-NEXT: vle16.v v10, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill +; RV64-NEXT: vs2r.v v10, (a1) # vscale x 16-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 6 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload +; RV64-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload ; 
RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 6 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: slli a2, a1, 3 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v0, v8, v20 +; RV64-NEXT: vrgatherei16.vv v0, v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload +; RV64-NEXT: vl4r.v v20, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmv.v.v v20, v0 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload +; RV64-NEXT: vl2r.v v16, (a1) # vscale x 16-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v0, v16, v8 +; RV64-NEXT: vrgatherei16.vv v0, v8, v16 ; RV64-NEXT: lui a1, %hi(.LCPI27_5) ; RV64-NEXT: addi a1, a1, %lo(.LCPI27_5) -; RV64-NEXT: vle16.v v20, (a1) +; RV64-NEXT: vle16.v v12, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 @@ -1406,7 +1406,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1414,7 +1414,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v8, v0, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # vscale x 8-byte Folded Reload @@ -1426,7 +1426,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v20 +; RV64-NEXT: vrgatherei16.vv v24, v0, v12 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: addi a1, a0, 320 @@ -1441,7 +1441,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 -; RV64-NEXT: vse64.v v12, 
(a1) +; RV64-NEXT: vse64.v v20, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a3, a2, 6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index ffbf1c7a548e1..1ccc52be36215 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1874,57 +1874,77 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v24, (a2) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmin.vv v24, v16, v24 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vfmin.vv v24, v8, v24 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 
16 -; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmin.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v8 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 @@ -1943,7 +1963,10 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB121_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -2257,56 +2280,76 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a1) ; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: vle64.v v8, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmfeq.vv v0, v24, v24 -; RV32-NEXT: vmfeq.vv v7, v16, v16 -; RV32-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV32-NEXT: vmfeq.vv v7, v8, v8 +; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmerge.vvm v16, v24, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vfmin.vv v24, v16, v24 +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmin.vv v24, v8, v24 +; RV32-NEXT: 
csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmfeq.vv v0, v8, v8 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmfeq.vv v0, v16, v16 -; RV32-NEXT: vmfeq.vv v7, v8, v8 +; RV32-NEXT: vmfeq.vv v7, v16, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vfmin.vv v16, v8, v16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmin.vv v16, v16, v8 ; RV32-NEXT: vmfeq.vv v0, v16, v16 ; RV32-NEXT: vmfeq.vv v7, v24, v24 ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 @@ -2325,7 +2368,10 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: .LBB133_3: ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -2337,56 +2383,76 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmfeq.vv v0, v24, v24 -; RV64-NEXT: vmfeq.vv v7, v16, v16 -; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV64-NEXT: vmfeq.vv v7, v8, v8 +; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: 
addi a0, sp, 16 -; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmerge.vvm v16, v24, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vfmin.vv v24, v16, v24 +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmin.vv v24, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmfeq.vv v0, v8, v8 +; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmfeq.vv v0, v16, v16 -; RV64-NEXT: vmfeq.vv v7, v8, v8 +; RV64-NEXT: vmfeq.vv v7, v16, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmerge.vvm v16, v16, v8, v0 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vfmin.vv v16, v8, v16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmin.vv v16, v16, v8 ; RV64-NEXT: vmfeq.vv v0, v16, v16 ; RV64-NEXT: vmfeq.vv v7, v24, v24 ; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 @@ -2406,7 +2472,10 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; RV64-NEXT: vfmv.f.s fa0, v8 ; RV64-NEXT: .LBB133_3: ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 @@ -2702,57 +2771,77 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 
0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v24, (a2) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmax.vv v24, v16, v24 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vfmax.vv v24, v8, v24 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmax.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v8 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, 
v0 @@ -2771,7 +2860,10 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB149_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -3085,56 +3177,76 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a1) ; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: vle64.v v8, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmfeq.vv v0, v24, v24 -; RV32-NEXT: vmfeq.vv v7, v16, v16 -; RV32-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV32-NEXT: vmfeq.vv v7, v8, v8 +; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmerge.vvm v16, v24, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vfmax.vv v24, v16, v24 +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmax.vv v24, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmfeq.vv v0, v8, v8 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmfeq.vv v0, v16, v16 -; RV32-NEXT: vmfeq.vv v7, v8, v8 +; RV32-NEXT: vmfeq.vv v7, v16, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vs8r.v v8, 
(a0) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vfmax.vv v16, v8, v16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmax.vv v16, v16, v8 ; RV32-NEXT: vmfeq.vv v0, v16, v16 ; RV32-NEXT: vmfeq.vv v7, v24, v24 ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 @@ -3153,7 +3265,10 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: .LBB161_3: ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -3165,56 +3280,76 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmfeq.vv v0, v24, v24 -; RV64-NEXT: vmfeq.vv v7, v16, v16 -; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV64-NEXT: vmfeq.vv v7, v8, v8 +; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmerge.vvm v16, v24, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vfmax.vv v24, v16, v24 +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmax.vv v24, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v 
v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmfeq.vv v0, v8, v8 +; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmfeq.vv v0, v16, v16 -; RV64-NEXT: vmfeq.vv v7, v8, v8 +; RV64-NEXT: vmfeq.vv v7, v16, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmerge.vvm v16, v16, v8, v0 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vfmax.vv v16, v8, v16 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmax.vv v16, v16, v8 ; RV64-NEXT: vmfeq.vv v0, v16, v16 ; RV64-NEXT: vmfeq.vv v7, v24, v24 ; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 @@ -3234,7 +3369,10 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; RV64-NEXT: vfmv.f.s fa0, v8 ; RV64-NEXT: .LBB161_3: ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 9d3fe3a90b463..9696a3fe62883 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -128,8 +128,9 @@ define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> ; CHECK-LABEL: select_addsub_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vrsub.vi v16, v16, 0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vrsub.vi v24, v16, 0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret %sub = sub <32 x i32> %a, %b @@ -144,29 +145,39 @@ define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi 
a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t +; CHECK-NEXT: vrsub.vi v16, v8, 0 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vadd.vv v8, v16, v8 -; CHECK-NEXT: vrsub.vi v24, v24, 0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v24, 0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vv v16, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll index 9bbfad4f32ffa..9c3730a7bf36b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll @@ -598,80 +598,53 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; ZIP-NEXT: addi sp, sp, -16 ; ZIP-NEXT: .cfi_def_cfa_offset 16 ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: li a1, 40 -; ZIP-NEXT: mul a0, a0, a1 +; ZIP-NEXT: slli a1, a0, 4 +; ZIP-NEXT: add a0, a1, a0 ; ZIP-NEXT: sub sp, sp, a0 -; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: slli a0, a0, 5 +; ZIP-NEXT: slli a1, a0, 3 +; ZIP-NEXT: add a0, a1, a0 ; ZIP-NEXT: add a0, sp, a0 ; ZIP-NEXT: addi a0, a0, 16 ; ZIP-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZIP-NEXT: addi a0, sp, 16 -; ZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; ZIP-NEXT: li a0, 32 ; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; ZIP-NEXT: vslidedown.vi v16, v8, 16 +; ZIP-NEXT: vslidedown.vi v0, v16, 16 +; ZIP-NEXT: li a0, 32 ; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZIP-NEXT: ri.vzip2a.vv v8, v16, v0 -; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: slli a1, a1, 3 -; ZIP-NEXT: add a1, sp, a1 -; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: ri.vzip2a.vv v16, v24, v0 ; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: slli a1, a1, 5 -; ZIP-NEXT: add a1, sp, a1 -; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload -; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; ZIP-NEXT: vslidedown.vi v16, v16, 16 -; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: li a2, 
24 -; ZIP-NEXT: mul a1, a1, a2 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 ; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; ZIP-NEXT: vslidedown.vi v0, v8, 16 ; ZIP-NEXT: lui a1, 699051 ; ZIP-NEXT: addi a1, a1, -1366 -; ZIP-NEXT: vmv.s.x v0, a1 -; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: slli a1, a1, 4 -; ZIP-NEXT: add a1, sp, a1 -; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: li a2, 24 -; ZIP-NEXT: mul a1, a1, a2 -; ZIP-NEXT: add a1, sp, a1 -; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload -; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: slli a1, a1, 4 -; ZIP-NEXT: add a1, sp, a1 -; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload -; ZIP-NEXT: csrr a1, vlenb -; ZIP-NEXT: slli a1, a1, 3 -; ZIP-NEXT: add a1, sp, a1 -; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload -; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; ZIP-NEXT: ri.vzip2a.vv v8, v24, v16, v0.t -; ZIP-NEXT: vmv.v.v v24, v8 +; ZIP-NEXT: vmv.s.x v16, a1 +; ZIP-NEXT: addi a1, sp, 16 +; ZIP-NEXT: vs1r.v v16, (a1) # vscale x 8-byte Folded Spill +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v0, v24 +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: slli a0, a0, 5 +; ZIP-NEXT: add a0, sp, a0 +; ZIP-NEXT: addi a0, a0, 16 +; ZIP-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; ZIP-NEXT: vmerge.vvm v24, v16, v24, v0 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: slli a1, a0, 3 +; ZIP-NEXT: add a0, a1, a0 ; ZIP-NEXT: add a0, sp, a0 ; ZIP-NEXT: addi a0, a0, 16 ; ZIP-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; ZIP-NEXT: addi a0, sp, 16 -; ZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; ZIP-NEXT: ri.vzip2a.vv v0, v8, v16 ; ZIP-NEXT: vmv.v.v v8, v0 ; ZIP-NEXT: vmv.v.v v16, v24 ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: li a1, 40 -; ZIP-NEXT: mul a0, a0, a1 +; ZIP-NEXT: slli a1, a0, 4 +; ZIP-NEXT: add a0, a1, a0 ; ZIP-NEXT: add sp, sp, a0 ; ZIP-NEXT: .cfi_def_cfa sp, 16 ; ZIP-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index f2f9f90f386c0..740f8115983fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -179,21 +179,38 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c) { ; CHECK-LABEL: select_evl_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: addi a3, a1, 128 +; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: addi a0, a1, 128 +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, 
ta, ma -; CHECK-NEXT: vle8.v v24, (a3) -; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu -; CHECK-NEXT: vle8.v v24, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 129) ret <256 x i8> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 25a4eb74eeba7..fd70f95ed53c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -121,155 +121,68 @@ define @vfmax_nxv16bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, %b) nounwind { -; ZVFH-LABEL: vfmax_nxv32bf16_vv: -; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: sub sp, sp, a0 -; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv8r.v v24, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: vmv8r.v v0, v8 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v3, v16, v16 -; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: vmv1r.v v0, v3 -; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vfmax.vv v16, v0, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v24, v24 -; ZVFH-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFH-NEXT: vfmax.vv v16, v8, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; 
ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: sub sp, sp, a0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v0, v8 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmax.vv v16, v0, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmax_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv8r.v v0, v16 +; 
CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v3, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmax.vv v16, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call @llvm.maximum.nxv32bf16( %a, %b) ret %v } @@ -444,54 +357,45 @@ define @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmin_nxv16bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, %b) nounwind { -; ZVFH-LABEL: vfmin_nxv32bf16_vv: -; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: sub sp, sp, a0 -; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv8r.v v24, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: vmv8r.v v0, v8 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v3, v16, v16 -; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: vmv1r.v v0, v3 -; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; 
ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vfmin.vv v16, v0, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v24, v24 -; ZVFH-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFH-NEXT: vfmin.vv v16, v8, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: sub sp, sp, a0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v0, v8 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmin.vv v16, v0, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; 
ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmin_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv8r.v v0, v16 +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v3, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmin.vv v16, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call @llvm.minimum.nxv32bf16( %a, %b) ret %v } @@ -444,54 +357,45 @@ define @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, %x, i64 %y) { ; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vs8r.v v24, (a1) ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vs8r.v v24, (a1) ; CHECK-NEXT: vs8r.v v8, (a2) ; CHECK-NEXT: lbu a0, 0(a0) 
; CHECK-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir index a7eaf39793236..c73c2004834db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir @@ -10,13 +10,13 @@ body: | ; CHECK-LABEL: name: undef_passthru ; CHECK: liveins: $x1, $v8, $v9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %false:vr = COPY $v8 - ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %true:vrnov0 = COPY $v9 ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 /* e8 */ ; CHECK-NEXT: $v0 = COPY %mask - %false:vr = COPY $v8 - %true:vr = COPY $v9 + %false:vrnov0 = COPY $v8 + %true:vrnov0 = COPY $v9 %avl:gprnox0 = COPY $x1 %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 $v0 = COPY %mask @@ -31,15 +31,15 @@ body: | ; CHECK: liveins: $x1, $v8, $v9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %pt:vr = COPY $v8 - ; CHECK-NEXT: %false:vr = COPY $noreg - ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %false:vrnov0 = COPY $noreg + ; CHECK-NEXT: %true:vrnov0 = COPY $v9 ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 /* e8 */ ; CHECK-NEXT: $v0 = COPY %mask ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ %pt:vrnov0 = COPY $v8 - %false:vr = COPY $noreg - %true:vr = COPY $v9 + %false:vrnov0 = COPY $noreg + %true:vrnov0 = COPY $v9 %avl:gprnox0 = COPY $x1 %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 $v0 = COPY %mask @@ -53,15 +53,15 @@ body: | ; CHECK-LABEL: name: equal_passthru_false ; CHECK: liveins: $x1, $v8, $v9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 ; CHECK-NEXT: %pt:vr = COPY $v8 - ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %true:vrnov0 = COPY $v9 ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 /* e8 */ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ - %false:vr = COPY $v8 + %false:vrnov0 = COPY $v8 %pt:vrnov0 = COPY $v8 - %true:vr = COPY $v9 + %true:vrnov0 = COPY $v9 %avl:gprnox0 = COPY $x1 %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl, 5 @@ -136,7 +136,7 @@ body: | ; CHECK-NEXT: %false:vrnov0 = COPY $v8 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 1 /* ta, mu */ - %false:vr = COPY $v8 + %false:vrnov0 = COPY $v8 %mask:vmv0 = COPY $v0 %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */ @@ -150,7 +150,7 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $v8, $v0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: {{ $}} @@ -158,7 +158,7 @@ body: | ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */ bb.0: liveins: $v8, $v0 - %false:vr = COPY $v8 + %false:vrnov0 = COPY $v8 %mask:vmv0 = COPY $v0 %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ bb.1: @@ -174,14 +174,14 
@@ body: | ; CHECK: liveins: $v8, $v9, $v0, $x8, $x9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %pt:vrnov0 = COPY $v8 - ; CHECK-NEXT: %false:vr = COPY $v9 + ; CHECK-NEXT: %false:vrnov0 = COPY $v9 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8 ; CHECK-NEXT: %avl2:gprnox0 = COPY $x9 ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */ ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */ %pt:vrnov0 = COPY $v8 - %false:vr = COPY $v9 + %false:vrnov0 = COPY $v9 %mask:vmv0 = COPY $v0 %avl1:gprnox0 = COPY $x8 %avl2:gprnox0 = COPY $x9 @@ -203,7 +203,7 @@ body: | ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */ ; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */ %pt:vrnov0 = COPY $v8 - %false:vr = COPY $v9 + %false:vrnov0 = COPY $v9 %mask:vmv0 = COPY $v0 %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */ diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index acd9519bb5a8e..971fbff3ff418 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -859,17 +859,19 @@ define void @test_dag_loop() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 0, e8, m4, ta, ma ; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, mu -; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (zero) +; CHECK-NEXT: vmv4r.v v20, v16 +; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu +; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v12, v8 +; CHECK-NEXT: vmseq.vv v0, v20, v16 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu -; CHECK-NEXT: vle16.v v8, (zero), v0.t -; CHECK-NEXT: vse16.v v8, (zero) +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vse16.v v16, (zero) ; CHECK-NEXT: ret entry: %0 = call @llvm.riscv.vle.nxv32i16.i64( poison, ptr null, i64 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll index e3f43cd904198..cc389236df3ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -516,15 +516,15 @@ define @splice_nxv64i1_offset_negone( %a, < ; NOVLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; NOVLDEP-NEXT: vmv1r.v v9, v0 ; NOVLDEP-NEXT: vmv1r.v v0, v8 -; NOVLDEP-NEXT: vmv.v.i v24, 0 +; NOVLDEP-NEXT: vmv.v.i v16, 0 ; NOVLDEP-NEXT: csrr a0, vlenb -; NOVLDEP-NEXT: vmerge.vim v16, v24, 1, v0 +; NOVLDEP-NEXT: vmerge.vim v24, v16, 1, v0 ; NOVLDEP-NEXT: vmv1r.v v0, v9 -; NOVLDEP-NEXT: vmerge.vim v8, v24, 1, v0 +; NOVLDEP-NEXT: vmerge.vim v8, v16, 1, v0 ; NOVLDEP-NEXT: slli a0, a0, 3 ; NOVLDEP-NEXT: addi a0, a0, -1 ; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 -; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: vslideup.vi v8, v24, 1 ; NOVLDEP-NEXT: vand.vi v8, 
v8, 1 ; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 ; NOVLDEP-NEXT: ret @@ -534,17 +534,17 @@ define @splice_nxv64i1_offset_negone( %a, < ; VLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; VLDEP-NEXT: vmv1r.v v9, v0 ; VLDEP-NEXT: vmv1r.v v0, v8 -; VLDEP-NEXT: vmv.v.i v24, 0 +; VLDEP-NEXT: vmv.v.i v16, 0 ; VLDEP-NEXT: csrr a0, vlenb -; VLDEP-NEXT: vmerge.vim v16, v24, 1, v0 +; VLDEP-NEXT: vmerge.vim v24, v16, 1, v0 ; VLDEP-NEXT: vmv1r.v v0, v9 -; VLDEP-NEXT: vmerge.vim v8, v24, 1, v0 +; VLDEP-NEXT: vmerge.vim v8, v16, 1, v0 ; VLDEP-NEXT: slli a0, a0, 3 ; VLDEP-NEXT: addi a0, a0, -1 ; VLDEP-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; VLDEP-NEXT: vslidedown.vx v8, v8, a0 ; VLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: vslideup.vi v8, v24, 1 ; VLDEP-NEXT: vand.vi v8, v8, 1 ; VLDEP-NEXT: vmsne.vi v0, v8, 0 ; VLDEP-NEXT: ret @@ -558,16 +558,16 @@ define @splice_nxv64i1_offset_max( %a, @vfmacc_vv_nxv32f16( %a, @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -551,8 +553,10 @@ define @vfmacc_vv_nxv32f16_unmasked( %a ; CHECK-LABEL: vfmacc_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vfmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -563,8 +567,10 @@ define @vfmacc_vv_nxv32f16_unmasked( %a define @vfmacc_vf_nxv32f16( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -577,8 +583,10 @@ define @vfmacc_vf_nxv32f16( %va, half % define @vfmacc_vf_nxv32f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -591,8 +599,10 @@ define @vfmacc_vf_nxv32f16_commute( %va define @vfmacc_vf_nxv32f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -606,9 +616,9 @@ define @vfmacc_vv_nxv32f16_ta( %a, @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) @@ -618,9 +628,9 @@ define @vfmacc_vv_nxv32f16_ta( %a, @vfmacc_vf_nxv32f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_ta: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -632,9 +642,9 @@ define @vfmacc_vf_nxv32f16_ta( %va, hal define @vfmacc_vf_nxv32f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1071,8 +1081,10 @@ define @vfmacc_vv_nxv16f32( %a, @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1084,8 +1096,10 @@ define @vfmacc_vv_nxv16f32_unmasked( ; CHECK-LABEL: vfmacc_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vfmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1096,8 +1110,10 @@ define @vfmacc_vv_nxv16f32_unmasked( define @vfmacc_vf_nxv16f32( %va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1110,8 +1126,10 @@ define @vfmacc_vf_nxv16f32( %va, floa define @vfmacc_vf_nxv16f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1124,8 +1142,10 @@ define @vfmacc_vf_nxv16f32_commute( % define @vfmacc_vf_nxv16f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1139,9 +1159,9 @@ define @vfmacc_vv_nxv16f32_ta( %a, @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) @@ -1151,9 +1171,9 @@ define @vfmacc_vv_nxv16f32_ta( %a, @vfmacc_vf_nxv16f32_ta( 
%va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1165,9 +1185,9 @@ define @vfmacc_vf_nxv16f32_ta( %va, f define @vfmacc_vf_nxv16f32_commute_ta( %va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1498,8 +1518,10 @@ define @vfmacc_vv_nxv8f64( %a, @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1511,8 +1533,10 @@ define @vfmacc_vv_nxv8f64_unmasked( % ; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: vfmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1523,8 +1547,10 @@ define @vfmacc_vv_nxv8f64_unmasked( % define @vfmacc_vf_nxv8f64( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1537,8 +1563,10 @@ define @vfmacc_vf_nxv8f64( %va, doubl define @vfmacc_vf_nxv8f64_commute( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1551,8 +1579,10 @@ define @vfmacc_vf_nxv8f64_commute( %v define @vfmacc_vf_nxv8f64_unmasked( %va, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1566,9 +1596,9 @@ define @vfmacc_vv_nxv8f64_ta( %a, @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) 
@@ -1578,9 +1608,9 @@ define @vfmacc_vv_nxv8f64_ta( %a, @vfmacc_vf_nxv8f64_ta( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1592,9 +1622,9 @@ define @vfmacc_vf_nxv8f64_ta( %va, do define @vfmacc_vf_nxv8f64_commute_ta( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll index 72ed38b53d2ff..e3afbfb2969a3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll @@ -578,8 +578,10 @@ define @vmfsac_vv_nxv32f16( %a, @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) @@ -592,8 +594,10 @@ define @vmfsac_vv_nxv32f16_unmasked( %a ; CHECK-LABEL: vmfsac_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vfmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) @@ -605,8 +609,10 @@ define @vmfsac_vv_nxv32f16_unmasked( %a define @vmfsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -620,8 +626,10 @@ define @vmfsac_vf_nxv32f16( %a, half %b define @vmfsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -635,8 +643,10 @@ define @vmfsac_vf_nxv32f16_commute( %a, define @vmfsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfmsac.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; 
CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -651,9 +661,9 @@ define @vmfsac_vv_nxv32f16_ta( %a, @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, splat (i1 -1), i32 %evl) @@ -664,9 +674,9 @@ define @vmfsac_vv_nxv32f16_ta( %a, @vmfsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -679,9 +689,9 @@ define @vmfsac_vf_nxv32f16_ta( %a, half define @vmfsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1151,8 +1161,10 @@ define @vmfsac_vv_nxv16f32( %a, @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) @@ -1165,8 +1177,10 @@ define @vmfsac_vv_nxv16f32_unmasked( ; CHECK-LABEL: vmfsac_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vfmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) @@ -1178,8 +1192,10 @@ define @vmfsac_vv_nxv16f32_unmasked( define @vmfsac_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1193,8 +1209,10 @@ define @vmfsac_vf_nxv16f32( %a, float define @vmfsac_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1208,8 +1226,10 @@ define @vmfsac_vf_nxv16f32_commute( % define @vmfsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vfmsac.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: 
vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1224,9 +1244,9 @@ define @vmfsac_vv_nxv16f32_ta( %a, @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, splat (i1 -1), i32 %evl) @@ -1237,9 +1257,9 @@ define @vmfsac_vv_nxv16f32_ta( %a, @vmfsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1252,9 +1272,9 @@ define @vmfsac_vf_nxv16f32_ta( %a, fl define @vmfsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1610,8 +1630,10 @@ define @vmfsac_vv_nxv8f64( %a, @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) @@ -1624,8 +1646,10 @@ define @vmfsac_vv_nxv8f64_unmasked( % ; CHECK-LABEL: vmfsac_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: vfmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) @@ -1637,8 +1661,10 @@ define @vmfsac_vv_nxv8f64_unmasked( % define @vmfsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1652,8 +1678,10 @@ define @vmfsac_vf_nxv8f64( %a, double define @vmfsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1667,8 +1695,10 @@ define @vmfsac_vf_nxv8f64_commute( %a define @vmfsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vfmsac.vf 
v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1683,9 +1713,9 @@ define @vmfsac_vv_nxv8f64_ta( %a, @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, splat (i1 -1), i32 %evl) @@ -1696,9 +1726,9 @@ define @vmfsac_vv_nxv8f64_ta( %a, @vmfsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1711,9 +1741,9 @@ define @vmfsac_vf_nxv8f64_ta( %a, dou define @vmfsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll index 7f6fb030b13be..dc62dd909de2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll @@ -618,8 +618,10 @@ define @vfnmacc_vv_nxv32f16( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -633,8 +635,10 @@ define @vfnmacc_vv_nxv32f16_unmasked( % ; CHECK-LABEL: vfnmacc_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vfnmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -647,8 +651,10 @@ define @vfnmacc_vv_nxv32f16_unmasked( % define @vfnmacc_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -663,8 +669,10 @@ define @vfnmacc_vf_nxv32f16( %a, half % define @vfnmacc_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret 
%elt.head = insertelement poison, half %b, i32 0 @@ -679,8 +687,10 @@ define @vfnmacc_vf_nxv32f16_commute( %a define @vfnmacc_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -696,9 +706,9 @@ define @vfnmacc_vv_nxv32f16_ta( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) @@ -710,9 +720,9 @@ define @vfnmacc_vv_nxv32f16_ta( %a, @vfnmacc_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -726,9 +736,9 @@ define @vfnmacc_vf_nxv32f16_ta( %a, hal define @vfnmacc_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1231,8 +1241,10 @@ define @vfnmacc_vv_nxv16f32( %a, @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1246,8 +1258,10 @@ define @vfnmacc_vv_nxv16f32_unmasked( ; CHECK-LABEL: vfnmacc_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vfnmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1260,8 +1274,10 @@ define @vfnmacc_vv_nxv16f32_unmasked( define @vfnmacc_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1276,8 +1292,10 @@ define @vfnmacc_vf_nxv16f32( %a, floa define @vfnmacc_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; 
CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1292,8 +1310,10 @@ define @vfnmacc_vf_nxv16f32_commute( define @vfnmacc_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1309,9 +1329,9 @@ define @vfnmacc_vv_nxv16f32_ta( %a, < ; CHECK-LABEL: vfnmacc_vv_nxv16f32_ta: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) @@ -1323,9 +1343,9 @@ define @vfnmacc_vv_nxv16f32_ta( %a, < define @vfnmacc_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1339,9 +1359,9 @@ define @vfnmacc_vf_nxv16f32_ta( %a, f define @vfnmacc_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1722,8 +1742,10 @@ define @vfnmacc_vv_nxv8f64( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1737,8 +1759,10 @@ define @vfnmacc_vv_nxv8f64_unmasked( ; CHECK-LABEL: vfnmacc_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: vfnmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1751,8 +1775,10 @@ define @vfnmacc_vv_nxv8f64_unmasked( define @vfnmacc_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head 
= insertelement poison, double %b, i32 0 @@ -1767,8 +1793,10 @@ define @vfnmacc_vf_nxv8f64( %a, doubl define @vfnmacc_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1783,8 +1811,10 @@ define @vfnmacc_vf_nxv8f64_commute( % define @vfnmacc_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1800,9 +1830,9 @@ define @vfnmacc_vv_nxv8f64_ta( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) @@ -1814,9 +1844,9 @@ define @vfnmacc_vv_nxv8f64_ta( %a, @vfnmacc_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1830,9 +1860,9 @@ define @vfnmacc_vf_nxv8f64_ta( %a, do define @vfnmacc_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll index 37b223be1150c..05cf261790cb1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll @@ -578,8 +578,10 @@ define @vfnmsac_vv_nxv32f16( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -592,8 +594,10 @@ define @vfnmsac_vv_nxv32f16_unmasked( % ; CHECK-LABEL: vfnmsac_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -605,8 +609,10 @@ define @vfnmsac_vv_nxv32f16_unmasked( % define @vfnmsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -620,8 +626,10 @@ define @vfnmsac_vf_nxv32f16( %a, half % define @vfnmsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -635,8 +643,10 @@ define @vfnmsac_vf_nxv32f16_commute( %a define @vfnmsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -651,9 +661,9 @@ define @vfnmsac_vv_nxv32f16_ta( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, splat (i1 -1), i32 %evl) @@ -664,9 +674,9 @@ define @vfnmsac_vv_nxv32f16_ta( %a, @vfnmsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -679,9 +689,9 @@ define @vfnmsac_vf_nxv32f16_ta( %a, hal define @vfnmsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1151,8 +1161,10 @@ define @vfnmsac_vv_nxv16f32( %a, @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1165,8 +1177,10 @@ define @vfnmsac_vv_nxv16f32_unmasked( ; CHECK-LABEL: vfnmsac_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1178,8 +1192,10 @@ define @vfnmsac_vv_nxv16f32_unmasked( define @vfnmsac_vf_nxv16f32( %a, 
float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1193,8 +1209,10 @@ define @vfnmsac_vf_nxv16f32( %a, floa define @vfnmsac_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1208,8 +1226,10 @@ define @vfnmsac_vf_nxv16f32_commute( define @vfnmsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1224,9 +1244,9 @@ define @vfnmsac_vv_nxv16f32_ta( %a, < ; CHECK-LABEL: vfnmsac_vv_nxv16f32_ta: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v16, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, splat (i1 -1), i32 %evl) @@ -1237,9 +1257,9 @@ define @vfnmsac_vv_nxv16f32_ta( %a, < define @vfnmsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1252,9 +1272,9 @@ define @vfnmsac_vf_nxv16f32_ta( %a, f define @vfnmsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1610,8 +1630,10 @@ define @vfnmsac_vv_nxv8f64( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1624,8 +1646,10 @@ define @vfnmsac_vv_nxv8f64_unmasked( ; CHECK-LABEL: vfnmsac_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1637,8 +1661,10 @@ define @vfnmsac_vv_nxv8f64_unmasked( define @vfnmsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1652,8 +1678,10 @@ define @vfnmsac_vf_nxv8f64( %a, doubl define @vfnmsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1667,8 +1695,10 @@ define @vfnmsac_vf_nxv8f64_commute( % define @vfnmsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1683,9 +1713,9 @@ define @vfnmsac_vv_nxv8f64_ta( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, splat (i1 -1), i32 %evl) @@ -1696,9 +1726,9 @@ define @vfnmsac_vv_nxv8f64_ta( %a, @vfnmsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1711,9 +1741,9 @@ define @vfnmsac_vf_nxv8f64_ta( %a, do define @vfnmsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index 
cd85853c2d12c..81a2388421cee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -1346,10 +1346,10 @@ name: vmerge_vim body: | bb.0: ; CHECK-LABEL: name: vmerge_vim - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1358,10 +1358,10 @@ name: vmerge_vim_incompatible_eew body: | bb.0: ; CHECK-LABEL: name: vmerge_vim_incompatible_eew - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1370,10 +1370,10 @@ name: vmerge_vim_incompatible_emul body: | bb.0: ; CHECK-LABEL: name: vmerge_vim_incompatible_emul - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VIM_MF2 $noreg, %x, 9, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrnov0 = PseudoVMERGE_VIM_MF2 $noreg, %x, 9, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1382,10 +1382,10 @@ name: vmerge_vxm body: | bb.0: ; CHECK-LABEL: name: vmerge_vxm - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1394,10 +1394,10 @@ name: vmerge_vxm_incompatible_eew body: | bb.0: ; CHECK-LABEL: name: vmerge_vxm_incompatible_eew - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... 
@@ -1406,10 +1406,10 @@ name: vmerge_vxm_incompatible_emul body: | bb.0: ; CHECK-LABEL: name: vmerge_vxm_incompatible_emul - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VXM_MF2 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrnov0 = PseudoVMERGE_VXM_MF2 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1418,10 +1418,10 @@ name: vmerge_vvm body: | bb.0: ; CHECK-LABEL: name: vmerge_vvm - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1430,10 +1430,10 @@ name: vmerge_vvm_incompatible_eew body: | bb.0: ; CHECK-LABEL: name: vmerge_vvm_incompatible_eew - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... @@ -1442,10 +1442,10 @@ name: vmerge_vvm_incompatible_emul body: | bb.0: ; CHECK-LABEL: name: vmerge_vvm_incompatible_emul - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_MF2 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */ ; CHECK-NEXT: $v8 = COPY %y - %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrnov0 = PseudoVMERGE_VVM_MF2 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */ $v8 = COPY %y ... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-optimizer-subreg-assert.mir b/llvm/test/CodeGen/RISCV/rvv/vl-optimizer-subreg-assert.mir index b816741285b43..7525bf70e62d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-optimizer-subreg-assert.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-optimizer-subreg-assert.mir @@ -12,17 +12,17 @@ body: | ; CHECK-LABEL: name: vl_optimizer_subreg_assert ; CHECK: liveins: $v8m2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vmv0 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF ; CHECK-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 $noreg, killed [[DEF2]], [[DEF]], [[DEF1]], -1, 6 /* e64 */ ; CHECK-NEXT: [[PseudoVREDMAXU_VS_M8_E64_:%[0-9]+]]:vr = PseudoVREDMAXU_VS_M8_E64 $noreg, [[PseudoVMERGE_VVM_M8_]], [[PseudoVMERGE_VVM_M8_]].sub_vrm1_0, -1, 6 /* e64 */, 1 /* ta, mu */ ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S killed [[PseudoVREDMAXU_VS_M8_E64_]], 6 /* e64 */ ; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S]] ; CHECK-NEXT: PseudoRET implicit $x10 - %0:vrm8 = IMPLICIT_DEF + %0:vrm8nov0 = IMPLICIT_DEF %1:vmv0 = IMPLICIT_DEF - %2:vrm8 = IMPLICIT_DEF + %2:vrm8nov0 = IMPLICIT_DEF %3:vrm8nov0 = PseudoVMERGE_VVM_M8 $noreg, killed %2, %0, %1, -1, 6 /* e64 */ %4:vr = PseudoVREDMAXU_VS_M8_E64 $noreg, %3, %3.sub_vrm1_0, -1, 6 /* e64 */, 1 /* ta, mu */ %5:gpr = PseudoVMV_X_S killed %4, 6 /* e64 */ diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll index 2ad7ac9390515..64af4e9c534fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -512,8 +512,10 @@ define @vmacc_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -526,8 +528,10 @@ define @vmacc_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -539,8 +543,10 @@ define @vmacc_vv_nxv64i8_unmasked( %a, @vmacc_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu -; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -554,8 +560,10 @@ define @vmacc_vx_nxv64i8( %a, i8 %b, @vmacc_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -570,9 +578,9 @@ define @vmacc_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -583,9 +591,9 @@ define @vmacc_vv_nxv64i8_ta( %a, @vmacc_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli 
zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1019,8 +1027,10 @@ define @vmacc_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1033,8 +1043,10 @@ define @vmacc_vv_nxv32i16_unmasked( %a, < ; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1046,8 +1058,10 @@ define @vmacc_vv_nxv32i16_unmasked( %a, < define @vmacc_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu -; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1061,8 +1075,10 @@ define @vmacc_vx_nxv32i16( %a, i16 %b, @vmacc_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1077,9 +1093,9 @@ define @vmacc_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1090,9 +1106,9 @@ define @vmacc_vv_nxv32i16_ta( %a, @vmacc_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1442,8 +1458,10 @@ define @vmacc_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1456,8 +1474,10 @@ define @vmacc_vv_nxv16i32_unmasked( %a, < ; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1469,8 +1489,10 @@ define @vmacc_vv_nxv16i32_unmasked( %a, < define @vmacc_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu -; 
CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1484,8 +1506,10 @@ define @vmacc_vx_nxv16i32( %a, i32 %b, @vmacc_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1500,9 +1524,9 @@ define @vmacc_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1513,9 +1537,9 @@ define @vmacc_vv_nxv16i32_ta( %a, @vmacc_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1922,8 +1946,10 @@ define @vmacc_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1936,8 +1962,10 @@ define @vmacc_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1956,8 +1984,9 @@ define @vmacc_vx_nxv8i64( %a, i64 %b, @vmacc_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 @@ -1987,8 +2018,9 @@ define @vmacc_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vmadd.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vmacc.vv v16, v8, v24 +; RV32-NEXT: vmv.v.v v16, v24 ; RV32-NEXT: vmv8r.v v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -1996,8 +2028,10 @@ define @vmacc_vx_nxv8i64_unmasked( %a, i64 ; ; RV64-LABEL: vmacc_vx_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; RV64-NEXT: vmacc.vx v16, a0, v8 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmadd.vx v8, a0, v16 +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; RV64-NEXT: vmv.v.v v16, v8 ; RV64-NEXT: vmv8r.v v8, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -2012,9 +2046,9 @@ define @vmacc_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -2030,19 +2064,19 @@ define @vmacc_vx_nxv8i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index fe5b8b9bf6d52..51c3edeeb356f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -476,8 +476,10 @@ define @vmadd_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -489,8 +491,10 @@ define 
@vmadd_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -501,8 +505,10 @@ define @vmadd_vv_nxv64i8_unmasked( %a, @vmadd_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu -; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -515,8 +521,10 @@ define @vmadd_vx_nxv64i8( %a, i8 %b, @vmadd_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -530,8 +538,9 @@ define @vmadd_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -542,8 +551,9 @@ define @vmadd_vv_nxv64i8_ta( %a, @vmadd_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -947,8 +957,10 @@ define @vmadd_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -960,8 +972,10 @@ define @vmadd_vv_nxv32i16_unmasked( %a, < ; CHECK-LABEL: vmadd_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -972,8 +986,10 @@ define @vmadd_vv_nxv32i16_unmasked( %a, < define @vmadd_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu -; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -986,8 +1002,10 @@ define @vmadd_vx_nxv32i16( %a, i16 %b, @vmadd_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, a1, 
e16, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1001,8 +1019,9 @@ define @vmadd_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1013,8 +1032,9 @@ define @vmadd_vv_nxv32i16_ta( %a, @vmadd_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1340,8 +1360,10 @@ define @vmadd_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1353,8 +1375,10 @@ define @vmadd_vv_nxv16i32_unmasked( %a, < ; CHECK-LABEL: vmadd_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vmadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1365,8 +1389,10 @@ define @vmadd_vv_nxv16i32_unmasked( %a, < define @vmadd_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu -; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1379,8 +1405,10 @@ define @vmadd_vx_nxv16i32( %a, i32 %b, @vmadd_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1394,8 +1422,9 @@ define @vmadd_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1406,8 +1435,9 @@ define @vmadd_vv_nxv16i32_ta( %a, @vmadd_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1787,8 +1817,10 @@ define 
@vmadd_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1800,8 +1832,10 @@ define @vmadd_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1819,16 +1853,19 @@ define @vmadd_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1848,16 +1885,19 @@ define @vmadd_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vmadd.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vmadd.vv v8, v24, v16 +; RV32-NEXT: vmv.v.v v8, v24 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: vmadd_vx_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; RV64-NEXT: vmadd.vx v8, a0, v16 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmacc.vx v16, a0, v8 +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1871,8 +1911,9 @@ define @vmadd_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1888,17 +1929,19 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir index 374afa3aafdea..817e1028c1ad7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: %y:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1) %avl:gprnox0 = COPY $x8 %passthru:vrnov0 = COPY $v8 - %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) + %x:vrnov0 = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) %mask:vmv0 = COPY $v0 %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ ... @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %y:vrnov0 = PseudoVLE32_V_M1_MASK %false, $noreg, %mask, %avl, 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size, align 1) %avl:gprnox0 = COPY $x8 - %false:vr = COPY $v8 - %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) + %false:vrnov0 = COPY $v8 + %x:vrnov0 = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) %mask:vmv0 = COPY $v0 %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %x, %mask, %avl, 5 /* e32 */ ... 
@@ -50,7 +50,7 @@ body: | ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %y:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1) %avl:gprnox0 = COPY $x8 - %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) + %x:vrnov0 = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) %passthru:vrnov0 = COPY $v8 %mask:vmv0 = COPY $v0 %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ @@ -68,7 +68,7 @@ body: | ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %y:vrnov0 = PseudoVNCLIPU_WV_MF2_MASK %passthru, $noreg, $noreg, %mask, 0, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit-def $vxsat %avl:gprnox0 = COPY $x8 - %x:vr = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5, 3, implicit-def $vxsat + %x:vrnov0 = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5, 3, implicit-def $vxsat %passthru:vrnov0 = COPY $v8 %mask:vmv0 = COPY $v0 %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ @@ -82,13 +82,13 @@ body: | ; CHECK: liveins: $x8, $v0, $v8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %avl:gprnox0 = COPY $x8 - ; CHECK-NEXT: %x:vr = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5 /* e32 */, 3 /* ta, ma */, implicit-def $vxsat + ; CHECK-NEXT: %x:vrnov0 = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5 /* e32 */, 3 /* ta, ma */, implicit-def $vxsat ; CHECK-NEXT: %vxsat:gpr = COPY $vxsat ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ %avl:gprnox0 = COPY $x8 - %x:vr = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5, 3, implicit-def $vxsat + %x:vrnov0 = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5, 3, implicit-def $vxsat %vxsat:gpr = COPY $vxsat %passthru:vrnov0 = COPY $v8 %mask:vmv0 = COPY $v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge.ll index 4a411475e337a..81515c4a81f94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge.ll @@ -775,8 +775,9 @@ define @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64( @intrinsic_vmv.s.x_x_nxv8i64( %0, i6 ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmseq.vi v0, v16, 0 ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v24 +; RV32-NEXT: vmseq.vi v0, v24, 0 +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir index 9c3e96d818556..95232e734bb18 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir @@ -163,7 +163,7 @@ body: | ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, $noreg, %mask, 4, 5 /* e32 */ - %passthru:vr = COPY $v8 + %passthru:vrnov0 = COPY $v8 %mask:vmv0 = COPY $v0 %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */ %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */ diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll index 1c4294990f90a..822bbe9e94e5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll @@ -512,8 +512,10 @@ define @vnmsac_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -526,8 +528,10 @@ define @vnmsac_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -539,8 +543,10 @@ define @vnmsac_vv_nxv64i8_unmasked( %a, @vnmsac_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu -; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -554,8 +560,10 @@ define @vnmsac_vx_nxv64i8( %a, i8 %b, @vnmsac_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vnmsac.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -570,9 +578,9 @@ define @vnmsac_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) @@ -583,9 +591,9 @@ define @vnmsac_vv_nxv64i8_ta( %a, @vnmsac_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1019,8 +1027,10 @@ define @vnmsac_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1033,8 +1043,10 @@ define @vnmsac_vv_nxv32i16_unmasked( %a, ; CHECK-LABEL: vnmsac_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vnmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vnmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1046,8 +1058,10 @@ define @vnmsac_vv_nxv32i16_unmasked( %a, define @vnmsac_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu -; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1061,8 +1075,10 @@ define @vnmsac_vx_nxv32i16( %a, i16 %b, < define @vnmsac_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vnmsac_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vnmsac.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1077,9 +1093,9 @@ define @vnmsac_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) @@ -1090,9 +1106,9 @@ define @vnmsac_vv_nxv32i16_ta( %a, @vnmsac_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1442,8 +1458,10 @@ define @vnmsac_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1456,8 +1474,10 @@ define @vnmsac_vv_nxv16i32_unmasked( %a, ; CHECK-LABEL: vnmsac_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vnmsac.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vnmsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v24, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1469,8 +1489,10 @@ define @vnmsac_vv_nxv16i32_unmasked( %a, define @vnmsac_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu -; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1484,8 +1506,10 @@ define @vnmsac_vx_nxv16i32( %a, i32 %b, < define @vnmsac_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vnmsac.vx v16, a0, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1500,9 +1524,9 @@ define @vnmsac_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) @@ -1513,9 +1537,9 @@ define @vnmsac_vv_nxv16i32_ta( %a, @vnmsac_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = 
shufflevector %elt.head, poison, zeroinitializer @@ -1922,8 +1946,10 @@ define @vnmsac_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1936,8 +1962,10 @@ define @vnmsac_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1956,8 +1984,9 @@ define @vnmsac_vx_nxv8i64( %a, i64 %b, @vnmsac_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 @@ -1987,8 +2018,9 @@ define @vnmsac_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vnmsub.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vnmsac.vv v16, v8, v24 +; RV32-NEXT: vmv.v.v v16, v24 ; RV32-NEXT: vmv8r.v v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -1996,8 +2028,10 @@ define @vnmsac_vx_nxv8i64_unmasked( %a, i64 ; ; RV64-LABEL: vnmsac_vx_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; RV64-NEXT: vnmsac.vx v16, a0, v8 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vnmsub.vx v8, a0, v16 +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; RV64-NEXT: vmv.v.v v16, v8 ; RV64-NEXT: vmv8r.v v8, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -2012,9 +2046,9 @@ define @vnmsac_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) @@ -2030,19 +2064,19 @@ define @vnmsac_vx_nxv8i64_ta( %a, i64 %b, < ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vnmsac.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: vnmsub.vv v24, v8, v16 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: vnmsac_vx_nxv8i64_ta: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; RV64-NEXT: vnmsac.vx v16, a0, v8, v0.t -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vnmsub.vx v8, a0, v16 +; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 03697aafea45d..2e9b9467ccaff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -1148,8 +1148,10 @@ define @vpmerge_vx_nxv8i64(i64 %a, %vb, @vfmerge_nzv_nxv8f64( %va, @vselect_combine_regression( %va, %vb) { ; CHECK-LABEL: vselect_combine_regression: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv8r.v v24, v16 -; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vle64.v v8, (a0), v0.t -; CHECK-NEXT: vmseq.vi v0, v24, 0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vl8re64.v v24, (a0) +; 
CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vle64.v v16, (a0), v0.t +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vmseq.vi v0, v24, 0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %cond = icmp eq %va, zeroinitializer %sel = select %cond, %vb, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll index 4ec9e344e6278..ce06a3645d5ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll @@ -765,8 +765,9 @@ define @vmerge_xv_nxv8i64( %va, i64 %b, Date: Mon, 1 Dec 2025 14:28:45 +0800 Subject: [PATCH 4/7] Peek through COPY in RISCVVectorPeephole --- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 16 +- .../RISCV/rvv/combine-reduce-add-to-vcpop.ll | 244 ++++++------------ .../rvv/fixed-vector-i8-index-cornercase.ll | 55 ++-- .../RISCV/rvv/fixed-vectors-select-addsub.ll | 39 +-- .../fixed-vectors-shuffle-int-interleave.ll | 79 ++++-- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 33 +-- .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 21 +- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 68 ++--- llvm/test/CodeGen/RISCV/rvv/vmerge.ll | 5 +- llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll | 9 +- llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 33 +-- llvm/test/CodeGen/RISCV/rvv/vselect-int.ll | 5 +- 13 files changed, 251 insertions(+), 362 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index e1ff243bb1a47..158971eb0c2c2 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -421,6 +421,12 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { return false; MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg()); + // Peek through COPY. + if (True && True->isCopy()) { + if (Register TrueReg = True->getOperand(1).getReg(); TrueReg.isVirtual()) + True = MRI->getVRegDef(TrueReg); + } + if (!True || True->getParent() != MI.getParent()) return false; @@ -722,7 +728,15 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { Register TrueReg = MI.getOperand(3).getReg(); if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg)) return false; - MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg); + MachineInstr *TrueMI = MRI->getUniqueVRegDef(TrueReg); + // Peek through COPY. 
+ if (TrueMI->isCopy()) { + if (TrueReg = TrueMI->getOperand(1).getReg(); + TrueReg.isVirtual() && MRI->hasOneUse(TrueReg)) + TrueMI = MRI->getVRegDef(TrueReg); + } + + MachineInstr &True = *TrueMI; if (True.getParent() != MI.getParent()) return false; const MachineOperand &MaskOp = MI.getOperand(4); diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll index 27b53befbf4a7..96252f070a580 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll @@ -288,89 +288,54 @@ define i32 @test_nxv128i1( %x) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; CHECK-NEXT: srli a1, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v6, a0 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v7, a0 +; CHECK-NEXT: vslidedown.vx v0, v6, a0 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v24, v16, 1 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v7, a1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v6, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v24, v8, 1 -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-NEXT: vadd.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v16, v8, 1 +; CHECK-NEXT: vslidedown.vx v0, v7, a0 +; CHECK-NEXT: vslidedown.vx v5, v6, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v5 +; 
CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v24, v24, 1 +; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 -; CHECK-NEXT: vadd.vv v16, v24, v16 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -388,14 +353,12 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a1, a1, a0 -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v6, v10 ; CHECK-NEXT: vmv1r.v v7, v9 @@ -405,9 +368,9 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -415,10 +378,7 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill @@ -431,13 +391,6 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: slli a2, a2, 1 @@ -445,155 +398,120 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded 
Spill +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v3, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v2, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vmerge.vim v24, v8, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v4, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v5, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v6, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v24, v16, 1 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v7, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v24, v8, 1 -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v5, v6, a0 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v5, a1 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v16, v8, 1 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v4, v7, a0 +; CHECK-NEXT: vslidedown.vx v5, v7, a0 +; CHECK-NEXT: vslidedown.vx v4, v6, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v4, a1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v24, v8, 1 -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 -; CHECK-NEXT: vadd.vv v8, v24, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; 
CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vslidedown.vx v3, v5, a1 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v3 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v24, v8, 1 -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v8, v16, 1 +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v8, v16, 1 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vadd.vv v16, v8, v16 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vi v8, v8, 1 +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t +; CHECK-NEXT: vadd.vv v24, v24, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vv v16, v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded 
Reload -; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vv v8, v8, v0 +; CHECK-NEXT: vadd.vv v16, v24, v16 +; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: .cfi_def_cfa sp, 16 @@ -623,16 +541,16 @@ define i16 @test_narrow_nxv64i1( %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vadd.vi v16, v8, 1 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vslidedown.vx v8, v0, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vredsum.vs v8, v16, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index fd467dd0b8912..7a337aa253805 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -104,54 +104,55 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: addi s0, sp, 1536 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -512 +; CHECK-NEXT: addi a0, sp, 1520 +; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a0, 512 ; CHECK-NEXT: addi a1, sp, 512 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 5 -; CHECK-NEXT: vmv.x.s a2, v8 +; CHECK-NEXT: vslidedown.vi v0, v24, 5 +; CHECK-NEXT: vmv.x.s a2, v24 ; CHECK-NEXT: li a3, 432 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.x v24, a2 +; CHECK-NEXT: vmv.v.x v8, a2 ; CHECK-NEXT: li a2, 431 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v24, v0, a2 +; CHECK-NEXT: vslideup.vx v8, v0, a2 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-NEXT: vslidedown.vi v0, v24, 4 ; CHECK-NEXT: li a2, 466 ; CHECK-NEXT: li a3, 465 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: vse8.v v24, (a1) ; CHECK-NEXT: lbu a1, 985(sp) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v24, v0, a3 +; CHECK-NEXT: vslideup.vx v8, v0, a3 ; CHECK-NEXT: li a2, 478 ; CHECK-NEXT: lbu a3, 1012(sp) -; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vmv.s.x v24, a1 ; CHECK-NEXT: li a1, 477 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v24, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v24, a1 ; 
CHECK-NEXT: li a1, 501 -; CHECK-NEXT: lui a2, %hi(.LCPI2_0) -; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_0) -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v0, (a2) -; CHECK-NEXT: li a2, 500 -; CHECK-NEXT: vmv.s.x v8, a3 -; CHECK-NEXT: lui a3, %hi(.LCPI2_1) -; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_1) +; CHECK-NEXT: lui a2, %hi(.LCPI2_1) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_1) ; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v9, (a3) -; CHECK-NEXT: addi a3, sp, 1520 -; CHECK-NEXT: vs1r.v v9, (a3) # vscale x 8-byte Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v24, v8, a2 +; CHECK-NEXT: vle64.v v0, (a2) +; CHECK-NEXT: li a2, 500 +; CHECK-NEXT: vmv.s.x v24, a3 +; CHECK-NEXT: lui a3, %hi(.LCPI2_0) +; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_0) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vrgather.vv v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 1520 -; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vle8.v v16, (a3) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v8, v24, a2 +; CHECK-NEXT: addi a1, sp, 1520 +; CHECK-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-NEXT: addi sp, s0, -1536 ; CHECK-NEXT: .cfi_def_cfa sp, 1536 ; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 9696a3fe62883..9d3fe3a90b463 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -128,9 +128,8 @@ define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> ; CHECK-LABEL: select_addsub_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vrsub.vi v24, v16, 0 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vrsub.vi v16, v16, 0, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret %sub = sub <32 x i32> %a, %b @@ -145,39 +144,29 @@ define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vrsub.vi v16, v8, 0 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; 
CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vrsub.vi v16, v24, 0 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vrsub.vi v24, v24, 0, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vv v8, v24, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vadd.vv v16, v24, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll index 9c3730a7bf36b..9bbfad4f32ffa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll @@ -598,53 +598,80 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; ZIP-NEXT: addi sp, sp, -16 ; ZIP-NEXT: .cfi_def_cfa_offset 16 ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: slli a1, a0, 4 -; ZIP-NEXT: add a0, a1, a0 +; ZIP-NEXT: li a1, 40 +; ZIP-NEXT: mul a0, a0, a1 ; ZIP-NEXT: sub sp, sp, a0 -; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: slli a1, a0, 3 -; ZIP-NEXT: add a0, a1, a0 +; ZIP-NEXT: slli a0, a0, 5 ; ZIP-NEXT: add a0, sp, a0 ; ZIP-NEXT: addi a0, a0, 16 ; ZIP-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; ZIP-NEXT: vslidedown.vi v0, v16, 16 +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; ZIP-NEXT: vslidedown.vi v16, v8, 16 ; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZIP-NEXT: ri.vzip2a.vv v16, v24, v0 +; ZIP-NEXT: ri.vzip2a.vv v8, v16, v0 ; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 3 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 5 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; ZIP-NEXT: vslidedown.vi v0, v8, 16 +; ZIP-NEXT: vslidedown.vi v16, v16, 16 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: li a2, 24 +; ZIP-NEXT: mul a1, a1, a2 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill ; ZIP-NEXT: lui a1, 699051 ; ZIP-NEXT: addi a1, a1, -1366 -; ZIP-NEXT: vmv.s.x v16, a1 -; ZIP-NEXT: addi a1, sp, 16 -; ZIP-NEXT: vs1r.v v16, (a1) # vscale x 8-byte Folded Spill -; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZIP-NEXT: ri.vzip2a.vv v16, v0, v24 -; 
ZIP-NEXT: addi a0, sp, 16 -; ZIP-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: add a0, sp, a0 -; ZIP-NEXT: addi a0, a0, 16 -; ZIP-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; ZIP-NEXT: vmerge.vvm v24, v16, v24, v0 +; ZIP-NEXT: vmv.s.x v0, a1 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 4 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: li a2, 24 +; ZIP-NEXT: mul a1, a1, a2 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 4 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 3 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; ZIP-NEXT: ri.vzip2a.vv v8, v24, v16, v0.t +; ZIP-NEXT: vmv.v.v v24, v8 ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: slli a1, a0, 3 -; ZIP-NEXT: add a0, a1, a0 +; ZIP-NEXT: slli a0, a0, 5 ; ZIP-NEXT: add a0, sp, a0 ; ZIP-NEXT: addi a0, a0, 16 ; ZIP-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; ZIP-NEXT: ri.vzip2a.vv v0, v8, v16 ; ZIP-NEXT: vmv.v.v v8, v0 ; ZIP-NEXT: vmv.v.v v16, v24 ; ZIP-NEXT: csrr a0, vlenb -; ZIP-NEXT: slli a1, a0, 4 -; ZIP-NEXT: add a0, a1, a0 +; ZIP-NEXT: li a1, 40 +; ZIP-NEXT: mul a0, a0, a1 ; ZIP-NEXT: add sp, sp, a0 ; ZIP-NEXT: .cfi_def_cfa sp, 16 ; ZIP-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index 740f8115983fc..f2f9f90f386c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -179,38 +179,21 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c) { ; CHECK-LABEL: select_evl_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a3, a1, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vle8.v v24, (a3) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu +; CHECK-NEXT: vle8.v v24, (a0), v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e8, 
m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 129) ret <256 x i8> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 971fbff3ff418..5be32cc35fe37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -859,19 +859,18 @@ define void @test_dag_loop() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 0, e8, m4, ta, ma ; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v8, (zero) -; CHECK-NEXT: vmv4r.v v20, v16 -; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu -; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, mu +; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v20, v16 +; CHECK-NEXT: vmseq.vv v0, v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: vse16.v v16, (zero) +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, mu +; CHECK-NEXT: vle16.v v8, (zero), v0.t +; CHECK-NEXT: vsetivli zero, 0, e16, m8, ta, ma +; CHECK-NEXT: vse16.v v8, (zero) ; CHECK-NEXT: ret entry: %0 = call @llvm.riscv.vle.nxv32i16.i64( poison, ptr null, i64 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index 51c3edeeb356f..863678ed79453 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -505,10 +505,8 @@ define @vmadd_vv_nxv64i8_unmasked( %a, @vmadd_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -521,10 +519,8 @@ define @vmadd_vx_nxv64i8( %a, i8 %b, @vmadd_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -551,9 +547,8 @@ define @vmadd_vv_nxv64i8_ta( %a, @vmadd_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; 
CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -986,10 +981,8 @@ define @vmadd_vv_nxv32i16_unmasked( %a, < define @vmadd_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1002,10 +995,8 @@ define @vmadd_vx_nxv32i16( %a, i16 %b, @vmadd_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1032,9 +1023,8 @@ define @vmadd_vv_nxv32i16_ta( %a, @vmadd_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1389,10 +1379,8 @@ define @vmadd_vv_nxv16i32_unmasked( %a, < define @vmadd_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1405,10 +1393,8 @@ define @vmadd_vx_nxv16i32( %a, i32 %b, @vmadd_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1435,9 +1421,8 @@ define @vmadd_vv_nxv16i32_ta( %a, @vmadd_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1862,10 +1847,8 @@ define 
@vmadd_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1894,10 +1877,8 @@ define @vmadd_vx_nxv8i64_unmasked( %a, i64 ; ; RV64-LABEL: vmadd_vx_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmacc.vx v16, a0, v8 -; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; RV64-NEXT: vmadd.vx v8, a0, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1939,9 +1920,8 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge.ll index 81515c4a81f94..4a411475e337a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge.ll @@ -775,9 +775,8 @@ define @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64( @intrinsic_vmv.s.x_x_nxv8i64( %0, i6 ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vid.v v16 +; RV32-NEXT: vmseq.vi v0, v16, 0 ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vid.v v24 -; RV32-NEXT: vmseq.vi v0, v24, 0 -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 2e9b9467ccaff..03697aafea45d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -1148,10 +1148,8 @@ define @vpmerge_vx_nxv8i64(i64 %a, %vb, @vfmerge_nzv_nxv8f64( %va, @vselect_combine_regression( %va, %vb) { ; CHECK-LABEL: vselect_combine_regression: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: vmseq.vi v0, v24, 0 -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret %cond = icmp eq %va, zeroinitializer %sel = select %cond, %vb, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll 
b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll index ce06a3645d5ac..4ec9e344e6278 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll @@ -765,9 +765,8 @@ define @vmerge_xv_nxv8i64( %va, i64 %b, Date: Mon, 1 Dec 2025 15:17:26 +0800 Subject: [PATCH 5/7] Cherry-pick Luke's changes --- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 69 +++----- llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll | 144 +++++++--------- llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll | 144 +++++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll | 144 +++++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll | 144 +++++++--------- llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll | 158 +++++++----------- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 81 ++++----- .../CodeGen/RISCV/rvv/vmerge-peephole.mir | 21 +++ llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll | 158 +++++++----------- 9 files changed, 428 insertions(+), 635 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 158971eb0c2c2..b5e561caecf51 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -73,7 +73,7 @@ class RISCVVectorPeephole : public MachineFunctionPass { bool isAllOnesMask(const MachineInstr *MaskDef) const; std::optional getConstant(const MachineOperand &VL) const; bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const; - bool isKnownSameDefs(Register A, Register B) const; + Register lookThruCopies(Register Reg) const; }; } // namespace @@ -387,23 +387,18 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { return true; } -bool RISCVVectorPeephole::isKnownSameDefs(Register A, Register B) const { - if (A.isPhysical() || B.isPhysical()) - return false; - - auto LookThruVirtRegCopies = [this](Register Reg) { - while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) { - if (!Def->isFullCopy()) - break; - Register Src = Def->getOperand(1).getReg(); - if (!Src.isVirtual()) - break; - Reg = Src; - } - return Reg; - }; - - return LookThruVirtRegCopies(A) == LookThruVirtRegCopies(B); +// If \p Reg is defined by one or more COPYs of virtual registers, traverses +/// the chain and returns the root non-COPY source. +Register RISCVVectorPeephole::lookThruCopies(Register Reg) const { + while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) { + if (!Def->isFullCopy()) + break; + Register Src = Def->getOperand(1).getReg(); + if (!Src.isVirtual()) + break; + Reg = Src; + } + return Reg; } /// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the @@ -421,12 +416,6 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { return false; MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg()); - // Peek through COPY. 
- if (True && True->isCopy()) { - if (Register TrueReg = True->getOperand(1).getReg(); TrueReg.isVirtual()) - True = MRI->getVRegDef(TrueReg); - } - if (!True || True->getParent() != MI.getParent()) return false; @@ -434,10 +423,11 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { if (!TrueMaskedInfo || !hasSameEEW(MI, *True)) return false; - const MachineOperand &TrueMask = - True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs()); - const MachineOperand &MIMask = MI.getOperand(4); - if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg())) + Register TrueMaskReg = lookThruCopies( + True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs()) + .getReg()); + Register MIMaskReg = lookThruCopies(MI.getOperand(4).getReg()); + if (!TrueMaskReg.isVirtual() || TrueMaskReg != MIMaskReg) return false; // Masked off lanes past TrueVL will come from False, and converting to vmv @@ -723,20 +713,12 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM) return false; - Register PassthruReg = MI.getOperand(1).getReg(); - Register FalseReg = MI.getOperand(2).getReg(); - Register TrueReg = MI.getOperand(3).getReg(); + Register PassthruReg = lookThruCopies(MI.getOperand(1).getReg()); + Register FalseReg = lookThruCopies(MI.getOperand(2).getReg()); + Register TrueReg = lookThruCopies(MI.getOperand(3).getReg()); if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg)) return false; - MachineInstr *TrueMI = MRI->getUniqueVRegDef(TrueReg); - // Peek through COPY. - if (TrueMI->isCopy()) { - if (TrueReg = TrueMI->getOperand(1).getReg(); - TrueReg.isVirtual() && MRI->hasOneUse(TrueReg)) - TrueMI = MRI->getVRegDef(TrueReg); - } - - MachineInstr &True = *TrueMI; + MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg); if (True.getParent() != MI.getParent()) return false; const MachineOperand &MaskOp = MI.getOperand(4); @@ -754,16 +736,17 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { // We require that either passthru and false are the same, or that passthru // is undefined. - if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg)) + if (PassthruReg && !(PassthruReg.isVirtual() && PassthruReg == FalseReg)) return false; std::optional> NeedsCommute; // If True has a passthru operand then it needs to be the same as vmerge's // False, since False will be used for the result's passthru operand. - Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg(); + Register TruePassthru = + lookThruCopies(True.getOperand(True.getNumExplicitDefs()).getReg()); if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru && - !isKnownSameDefs(TruePassthru, FalseReg)) { + !(TruePassthru.isVirtual() && TruePassthru == FalseReg)) { // If True's passthru != False, check if it uses False in another operand // and try to commute it. 
int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI); diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll index 18a32abf8e725..28a8ef0087d85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll @@ -538,10 +538,8 @@ define @vfmacc_vv_nxv32f16( %a, @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -553,10 +551,8 @@ define @vfmacc_vv_nxv32f16_unmasked( %a ; CHECK-LABEL: vfmacc_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -567,10 +563,8 @@ define @vfmacc_vv_nxv32f16_unmasked( %a define @vfmacc_vf_nxv32f16( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -583,10 +577,8 @@ define @vfmacc_vf_nxv32f16( %va, half % define @vfmacc_vf_nxv32f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -599,10 +591,8 @@ define @vfmacc_vf_nxv32f16_commute( %va define @vfmacc_vf_nxv32f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -616,9 +606,9 @@ define @vfmacc_vv_nxv32f16_ta( %a, @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) @@ -628,9 +618,9 @@ define @vfmacc_vv_nxv32f16_ta( %a, @vfmacc_vf_nxv32f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -642,9 +632,9 @@ define @vfmacc_vf_nxv32f16_ta( %va, hal define @vfmacc_vf_nxv32f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1081,10 +1071,8 @@ define @vfmacc_vv_nxv16f32( %a, @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1096,10 +1084,8 @@ define @vfmacc_vv_nxv16f32_unmasked( ; CHECK-LABEL: vfmacc_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1110,10 +1096,8 @@ define @vfmacc_vv_nxv16f32_unmasked( define @vfmacc_vf_nxv16f32( %va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1126,10 +1110,8 @@ define @vfmacc_vf_nxv16f32( %va, floa define @vfmacc_vf_nxv16f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1142,10 +1124,8 @@ define @vfmacc_vf_nxv16f32_commute( % define @vfmacc_vf_nxv16f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1159,9 +1139,9 @@ define @vfmacc_vv_nxv16f32_ta( %a, @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) @@ -1171,9 +1151,9 @@ define @vfmacc_vv_nxv16f32_ta( %a, @vfmacc_vf_nxv16f32_ta( %va, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1185,9 +1165,9 @@ define @vfmacc_vf_nxv16f32_ta( %va, f define @vfmacc_vf_nxv16f32_commute_ta( %va, float %b, %c, %m, 
i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1518,10 +1498,8 @@ define @vfmacc_vv_nxv8f64( %a, @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1533,10 +1511,8 @@ define @vfmacc_vv_nxv8f64_unmasked( % ; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) @@ -1547,10 +1523,8 @@ define @vfmacc_vv_nxv8f64_unmasked( % define @vfmacc_vf_nxv8f64( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1563,10 +1537,8 @@ define @vfmacc_vf_nxv8f64( %va, doubl define @vfmacc_vf_nxv8f64_commute( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1579,10 +1551,8 @@ define @vfmacc_vf_nxv8f64_commute( %v define @vfmacc_vf_nxv8f64_unmasked( %va, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1596,9 +1566,9 @@ define @vfmacc_vv_nxv8f64_ta( %a, @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) @@ -1608,9 +1578,9 @@ define @vfmacc_vv_nxv8f64_ta( %a, @vfmacc_vf_nxv8f64_ta( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1622,9 +1592,9 @@ define 
@vfmacc_vf_nxv8f64_ta( %va, do define @vfmacc_vf_nxv8f64_commute_ta( %va, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll index e3afbfb2969a3..72ed38b53d2ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll @@ -578,10 +578,8 @@ define @vmfsac_vv_nxv32f16( %a, @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) @@ -594,10 +592,8 @@ define @vmfsac_vv_nxv32f16_unmasked( %a ; CHECK-LABEL: vmfsac_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) @@ -609,10 +605,8 @@ define @vmfsac_vv_nxv32f16_unmasked( %a define @vmfsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -626,10 +620,8 @@ define @vmfsac_vf_nxv32f16( %a, half %b define @vmfsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -643,10 +635,8 @@ define @vmfsac_vf_nxv32f16_commute( %a, define @vmfsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vfmsac.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -661,9 +651,9 @@ define @vmfsac_vv_nxv32f16_ta( %a, @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, splat (i1 -1), i32 %evl) @@ -674,9 +664,9 @@ define @vmfsac_vv_nxv32f16_ta( %a, @vmfsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: 
vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -689,9 +679,9 @@ define @vmfsac_vf_nxv32f16_ta( %a, half define @vmfsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1161,10 +1151,8 @@ define @vmfsac_vv_nxv16f32( %a, @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) @@ -1177,10 +1165,8 @@ define @vmfsac_vv_nxv16f32_unmasked( ; CHECK-LABEL: vmfsac_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) @@ -1192,10 +1178,8 @@ define @vmfsac_vv_nxv16f32_unmasked( define @vmfsac_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1209,10 +1193,8 @@ define @vmfsac_vf_nxv16f32( %a, float define @vmfsac_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1226,10 +1208,8 @@ define @vmfsac_vf_nxv16f32_commute( % define @vmfsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vfmsac.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1244,9 +1224,9 @@ define @vmfsac_vv_nxv16f32_ta( %a, @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, splat (i1 -1), i32 %evl) @@ -1257,9 +1237,9 @@ define @vmfsac_vv_nxv16f32_ta( %a, @vmfsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli 
zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1272,9 +1252,9 @@ define @vmfsac_vf_nxv16f32_ta( %a, fl define @vmfsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1630,10 +1610,8 @@ define @vmfsac_vv_nxv8f64( %a, @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) @@ -1646,10 +1624,8 @@ define @vmfsac_vv_nxv8f64_unmasked( % ; CHECK-LABEL: vmfsac_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) @@ -1661,10 +1637,8 @@ define @vmfsac_vv_nxv8f64_unmasked( % define @vmfsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1678,10 +1652,8 @@ define @vmfsac_vf_nxv8f64( %a, double define @vmfsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1695,10 +1667,8 @@ define @vmfsac_vf_nxv8f64_commute( %a define @vmfsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vfmsac.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1713,9 +1683,9 @@ define @vmfsac_vv_nxv8f64_ta( %a, @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, splat (i1 -1), i32 %evl) @@ -1726,9 +1696,9 @@ define @vmfsac_vv_nxv8f64_ta( %a, @vmfsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; 
CHECK-LABEL: vmfsac_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1741,9 +1711,9 @@ define @vmfsac_vf_nxv8f64_ta( %a, dou define @vmfsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll index dc62dd909de2d..7f6fb030b13be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll @@ -618,10 +618,8 @@ define @vfnmacc_vv_nxv32f16( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -635,10 +633,8 @@ define @vfnmacc_vv_nxv32f16_unmasked( % ; CHECK-LABEL: vfnmacc_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfnmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -651,10 +647,8 @@ define @vfnmacc_vv_nxv32f16_unmasked( % define @vfnmacc_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -669,10 +663,8 @@ define @vfnmacc_vf_nxv32f16( %a, half % define @vfnmacc_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -687,10 +679,8 @@ define @vfnmacc_vf_nxv32f16_commute( %a define @vfnmacc_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement 
poison, half %b, i32 0 @@ -706,9 +696,9 @@ define @vfnmacc_vv_nxv32f16_ta( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) @@ -720,9 +710,9 @@ define @vfnmacc_vv_nxv32f16_ta( %a, @vfnmacc_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -736,9 +726,9 @@ define @vfnmacc_vf_nxv32f16_ta( %a, hal define @vfnmacc_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1241,10 +1231,8 @@ define @vfnmacc_vv_nxv16f32( %a, @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1258,10 +1246,8 @@ define @vfnmacc_vv_nxv16f32_unmasked( ; CHECK-LABEL: vfnmacc_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1274,10 +1260,8 @@ define @vfnmacc_vv_nxv16f32_unmasked( define @vfnmacc_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1292,10 +1276,8 @@ define @vfnmacc_vf_nxv16f32( %a, floa define @vfnmacc_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1310,10 +1292,8 @@ define @vfnmacc_vf_nxv16f32_commute( define @vfnmacc_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: 
vfnmacc.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1329,9 +1309,9 @@ define @vfnmacc_vv_nxv16f32_ta( %a, < ; CHECK-LABEL: vfnmacc_vv_nxv16f32_ta: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vv v8, v16, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) @@ -1343,9 +1323,9 @@ define @vfnmacc_vv_nxv16f32_ta( %a, < define @vfnmacc_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1359,9 +1339,9 @@ define @vfnmacc_vf_nxv16f32_ta( %a, f define @vfnmacc_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1742,10 +1722,8 @@ define @vfnmacc_vv_nxv8f64( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1759,10 +1737,8 @@ define @vfnmacc_vv_nxv8f64_unmasked( ; CHECK-LABEL: vfnmacc_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfnmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1775,10 +1751,8 @@ define @vfnmacc_vv_nxv8f64_unmasked( define @vfnmacc_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1793,10 +1767,8 @@ define @vfnmacc_vf_nxv8f64( %a, doubl define @vfnmacc_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret 
%elt.head = insertelement poison, double %b, i32 0 @@ -1811,10 +1783,8 @@ define @vfnmacc_vf_nxv8f64_commute( % define @vfnmacc_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1830,9 +1800,9 @@ define @vfnmacc_vv_nxv8f64_ta( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) @@ -1844,9 +1814,9 @@ define @vfnmacc_vv_nxv8f64_ta( %a, @vfnmacc_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1860,9 +1830,9 @@ define @vfnmacc_vf_nxv8f64_ta( %a, do define @vfnmacc_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll index 05cf261790cb1..37b223be1150c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll @@ -578,10 +578,8 @@ define @vfnmsac_vv_nxv32f16( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -594,10 +592,8 @@ define @vfnmsac_vv_nxv32f16_unmasked( % ; CHECK-LABEL: vfnmsac_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfnmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) @@ -609,10 +605,8 @@ define @vfnmsac_vv_nxv32f16_unmasked( % define @vfnmsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -626,10 +620,8 @@ define @vfnmsac_vf_nxv32f16( %a, half % define @vfnmsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -643,10 +635,8 @@ define @vfnmsac_vf_nxv32f16_commute( %a define @vfnmsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -661,9 +651,9 @@ define @vfnmsac_vv_nxv32f16_ta( %a, @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, splat (i1 -1), i32 %evl) @@ -674,9 +664,9 @@ define @vfnmsac_vv_nxv32f16_ta( %a, @vfnmsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -689,9 +679,9 @@ define @vfnmsac_vf_nxv32f16_ta( %a, hal define @vfnmsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1161,10 +1151,8 @@ define @vfnmsac_vv_nxv16f32( %a, @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1177,10 +1165,8 @@ define @vfnmsac_vv_nxv16f32_unmasked( ; CHECK-LABEL: vfnmsac_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) @@ -1192,10 +1178,8 @@ define @vfnmsac_vv_nxv16f32_unmasked( define @vfnmsac_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1209,10 +1193,8 @@ define @vfnmsac_vf_nxv16f32( %a, floa define @vfnmsac_vf_nxv16f32_commute( 
%a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1226,10 +1208,8 @@ define @vfnmsac_vf_nxv16f32_commute( define @vfnmsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 @@ -1244,9 +1224,9 @@ define @vfnmsac_vv_nxv16f32_ta( %a, < ; CHECK-LABEL: vfnmsac_vv_nxv16f32_ta: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vv v8, v16, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, splat (i1 -1), i32 %evl) @@ -1257,9 +1237,9 @@ define @vfnmsac_vv_nxv16f32_ta( %a, < define @vfnmsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1272,9 +1252,9 @@ define @vfnmsac_vf_nxv16f32_ta( %a, f define @vfnmsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1630,10 +1610,8 @@ define @vfnmsac_vv_nxv8f64( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1646,10 +1624,8 @@ define @vfnmsac_vv_nxv8f64_unmasked( ; CHECK-LABEL: vfnmsac_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfnmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) @@ -1661,10 +1637,8 @@ define @vfnmsac_vv_nxv8f64_unmasked( define @vfnmsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1678,10 +1652,8 @@ define @vfnmsac_vf_nxv8f64( %a, doubl define @vfnmsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1695,10 +1667,8 @@ define @vfnmsac_vf_nxv8f64_commute( % define @vfnmsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1713,9 +1683,9 @@ define @vfnmsac_vv_nxv8f64_ta( %a, @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, splat (i1 -1), i32 %evl) @@ -1726,9 +1696,9 @@ define @vfnmsac_vv_nxv8f64_ta( %a, @vfnmsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1741,9 +1711,9 @@ define @vfnmsac_vf_nxv8f64_ta( %a, do define @vfnmsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll index 64af4e9c534fd..2ad7ac9390515 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -512,10 +512,8 @@ define @vmacc_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -528,10 +526,8 @@ define @vmacc_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -543,10 +539,8 @@ define @vmacc_vv_nxv64i8_unmasked( %a, @vmacc_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; 
CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -560,10 +554,8 @@ define @vmacc_vx_nxv64i8( %a, i8 %b, @vmacc_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -578,9 +570,9 @@ define @vmacc_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -591,9 +583,9 @@ define @vmacc_vv_nxv64i8_ta( %a, @vmacc_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1027,10 +1019,8 @@ define @vmacc_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1043,10 +1033,8 @@ define @vmacc_vv_nxv32i16_unmasked( %a, < ; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1058,10 +1046,8 @@ define @vmacc_vv_nxv32i16_unmasked( %a, < define @vmacc_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1075,10 +1061,8 @@ define @vmacc_vx_nxv32i16( %a, i16 %b, @vmacc_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1093,9 +1077,9 @@ define @vmacc_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1106,9 +1090,9 @@ define @vmacc_vv_nxv32i16_ta( %a, @vmacc_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vmacc_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1458,10 +1442,8 @@ define @vmacc_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1474,10 +1456,8 @@ define @vmacc_vv_nxv16i32_unmasked( %a, < ; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmadd.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1489,10 +1469,8 @@ define @vmacc_vv_nxv16i32_unmasked( %a, < define @vmacc_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1506,10 +1484,8 @@ define @vmacc_vx_nxv16i32( %a, i32 %b, @vmacc_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1524,9 +1500,9 @@ define @vmacc_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1537,9 +1513,9 @@ define @vmacc_vv_nxv16i32_ta( %a, @vmacc_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1946,10 +1922,8 @@ define @vmacc_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1962,10 +1936,8 @@ define @vmacc_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1984,9 +1956,8 @@ define @vmacc_vx_nxv8i64( %a, i64 %b, @vmacc_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 @@ -2018,9 +1987,8 @@ define @vmacc_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vmadd.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vmv.v.v v16, v24 +; RV32-NEXT: vmacc.vv v16, v8, v24 ; RV32-NEXT: vmv8r.v v8, 
v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -2028,10 +1996,8 @@ define @vmacc_vx_nxv8i64_unmasked( %a, i64 ; ; RV64-LABEL: vmacc_vx_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmadd.vx v8, a0, v16 -; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV64-NEXT: vmv.v.v v16, v8 +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; RV64-NEXT: vmacc.vx v16, a0, v8 ; RV64-NEXT: vmv8r.v v8, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -2046,9 +2012,9 @@ define @vmacc_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -2064,19 +2030,19 @@ define @vmacc_vx_nxv8i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index 863678ed79453..fe5b8b9bf6d52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -476,10 +476,8 @@ define @vmadd_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -491,10 +489,8 @@ define @vmadd_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -534,9 +530,8 @@ define @vmadd_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -952,10 +947,8 @@ define @vmadd_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -967,10 +960,8 @@ define @vmadd_vv_nxv32i16_unmasked( %a, < ; CHECK-LABEL: vmadd_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmacc.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmadd.vv v8, v16, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1010,9 +1001,8 @@ define @vmadd_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1350,10 +1340,8 @@ define @vmadd_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1365,10 +1353,8 @@ define @vmadd_vv_nxv16i32_unmasked( %a, < ; CHECK-LABEL: vmadd_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmacc.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmadd.vv v8, v16, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1408,9 +1394,8 @@ define @vmadd_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1802,10 +1787,8 @@ define @vmadd_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call 
@llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1817,10 +1800,8 @@ define @vmadd_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1838,9 +1819,8 @@ define @vmadd_vx_nxv8i64( %a, i64 %b, @vmadd_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vmadd.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vmv.v.v v8, v24 +; RV32-NEXT: vmadd.vv v8, v24, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -1892,9 +1871,8 @@ define @vmadd_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1910,10 +1888,9 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b, @vnmsac_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -528,10 +526,8 @@ define @vnmsac_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) @@ -543,10 +539,8 @@ define @vnmsac_vv_nxv64i8_unmasked( %a, @vnmsac_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -560,10 +554,8 @@ define @vnmsac_vx_nxv64i8( %a, i8 %b, @vnmsac_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vnmsac.vx v16, a0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -578,9 +570,9 @@ define @vnmsac_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) @@ -591,9 +583,9 @@ define @vnmsac_vv_nxv64i8_ta( %a, @vnmsac_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1027,10 +1019,8 @@ define @vnmsac_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1043,10 +1033,8 @@ define @vnmsac_vv_nxv32i16_unmasked( %a, ; CHECK-LABEL: vnmsac_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vnmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) @@ -1058,10 
+1046,8 @@ define @vnmsac_vv_nxv32i16_unmasked( %a, define @vnmsac_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1075,10 +1061,8 @@ define @vnmsac_vx_nxv32i16( %a, i16 %b, < define @vnmsac_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vnmsac.vx v16, a0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 @@ -1093,9 +1077,9 @@ define @vnmsac_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) @@ -1106,9 +1090,9 @@ define @vnmsac_vv_nxv32i16_ta( %a, @vnmsac_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1458,10 +1442,8 @@ define @vnmsac_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1474,10 +1456,8 @@ define @vnmsac_vv_nxv16i32_unmasked( %a, ; CHECK-LABEL: vnmsac_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vnmsub.vv v8, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v24, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) @@ -1489,10 +1469,8 @@ define @vnmsac_vv_nxv16i32_unmasked( %a, define @vnmsac_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1506,10 +1484,8 @@ define @vnmsac_vx_nxv16i32( %a, i32 %b, < define @vnmsac_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vnmsac.vx v16, a0, v8 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement 
poison, i32 %b, i32 0 @@ -1524,9 +1500,9 @@ define @vnmsac_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) @@ -1537,9 +1513,9 @@ define @vnmsac_vv_nxv16i32_ta( %a, @vnmsac_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vnmsub.vx v8, a0, v16 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1946,10 +1922,8 @@ define @vnmsac_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1962,10 +1936,8 @@ define @vnmsac_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) @@ -1984,9 +1956,8 @@ define @vnmsac_vx_nxv8i64( %a, i64 %b, @vnmsac_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 @@ -2018,9 +1987,8 @@ define @vnmsac_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vnmsub.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vmv.v.v v16, v24 +; RV32-NEXT: vnmsac.vv v16, v8, v24 ; RV32-NEXT: vmv8r.v v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -2028,10 +1996,8 @@ define @vnmsac_vx_nxv8i64_unmasked( %a, i64 ; ; RV64-LABEL: vnmsac_vx_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vnmsub.vx v8, a0, v16 -; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV64-NEXT: vmv.v.v v16, v8 +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; RV64-NEXT: vnmsac.vx v16, a0, v8 ; RV64-NEXT: vmv8r.v v8, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -2046,9 +2012,9 @@ define @vnmsac_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) @@ -2064,19 +2030,19 @@ define @vnmsac_vx_nxv8i64_ta( %a, i64 %b, < ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vnmsub.vv v24, v8, v16 -; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV32-NEXT: vnmsac.vv v16, v8, v24, v0.t +; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: vnmsac_vx_nxv8i64_ta: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vnmsub.vx v8, a0, v16 -; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vnmsac.vx v16, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer From 356da649ada4a4be15a7abc409241fbb3e2c25d8 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Mon, 1 Dec 2025 15:36:12 +0800 Subject: [PATCH 6/7] Update vmerge-peephole.mir --- llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir index 0c0602d6fbc18..338732c53aa3e 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
@@ -126,10 +126,9 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
     ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
-    ; CHECK-NEXT: %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size, align 1)
     ; CHECK-NEXT: %mask:vmv0 = COPY $v0
-    ; CHECK-NEXT: %y:vrnov0 = COPY %x
-    ; CHECK-NEXT: %z:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %y, %mask, %avl, 5 /* e32 */
+    ; CHECK-NEXT: %z:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
+    ; CHECK-NEXT: %y:vrnov0 = COPY %z
    %avl:gprnox0 = COPY $x8
    %passthru:vrnov0 = COPY $v8
    %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size)

From 712b77dc69245bb10e351ba0773bebc27f463a0d Mon Sep 17 00:00:00 2001
From: Pengcheng Wang
Date: Tue, 2 Dec 2025 15:37:21 +0800
Subject: [PATCH 7/7] Remove extra '/'

---
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index b5e561caecf51..5acb7f5bcd56a 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -388,7 +388,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
 }
 
 // If \p Reg is defined by one or more COPYs of virtual registers, traverses
-/// the chain and returns the root non-COPY source.
+// the chain and returns the root non-COPY source.
 Register RISCVVectorPeephole::lookThruCopies(Register Reg) const {
   while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
     if (!Def->isFullCopy())
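
Aside on the comment fixed above: lookThruCopies walks the unique definition of a virtual register through full COPYs until it reaches a non-COPY producer. The sketch below is a minimal, self-contained C++ model of that walk under simplified assumptions; the Instr and DefMap types and the standalone lookThruCopies signature are illustrative stand-ins, not LLVM's MachineRegisterInfo API.

// Minimal, self-contained model of "look through copies": follow the unique
// def of a virtual register while it is a full COPY and return the root
// non-COPY source. Toy types only; the real pass queries MachineRegisterInfo.
#include <cstdint>
#include <iostream>
#include <unordered_map>

using Register = std::uint32_t;

struct Instr {
  bool IsFullCopy = false; // true if this def is "vX = COPY vY"
  Register CopySrc = 0;    // source register when IsFullCopy is true
};

// One unique def per virtual register, mirroring SSA-like MIR.
using DefMap = std::unordered_map<Register, Instr>;

Register lookThruCopies(Register Reg, const DefMap &Defs) {
  for (auto It = Defs.find(Reg); It != Defs.end(); It = Defs.find(Reg)) {
    if (!It->second.IsFullCopy)
      break;                    // reached the root non-COPY definition
    Reg = It->second.CopySrc;   // step one link down the copy chain
  }
  return Reg;
}

int main() {
  // %3 = COPY %2, %2 = COPY %1, %1 defined by a non-copy instruction.
  DefMap Defs{{3, {true, 2}}, {2, {true, 1}}, {1, {false, 0}}};
  std::cout << lookThruCopies(3, Defs) << "\n"; // prints 1
}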