From 469caa31e77f1da37434783f9b4f1d87fe8dff71 Mon Sep 17 00:00:00 2001
From: Chia
Date: Wed, 10 Apr 2024 15:26:17 +0900
Subject: [PATCH] [RISCV] Use vwadd.vx for splat vector with extension (#87249)

This patch allows `combineBinOp_VLToVWBinOp_VL` to handle patterns like
`(splat_vector (sext op))` or `(splat_vector (zext op))`. Then we can use
`vwadd.vx` and `vwadd.w` for such cases.

### Source code
```
define <vscale x 8 x i64> @vwadd_vx_splat_sext(<vscale x 8 x i32> %va, i32 %b) {
  %sb = sext i32 %b to i64
  %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
  %ve = add <vscale x 8 x i64> %vc, %splat
  ret <vscale x 8 x i64> %ve
}
```

### Before this patch
[Compiler Explorer](https://godbolt.org/z/sq191PsT4)
```
vwadd_vx_splat_sext:
        sext.w  a0, a0
        vsetvli a1, zero, e64, m8, ta, ma
        vmv.v.x v16, a0
        vsetvli zero, zero, e32, m4, ta, ma
        vwadd.wv        v16, v16, v8
        vmv8r.v v8, v16
        ret
```

### After this patch
```
vwadd_vx_splat_sext:
        vsetvli a1, zero, e32, m4, ta, ma
        vwadd.vx        v16, v8, a0
        vmv8r.v v8, v16
        ret
```
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  85 ++---
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll  | 336 +++++++++++++-------
 llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll  | 204 +++++++-----
 llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 156 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll |  23 +-
 5 files changed, 569 insertions(+), 235 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 80cc41b458ca81..6e97575c167cd5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13597,7 +13597,8 @@ struct NodeExtensionHelper {
 
   /// Check if this instance represents a splat.
   bool isSplat() const {
-    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
+    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
+           OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
   }
 
   /// Get the extended opcode.
@@ -13641,6 +13642,8 @@ struct NodeExtensionHelper {
     case RISCVISD::VZEXT_VL:
     case RISCVISD::FP_EXTEND_VL:
       return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
+    case ISD::SPLAT_VECTOR:
+      return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
     case RISCVISD::VMV_V_X_VL:
       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
@@ -13776,6 +13779,47 @@ struct NodeExtensionHelper {
   /// Check if this node needs to be fully folded or extended for all users.
   bool needToPromoteOtherUsers() const { return EnforceOneUse; }
 
+  void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
+                                      const RISCVSubtarget &Subtarget) {
+    unsigned Opc = OrigOperand.getOpcode();
+    MVT VT = OrigOperand.getSimpleValueType();
+
+    assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
+           "Unexpected Opcode");
+
+    // The passthru must be undef for tail agnostic.
+    if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
+      return;
+
+    // Get the scalar value.
+    SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
+                                          : OrigOperand.getOperand(1);
+
+    // See if we have enough sign bits or zero bits in the scalar to use a
+    // widening opcode by splatting to smaller element size.
+    unsigned EltBits = VT.getScalarSizeInBits();
+    unsigned ScalarBits = Op.getValueSizeInBits();
+    // Make sure we're getting all element bits from the scalar register.
+    // FIXME: Support implicit sign extension of vmv.v.x?
+    if (ScalarBits < EltBits)
+      return;
+
+    unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+    // If the narrow type cannot be expressed with a legal VMV,
+    // this is not a valid candidate.
+    if (NarrowSize < 8)
+      return;
+
+    if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
+      SupportsSExt = true;
+
+    if (DAG.MaskedValueIsZero(Op,
+                              APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
+      SupportsZExt = true;
+
+    EnforceOneUse = false;
+  }
+
   /// Helper method to set the various fields of this struct based on the
   /// type of \p Root.
   void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
@@ -13814,43 +13858,10 @@ struct NodeExtensionHelper {
     case RISCVISD::FP_EXTEND_VL:
       SupportsFPExt = true;
       break;
-    case RISCVISD::VMV_V_X_VL: {
-      // Historically, we didn't care about splat values not disappearing during
-      // combines.
-      EnforceOneUse = false;
-
-      // The operand is a splat of a scalar.
-
-      // The pasthru must be undef for tail agnostic.
-      if (!OrigOperand.getOperand(0).isUndef())
-        break;
-
-      // Get the scalar value.
-      SDValue Op = OrigOperand.getOperand(1);
-
-      // See if we have enough sign bits or zero bits in the scalar to use a
-      // widening opcode by splatting to smaller element size.
-      MVT VT = Root->getSimpleValueType(0);
-      unsigned EltBits = VT.getScalarSizeInBits();
-      unsigned ScalarBits = Op.getValueSizeInBits();
-      // Make sure we're getting all element bits from the scalar register.
-      // FIXME: Support implicit sign extension of vmv.v.x?
-      if (ScalarBits < EltBits)
-        break;
-
-      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
-      // If the narrow type cannot be expressed with a legal VMV,
-      // this is not a valid candidate.
-      if (NarrowSize < 8)
-        break;
-
-      if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
-        SupportsSExt = true;
-      if (DAG.MaskedValueIsZero(Op,
-                                APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
-        SupportsZExt = true;
+    case ISD::SPLAT_VECTOR:
+    case RISCVISD::VMV_V_X_VL:
+      fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
       break;
-    }
     default:
       break;
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index fc94f8c2a52797..d756cfcf707728 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64F
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
@@ -1229,21 +1229,36 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_nxv1i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v9, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v10, 23
-; CHECK-F-NEXT:    vwsubu.wv v9, v9, v8
-; CHECK-F-NEXT:    li a1, 64
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-F-NEXT:    vminu.vx v8, v9, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_nxv1i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vmv.v.x v9, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV32F-NEXT:    vsrl.vi v8, v10, 23
+; RV32F-NEXT:    vwsubu.wv v9, v9, v8
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vminu.vx v8, v9, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_nxv1i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; RV64F-NEXT:    vmv.v.x v9, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV64F-NEXT:    vsrl.vi v8, v10, 23
+; RV64F-NEXT:    vwsubu.vv v10, v9, v8
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64F-NEXT:    vminu.vx v8, v10, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv1i64:
 ; CHECK-D:       # %bb.0:
@@ -1370,21 +1385,36 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_nxv2i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v10, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v12, 23
-; CHECK-F-NEXT:    vwsubu.wv v10, v10, v8
-; CHECK-F-NEXT:    li a1, 64
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT:    vminu.vx v8, v10, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_nxv2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vmv.v.x v10, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
+; RV32F-NEXT:    vsrl.vi v8, v12, 23
+; RV32F-NEXT:    vwsubu.wv v10, v10, v8
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vminu.vx v8, v10, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_nxv2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64F-NEXT:    vmv.v.x v10, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v11, v8
+; RV64F-NEXT:    vsrl.vi v8, v11, 23
+; RV64F-NEXT:    vwsubu.vv v12, v10, v8
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64F-NEXT:    vminu.vx v8, v12, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv2i64:
 ; CHECK-D:       # %bb.0:
@@ -1511,21 +1541,36 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_nxv4i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v12, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v16, 23
-; CHECK-F-NEXT:    vwsubu.wv v12, v12, v8
-; CHECK-F-NEXT:    li a1, 64
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT:    vminu.vx v8, v12, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_nxv4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32F-NEXT:    vmv.v.x v12, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v16, v8
+; RV32F-NEXT:    vsrl.vi v8, v16, 23
+; RV32F-NEXT:    vwsubu.wv v12, v12, v8
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT:    vminu.vx v8, v12, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_nxv4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV64F-NEXT:    vmv.v.x v12, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v14, v8
+; RV64F-NEXT:    vsrl.vi v8, v14, 23
+; RV64F-NEXT:    vwsubu.vv v16, v12, v8
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV64F-NEXT:    vminu.vx v8, v16, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv4i64:
 ; CHECK-D:       # %bb.0:
@@ -1652,21 +1697,36 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_nxv8i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v16, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v24, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v24, 23
-; CHECK-F-NEXT:    vwsubu.wv v16, v16, v8
-; CHECK-F-NEXT:    li a1, 64
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT:    vminu.vx v8, v16, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_nxv8i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32F-NEXT:    vmv.v.x v16, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v24, v8
+; RV32F-NEXT:    vsrl.vi v8, v24, 23
+; RV32F-NEXT:    vwsubu.wv v16, v16, v8
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT:    vminu.vx v8, v16, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_nxv8i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64F-NEXT:    vmv.v.x v16, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v20, v8
+; RV64F-NEXT:    vsrl.vi v8, v20, 23
+; RV64F-NEXT:    vwsubu.vv v24, v16, v8
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64F-NEXT:    vminu.vx v8, v24, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv8i64:
 ; CHECK-D:       # %bb.0:
@@ -2835,19 +2895,31 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v9, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v10, 23
-; CHECK-F-NEXT:    vwsubu.wv v9, v9, v8
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    vmv1r.v v8, v9
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_zero_undef_nxv1i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vmv.v.x v9, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV32F-NEXT:    vsrl.vi v8, v10, 23
+; RV32F-NEXT:    vwsubu.wv v9, v9, v8
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    vmv1r.v v8, v9
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv1i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; RV64F-NEXT:    vmv.v.x v9, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV64F-NEXT:    vsrl.vi v10, v10, 23
+; RV64F-NEXT:    vwsubu.vv v8, v9, v10
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
 ; CHECK-D:       # %bb.0:
@@ -2971,19 +3043,31 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v10, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v12, 23
-; CHECK-F-NEXT:    vwsubu.wv v10, v10, v8
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    vmv2r.v v8, v10
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_zero_undef_nxv2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vmv.v.x v10, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
+; RV32F-NEXT:    vsrl.vi v8, v12, 23
+; RV32F-NEXT:    vwsubu.wv v10, v10, v8
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    vmv2r.v v8, v10
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64F-NEXT:    vmv.v.x v10, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v11, v8
+; RV64F-NEXT:    vsrl.vi v11, v11, 23
+; RV64F-NEXT:    vwsubu.vv v8, v10, v11
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
 ; CHECK-D:       # %bb.0:
@@ -3107,19 +3191,31 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v12, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v16, 23
-; CHECK-F-NEXT:    vwsubu.wv v12, v12, v8
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    vmv4r.v v8, v12
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_zero_undef_nxv4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32F-NEXT:    vmv.v.x v12, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v16, v8
+; RV32F-NEXT:    vsrl.vi v8, v16, 23
+; RV32F-NEXT:    vwsubu.wv v12, v12, v8
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    vmv4r.v v8, v12
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV64F-NEXT:    vmv.v.x v12, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v14, v8
+; RV64F-NEXT:    vsrl.vi v14, v14, 23
+; RV64F-NEXT:    vwsubu.vv v8, v12, v14
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
 ; CHECK-D:       # %bb.0:
@@ -3243,19 +3339,31 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    vmv8r.v v16, v8
-; CHECK-F-NEXT:    li a0, 190
-; CHECK-F-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-F-NEXT:    vmv.v.x v8, a0
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v24, v16
-; CHECK-F-NEXT:    vsrl.vi v16, v24, 23
-; CHECK-F-NEXT:    vwsubu.wv v8, v8, v16
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: ctlz_zero_undef_nxv8i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vmv8r.v v16, v8
+; RV32F-NEXT:    li a0, 190
+; RV32F-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32F-NEXT:    vmv.v.x v8, a0
+; RV32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v24, v16
+; RV32F-NEXT:    vsrl.vi v16, v24, 23
+; RV32F-NEXT:    vwsubu.wv v8, v8, v16
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv8i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    li a0, 190
+; RV64F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64F-NEXT:    vmv.v.x v16, a0
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v20, v8
+; RV64F-NEXT:    vsrl.vi v20, v20, 23
+; RV64F-NEXT:    vwsubu.vv v8, v16, v20
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
 ; CHECK-D:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index b14cde25aa85b2..d13f4d2dca1ff4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1241,13 +1241,12 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64F-NEXT:    fsrmi a0, 1
 ; RV64F-NEXT:    vfncvt.f.xu.w v10, v9
 ; RV64F-NEXT:    vsrl.vi v9, v10, 23
-; RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV64F-NEXT:    vzext.vf2 v10, v9
 ; RV64F-NEXT:    li a1, 127
-; RV64F-NEXT:    vsub.vx v9, v10, a1
+; RV64F-NEXT:    vwsubu.vx v10, v9, a1
+; RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; RV64F-NEXT:    vmseq.vi v0, v8, 0
 ; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RV64F-NEXT:    vmerge.vxm v8, v10, a1, v0
 ; RV64F-NEXT:    fsrm a0
 ; RV64F-NEXT:    ret
 ;
@@ -1404,13 +1403,12 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64F-NEXT:    fsrmi a0, 1
 ; RV64F-NEXT:    vfncvt.f.xu.w v12, v10
 ; RV64F-NEXT:    vsrl.vi v10, v12, 23
-; RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64F-NEXT:    vzext.vf2 v12, v10
 ; RV64F-NEXT:    li a1, 127
-; RV64F-NEXT:    vsub.vx v10, v12, a1
+; RV64F-NEXT:    vwsubu.vx v12, v10, a1
+; RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV64F-NEXT:    vmseq.vi v0, v8, 0
 ; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RV64F-NEXT:    vmerge.vxm v8, v12, a1, v0
 ; RV64F-NEXT:    fsrm a0
 ; RV64F-NEXT:    ret
 ;
@@ -1567,13 +1565,12 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV64F-NEXT:    fsrmi a0, 1
 ; RV64F-NEXT:    vfncvt.f.xu.w v16, v12
 ; RV64F-NEXT:    vsrl.vi v12, v16, 23
-; RV64F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64F-NEXT:    vzext.vf2 v16, v12
 ; RV64F-NEXT:    li a1, 127
-; RV64F-NEXT:    vsub.vx v12, v16, a1
+; RV64F-NEXT:    vwsubu.vx v16, v12, a1
+; RV64F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; RV64F-NEXT:    vmseq.vi v0, v8, 0
 ; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vmerge.vxm v8, v12, a1, v0
+; RV64F-NEXT:    vmerge.vxm v8, v16, a1, v0
 ; RV64F-NEXT:    fsrm a0
 ; RV64F-NEXT:    ret
 ;
@@ -1730,13 +1727,12 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV64F-NEXT:    fsrmi a0, 1
 ; RV64F-NEXT:    vfncvt.f.xu.w v24, v16
 ; RV64F-NEXT:    vsrl.vi v16, v24, 23
-; RV64F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64F-NEXT:    vzext.vf2 v24, v16
 ; RV64F-NEXT:    li a1, 127
-; RV64F-NEXT:    vsub.vx v16, v24, a1
+; RV64F-NEXT:    vwsubu.vx v24, v16, a1
+; RV64F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV64F-NEXT:    vmseq.vi v0, v8, 0
 ; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vmerge.vxm v8, v16, a1, v0
+; RV64F-NEXT:    vmerge.vxm v8, v24, a1, v0
 ; RV64F-NEXT:    fsrm a0
 ; RV64F-NEXT:    ret
 ;
@@ -2891,21 +2887,35 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: cttz_zero_undef_nxv1i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-F-NEXT:    vand.vv v8, v8, v9
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v9, 23
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v9, v8
-; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vsub.vx v8, v9, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: cttz_zero_undef_nxv1i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vrsub.vi v9, v8, 0
+; RV32F-NEXT:    vand.vv v8, v8, v9
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v9, v8
+; RV32F-NEXT:    vsrl.vi v8, v9, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vzext.vf2 v9, v8
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v8, v9, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv1i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64F-NEXT:    vrsub.vi v9, v8, 0
+; RV64F-NEXT:    vand.vv v8, v8, v9
+; RV64F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v9, v8
+; RV64F-NEXT:    vsrl.vi v9, v9, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v8, v9, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv1i64:
 ; CHECK-D:       # %bb.0:
@@ -3011,21 +3021,35 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: cttz_zero_undef_nxv2i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
-; CHECK-F-NEXT:    vand.vv v8, v8, v10
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v10, 23
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v10, v8
-; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vsub.vx v8, v10, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: cttz_zero_undef_nxv2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vrsub.vi v10, v8, 0
+; RV32F-NEXT:    vand.vv v8, v8, v10
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV32F-NEXT:    vsrl.vi v8, v10, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vzext.vf2 v10, v8
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v8, v10, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64F-NEXT:    vrsub.vi v10, v8, 0
+; RV64F-NEXT:    vand.vv v8, v8, v10
+; RV64F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV64F-NEXT:    vsrl.vi v10, v10, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v8, v10, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv2i64:
 ; CHECK-D:       # %bb.0:
@@ -3131,21 +3155,35 @@ define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: cttz_zero_undef_nxv4i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
-; CHECK-F-NEXT:    vand.vv v8, v8, v12
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v12, 23
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v12, v8
-; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vsub.vx v8, v12, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: cttz_zero_undef_nxv4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32F-NEXT:    vrsub.vi v12, v8, 0
+; RV32F-NEXT:    vand.vv v8, v8, v12
+; RV32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
+; RV32F-NEXT:    vsrl.vi v8, v12, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT:    vzext.vf2 v12, v8
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v8, v12, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64F-NEXT:    vrsub.vi v12, v8, 0
+; RV64F-NEXT:    vand.vv v8, v8, v12
+; RV64F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v12, v8
+; RV64F-NEXT:    vsrl.vi v12, v12, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v8, v12, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv4i64:
 ; CHECK-D:       # %bb.0:
@@ -3251,21 +3289,35 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; CHECK-F-LABEL: cttz_zero_undef_nxv8i64:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-F-NEXT:    vrsub.vi v16, v8, 0
-; CHECK-F-NEXT:    vand.vv v8, v8, v16
-; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT:    vsrl.vi v8, v16, 23
-; CHECK-F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v16, v8
-; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vsub.vx v8, v16, a1
-; CHECK-F-NEXT:    fsrm a0
-; CHECK-F-NEXT:    ret
+; RV32F-LABEL: cttz_zero_undef_nxv8i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32F-NEXT:    vrsub.vi v16, v8, 0
+; RV32F-NEXT:    vand.vv v8, v8, v16
+; RV32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32F-NEXT:    fsrmi a0, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v16, v8
+; RV32F-NEXT:    vsrl.vi v8, v16, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT:    vzext.vf2 v16, v8
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v8, v16, a1
+; RV32F-NEXT:    fsrm a0
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv8i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64F-NEXT:    vrsub.vi v16, v8, 0
+; RV64F-NEXT:    vand.vv v8, v8, v16
+; RV64F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64F-NEXT:    fsrmi a0, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v16, v8
+; RV64F-NEXT:    vsrl.vi v16, v16, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v8, v16, a1
+; RV64F-NEXT:    fsrm a0
+; RV64F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv8i64:
 ; CHECK-D:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 5dd01c654eff1d..21ddf1a6e114d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1466,3 +1466,159 @@ define <vscale x 2 x i32> @vwadd_wv_disjoint_or(<vscale x 2 x i32> %x.i32,
   %or = or disjoint <vscale x 2 x i32> %x.i32, %y.i32
   ret <vscale x 2 x i32> %or
 }
+
+define <vscale x 8 x i64> @vwadd_vx_splat_zext(<vscale x 8 x i32> %va, i32 %b) {
+; RV32-LABEL: vwadd_vx_splat_zext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vwaddu.wv v16, v16, v8
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_zext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a0, a0, -1
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT:    vwaddu.vx v16, v8, a0
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %zb = zext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %splat
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b) {
+; RV32-LABEL: vwadd_vx_splat_zext_i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a0, a0, 16
+; RV32-NEXT:    srli a0, a0, 16
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_zext_i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 48
+; RV64-NEXT:    srli a0, a0, 48
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vmv.v.x v8, a0
+; RV64-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV64-NEXT:    ret
+  %zb = zext i16 %b to i32
+  %head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i1> %va to <vscale x 8 x i32>
+  %ve = add <vscale x 8 x i32> %vc, %splat
+  ret <vscale x 8 x i32> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wx_splat_zext(<vscale x 8 x i64> %va, i32 %b) {
+; RV32-LABEL: vwadd_wx_splat_zext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_wx_splat_zext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT:    vadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %zb = zext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %ve = add <vscale x 8 x i64> %va, %splat
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_vx_splat_sext(<vscale x 8 x i32> %va, i32 %b) {
+; RV32-LABEL: vwadd_vx_splat_sext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a0
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vwadd.wv v16, v16, v8
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_sext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT:    vwadd.vx v16, v8, a0
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %sb = sext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %splat
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b) {
+; RV32-LABEL: vwadd_vx_splat_sext_i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a0, a0, 16
+; RV32-NEXT:    srai a0, a0, 16
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_sext_i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 48
+; RV64-NEXT:    srai a0, a0, 48
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vmv.v.x v8, a0
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %sb = sext i16 %b to i32
+  %head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i1> %va to <vscale x 8 x i32>
+  %ve = add <vscale x 8 x i32> %vc, %splat
+  ret <vscale x 8 x i32> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wx_splat_sext(<vscale x 8 x i64> %va, i32 %b) {
+; RV32-LABEL: vwadd_wx_splat_sext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vadd.vx v8, v8, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_wx_splat_sext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT:    vadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %sb = sext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %ve = add <vscale x 8 x i64> %va, %splat
+  ret <vscale x 8 x i64> %ve
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 72fc9c918f22c4..41ec2fc443d028 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,RV32ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,RV64ZVBB
 
 ; ==============================================================================
 ; i32 -> i64
@@ -864,12 +864,19 @@ define <vscale x 2 x i64> @vwsll_vi_nxv2i64_nxv2i8(<vscale x 2 x i8> %a) {
 ; CHECK-NEXT:    vsll.vi v8, v10, 2
 ; CHECK-NEXT:    ret
 ;
-; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vsll.vi v8, v10, 2
-; CHECK-ZVBB-NEXT:    ret
+; RV32ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8:
+; RV32ZVBB:       # %bb.0:
+; RV32ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32ZVBB-NEXT:    vzext.vf8 v10, v8
+; RV32ZVBB-NEXT:    vsll.vi v8, v10, 2
+; RV32ZVBB-NEXT:    ret
+;
+; RV64ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8:
+; RV64ZVBB:       # %bb.0:
+; RV64ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; RV64ZVBB-NEXT:    vzext.vf4 v10, v8
+; RV64ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; RV64ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
   %z = shl <vscale x 2 x i64> %x, splat (i64 2)
   ret <vscale x 2 x i64> %z
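For completeness, the zero-extend path of the combine can be exercised the same way as the headline `sext` example. Below is a minimal sketch, not part of the patch: the function name is illustrative, and the `llc` invocation in the comment mirrors the `+v` RUN lines from the tests above. With this patch, the splat of the zero-extended scalar and the zero-extended vector operand are both narrowed, so the add selects to `vwaddu.vx`, as checked by the `vwadd_vx_splat_zext` test in the diff.

```
; Compile with: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < example.ll
define <vscale x 8 x i64> @vwaddu_vx_splat_zext_example(<vscale x 8 x i32> %va, i32 %b) {
  ; Splat of a zero-extended scalar added to a zero-extended vector:
  ; both operands can be expressed at the narrow (e32) element width,
  ; so the combine emits a single widening vwaddu.vx.
  %zb = zext i32 %b to i64
  %head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
  %ve = add <vscale x 8 x i64> %vc, %splat
  ret <vscale x 8 x i64> %ve
}
```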