diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d28d52dd12e3..dcf5bcecf0a6a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11628,15 +11628,39 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 }

-// According to the property that indexed load/store instructions
-// zero-extended their indices, \p narrowIndex tries to narrow the type of index
-// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
-// bits(ty).
+/// According to the property that indexed load/store instructions zero-extend
+/// their indices, try to narrow the type of index operand.
 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
   if (isIndexTypeSigned(IndexType))
     return false;

-  if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
+  if (!N->hasOneUse())
+    return false;
+
+  EVT VT = N.getValueType();
+  SDLoc DL(N);
+
+  // In general, what we're doing here is seeing if we can sink a truncate to
+  // a smaller element type into the expression tree building our index.
+  // TODO: We can generalize this and handle a bunch more cases if useful.
+
+  // Narrow a buildvector to the narrowest element type. This requires less
+  // work and less register pressure at high LMUL, and creates smaller constants
+  // which may be cheaper to materialize.
+  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
+    KnownBits Known = DAG.computeKnownBits(N);
+    unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
+    LLVMContext &C = *DAG.getContext();
+    EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
+    if (ResultVT.bitsLT(VT.getVectorElementType())) {
+      N = DAG.getNode(ISD::TRUNCATE, DL,
+                      VT.changeVectorElementType(ResultVT), N);
+      return true;
+    }
+  }
+
+  // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
+  if (N.getOpcode() != ISD::SHL)
     return false;

   SDValue N0 = N.getOperand(0);
@@ -11651,7 +11675,6 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
     return false;;

-  SDLoc DL(N);
   SDValue Src = N0.getOperand(0);
   EVT SrcVT = Src.getValueType();
   unsigned SrcElen = SrcVT.getScalarSizeInBits();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 6c6ffe656f433..7883aaa27c287 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -12857,10 +12857,10 @@ define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
 ;
 ; RV64V-LABEL: mgather_broadcast_load_unmasked:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64V-NEXT: vmv.v.i v10, 0
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64V-NEXT: vmv.v.i v9, 0
 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_broadcast_load_unmasked:
@@ -12949,10 +12949,10 @@ define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
 ;
 ; RV64V-LABEL: mgather_broadcast_load_masked:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64V-NEXT: vmv.v.i v10, 0
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64V-NEXT: vmv.v.i v9, 0
 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT: vluxei64.v v8, (a0), v10, v0.t
+; RV64V-NEXT: vluxei8.v v8, (a0), v9, v0.t
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_broadcast_load_masked:
@@ -13016,11 +13016,11 @@ define <4 x i32> @mgather_unit_stride_load(ptr %base) {
 ;
 ; RV64V-LABEL: mgather_unit_stride_load:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; RV64V-NEXT: vid.v v8
-; RV64V-NEXT: vsll.vi v10, v8, 2
+; RV64V-NEXT: vsll.vi v9, v8, 2
 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_unit_stride_load:
@@ -13089,11 +13089,9 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
 ; RV64V: # %bb.0:
 ; RV64V-NEXT: lui a1, 115073
 ; RV64V-NEXT: addiw a1, a1, 1040
-; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64V-NEXT: vmv.s.x v8, a1
-; RV64V-NEXT: vsext.vf8 v10, v8
-; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT: vmv.s.x v9, a1
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_unit_stride_load_with_offset:
@@ -13153,19 +13151,20 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
 define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
 ; RV32-LABEL: mgather_unit_stride_load_narrow_idx:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; RV32-NEXT: vid.v v8
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vluxei32.v v8, (a0), v8
+; RV32-NEXT: vsll.vi v9, v8, 2
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vluxei8.v v8, (a0), v9
 ; RV32-NEXT: ret
 ;
 ; RV64V-LABEL: mgather_unit_stride_load_narrow_idx:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; RV64V-NEXT: vid.v v8
-; RV64V-NEXT: vsll.vi v10, v8, 2
+; RV64V-NEXT: vsll.vi v9, v8, 2
 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_unit_stride_load_narrow_idx:
@@ -13224,19 +13223,20 @@ define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
 define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
 ; RV32-LABEL: mgather_unit_stride_load_wide_idx:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; RV32-NEXT: vid.v v8
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vluxei32.v v8, (a0), v8
+; RV32-NEXT: vsll.vi v9, v8, 2
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vluxei8.v v8, (a0), v9
 ; RV32-NEXT: ret
 ;
 ; RV64V-LABEL: mgather_unit_stride_load_wide_idx:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; RV64V-NEXT: vid.v v8
-; RV64V-NEXT: vsll.vi v10, v8, 2
+; RV64V-NEXT: vsll.vi v9, v8, 2
 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_unit_stride_load_wide_idx:
@@ -13374,8 +13374,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ; RV32-NEXT: lui a1, %hi(.LCPI107_0)
 ; RV32-NEXT: addi a1, a1, %lo(.LCPI107_0)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle32.v v10, (a1)
-; RV32-NEXT: vluxei32.v v8, (a0), v10
+; RV32-NEXT: vle8.v v9, (a1)
+; RV32-NEXT: vluxei8.v v8, (a0), v9
 ; RV32-NEXT: ret
 ;
 ; RV64V-LABEL: mgather_strided_2xSEW:
@@ -13383,8 +13383,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ; RV64V-NEXT: lui a1, %hi(.LCPI107_0)
 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI107_0)
 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vle64.v v12, (a1)
-; RV64V-NEXT: vluxei64.v v8, (a0), v12
+; RV64V-NEXT: vle8.v v9, (a1)
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
@@ -13491,8 +13491,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV32-NEXT: lui a1, %hi(.LCPI108_0)
 ; RV32-NEXT: addi a1, a1, %lo(.LCPI108_0)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle32.v v10, (a1)
-; RV32-NEXT: vluxei32.v v8, (a0), v10
+; RV32-NEXT: vle8.v v9, (a1)
+; RV32-NEXT: vluxei8.v v8, (a0), v9
 ; RV32-NEXT: ret
 ;
 ; RV64V-LABEL: mgather_gather_2xSEW:
@@ -13500,8 +13500,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64V-NEXT: lui a1, %hi(.LCPI108_0)
 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI108_0)
 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vle64.v v12, (a1)
-; RV64V-NEXT: vluxei64.v v8, (a0), v12
+; RV64V-NEXT: vle8.v v9, (a1)
+; RV64V-NEXT: vluxei8.v v8, (a0), v9
 ; RV64V-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
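
For reference, the shape of input this combine improves is a fixed-length masked gather whose index ends up as a constant build_vector. The sketch below is illustrative only (the function name and body are hypothetical reductions, not copied from fixed-vectors-masked-gather.ll): with all-zero offsets the index only needs 8 bits per element, so after narrowIndex truncates the build_vector, RV64 can index with vluxei8.v at e8/mf4 instead of vluxei64.v at e64/m2.

declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)

define <4 x i32> @broadcast_load_sketch(ptr %base) {
  ; Every lane loads from %base + 0, so the constant index vector is all
  ; zeroes; computeKnownBits reports at most 8 active bits, allowing the
  ; index to be truncated to <4 x i8> before gather lowering.
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i64> zeroinitializer
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
  ret <4 x i32> %v
}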