[RISCV] Optimize i64 insertelt on RV32.
We can use a tail-undisturbed vslide1down to insert into the vector.

This should make D136640 unneeded.
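For illustration (mirroring the insertelt_nxv1i64_0 test updated below): inserting an i64 element at index 0 on RV32 previously built the element pair in a scratch register and merged it back with vslideup.vi; with this change it lowers to two tail-undisturbed vslide1down.vx instructions that write the destination register directly.

  define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale x 1 x i64> %v, i64 %elt) {
    %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 0
    ret <vscale x 1 x i64> %r
  }

  ; RV32 lowering after this patch (from the updated test):
  ;   vsetivli zero, 2, e32, m1, tu, ma
  ;   vslide1down.vx v8, v8, a0
  ;   vslide1down.vx v8, v8, a1
  ;   ret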

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D136738
topperc committed Oct 28, 2022
1 parent 4ea6ffb commit 6a79441
Showing 5 changed files with 40 additions and 43 deletions.
19 changes: 19 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5008,6 +5008,25 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
+    // If the Idx is 0 we can insert directly into the vector.
+    if (isNullConstant(Idx)) {
+      // First slide in the lo value, then the hi in above it. We use slide1down
+      // to avoid the register group overlap constraint of vslide1up.
+      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
+      // If the source vector is undef don't pass along the tail elements from
+      // the previous slide1down.
+      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
+      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
+      // Bitcast back to the right container type.
+      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
+
+      if (!VecVT.isFixedLengthVector())
+        return ValInVec;
+      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
+    }
+
    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
12 changes: 3 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -509,9 +509,7 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: ret
;
; RV64-LABEL: bitcast_i64_v4i16:
@@ -547,9 +545,7 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: ret
;
; RV64-LABEL: bitcast_i64_v2i32:
@@ -585,9 +581,7 @@ define <1 x i64> @bitcast_i64_v1i64(i64 %a) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: ret
;
; RV64-LABEL: bitcast_i64_v1i64:
12 changes: 3 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
@@ -200,9 +200,7 @@ define <4 x half> @bitcast_i64_v4f16(i64 %a) {
; RV32-FP: # %bb.0:
; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
-; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
-; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v9, 0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
; RV32-FP-NEXT: ret
;
; RV64-FP-LABEL: bitcast_i64_v4f16:
@@ -219,9 +217,7 @@ define <2 x float> @bitcast_i64_v2f32(i64 %a) {
; RV32-FP: # %bb.0:
; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
-; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
-; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v9, 0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
; RV32-FP-NEXT: ret
;
; RV64-FP-LABEL: bitcast_i64_v2f32:
@@ -238,9 +234,7 @@ define <1 x double> @bitcast_i64_v1f64(i64 %a) {
; RV32-FP: # %bb.0:
; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
-; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
-; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v9, 0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
; RV32-FP-NEXT: ret
;
; RV64-FP-LABEL: bitcast_i64_v1f64:
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -144,11 +144,9 @@ define <2 x i64> @mgather_v2i64_align4(<2 x i64*> %ptrs, <2 x i1> %m, <2 x i64>
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a2
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v10, 0
+; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32-NEXT: vslide1down.vx v9, v9, a2
+; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: andi a0, a0, 2
; RV32-NEXT: beqz a0, .LBB5_2
; RV32-NEXT: .LBB5_4: # %cond.load1
32 changes: 12 additions & 20 deletions llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -635,11 +635,9 @@ define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 %
define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale x 1 x i64> %v, i64 %elt) {
; CHECK-LABEL: insertelt_nxv1i64_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v9, v8, a0
-; CHECK-NEXT: vslide1down.vx v9, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 0
ret <vscale x 1 x i64> %r
@@ -675,11 +673,9 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_idx(<vscale x 1 x i64> %v, i64 %elt
define <vscale x 2 x i64> @insertelt_nxv2i64_0(<vscale x 2 x i64> %v, i64 %elt) {
; CHECK-LABEL: insertelt_nxv2i64_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a0
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 0
ret <vscale x 2 x i64> %r
@@ -715,11 +711,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_idx(<vscale x 2 x i64> %v, i64 %elt
define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale x 4 x i64> %v, i64 %elt) {
; CHECK-LABEL: insertelt_nxv4i64_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT: vslide1down.vx v12, v8, a0
-; CHECK-NEXT: vslide1down.vx v12, v12, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m4, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 0
ret <vscale x 4 x i64> %r
@@ -755,11 +749,9 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_idx(<vscale x 4 x i64> %v, i64 %elt
define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale x 8 x i64> %v, i64 %elt) {
; CHECK-LABEL: insertelt_nxv8i64_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT: vslide1down.vx v16, v8, a0
-; CHECK-NEXT: vslide1down.vx v16, v16, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m8, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v16, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m8, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 0
ret <vscale x 8 x i64> %r
