[RISCV] Combine (mul (zext, zext)) -> (zext (mul (zext, zext))) (#86465)
Building on #86248, we can also narrow the width of a mul of zexts.

This is specifically legal because on RVV we always extend to the next
power-of-2 width, and multiplying two N-bit integers produces a value of
at most 2*N bits.
So as long as we keep an inner zext to 2*N bits, we will have enough room
for the multiply and won't overflow.

Alive2 proof: https://alive2.llvm.org/ce/z/XteYyb
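
For illustration, the rewrite has this shape on the nxv1i64/nxv1i16 test below (a hand-written IR sketch; the combine itself runs on SelectionDAG nodes):

  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
  %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
  %ve = mul <vscale x 1 x i64> %vc, %vd

becomes

  %vc32 = zext <vscale x 1 x i16> %va to <vscale x 1 x i32>
  %vd32 = zext <vscale x 1 x i16> %vb to <vscale x 1 x i32>
  %mul = mul <vscale x 1 x i32> %vc32, %vd32
  %ve = zext <vscale x 1 x i32> %mul to <vscale x 1 x i64>

Since %vc32 and %vd32 are each at most 2^16 - 1, the 32-bit product is at most 2^32 - 2^17 + 1, so no bits are lost.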
lukel97 committed Mar 26, 2024
1 parent ecfffbf commit 87519a2
Showing 3 changed files with 64 additions and 76 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12935,6 +12935,7 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
 
 // add (zext, zext) -> zext (add (zext, zext))
 // sub (zext, zext) -> sext (sub (zext, zext))
+// mul (zext, zext) -> zext (mul (zext, zext))
 //
 // where the sum of the extend widths match, and the range of the bin op
 // fits inside the width of the narrower bin op. (For profitability on rvv, we
@@ -13380,6 +13381,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
     return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
   }
 
+  if (SDValue V = combineBinOpOfZExt(N, DAG))
+    return V;
+
   return SDValue();
 }
 
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -391,12 +391,12 @@ define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) {
 define <2 x i32> @vwmulu_v2i32_v2i8(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v2i32_v2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
   %a = load <2 x i8>, ptr %x
   %b = load <2 x i8>, ptr %y
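
Note that only a single vzext remains after the multiply here: the e8 vwmulu.vv already yields a 16-bit product, and one vzext.vf2 widens that to the final e32 type. In IR terms, the narrowed form of this test is roughly (hand-written sketch):

  %a16 = zext <2 x i8> %a to <2 x i16>
  %b16 = zext <2 x i8> %b to <2 x i16>
  %m = mul <2 x i16> %a16, %b16
  %r = zext <2 x i16> %m to <2 x i32>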
128 changes: 56 additions & 72 deletions llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -355,10 +355,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vsc
 define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -402,11 +402,9 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v9, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v9
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -451,10 +449,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vsc
 define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -498,11 +496,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -547,10 +543,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
 define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vzext.vf2 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -594,11 +590,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vzext.vf2 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vwmulu.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -643,10 +637,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
 define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vzext.vf2 v20, v10
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vv v16, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v16
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -690,11 +684,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vzext.vf2 v20, v10
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vwmulu.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v16
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -739,10 +731,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscal
 define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v10
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
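
The i8-to-i64 cases show how far this narrows: the checks above keep the multiply at e8 (vwmulu.vv produces an e16 product) and do all remaining widening with a single vzext.vf4. Presumably the combine fires repeatedly and the intermediate zexts fold, leaving roughly (hand-written sketch):

  %vc16 = zext <vscale x 1 x i8> %va to <vscale x 1 x i16>
  %vd16 = zext <vscale x 1 x i8> %vb to <vscale x 1 x i16>
  %m = mul <vscale x 1 x i16> %vc16, %vd16
  %ve = zext <vscale x 1 x i16> %m to <vscale x 1 x i64>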
@@ -786,11 +778,9 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 ; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v9, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v9
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -835,10 +825,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscal
 define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v10
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -882,11 +872,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 ; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v10
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -931,10 +919,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
 define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vwmulu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -978,11 +966,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 ; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vwmulu.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -1027,10 +1013,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
 define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vzext.vf4 v20, v9
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v16, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v16
 ; CHECK-NEXT: ret
   %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1074,11 +1060,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 ; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vzext.vf4 v20, v9
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vwmulu.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v16
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer