
[RISCV] Support vwsll in combineBinOp_VLToVWBinOp_VL #87620

Merged
lukel97 merged 2 commits into llvm:main from lukel97:combineBinOp_VLToVWBinOp_VL-vwsll on Apr 9, 2024

Conversation

@lukel97 lukel97 (Contributor) commented Apr 4, 2024

If the subtarget has +zvbb then we can attempt folding shl and shl_vl to vwsll nodes (a minimal IR sketch of the target pattern follows the list below).

There are a few test cases where we still don't pick up the vwsll:

  • For fixed vector vwsll.vi on RV32, see the FIXME for VMV_V_X_VL in fillUpExtensionSupport about supporting implicit sign extension
  • For scalable vector vwsll.vi we need to support ISD::SPLAT_VECTOR, see #87249
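As a minimal sketch of the pattern this combine targets (illustrative only, not taken from the PR; the function name is made up, but the shape mirrors the tests in vwsll-sdnode.ll): both shl operands are zero-extended from the narrower element type, so the whole expression can select to a single widening shift.

; With llc -mtriple=riscv64 -mattr=+v,+zvbb this should now select one
; vwsll.vv at the narrow SEW instead of two vzext.vf2 plus a vsll.vv.
define <vscale x 2 x i32> @vwsll_vv_sketch(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
  %x = zext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %y = zext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %z = shl <vscale x 2 x i32> %x, %y
  ret <vscale x 2 x i32> %z
}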

@llvmbot llvmbot (Collaborator) commented Apr 4, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

If the subtarget has +zvbb then we can attempt folding shl and shl_vl to vwsll nodes.

There are a few test cases where we still don't pick up the vwsll:

  • For fixed vector vwsll.vi on RV32, see the FIXME for VMV_V_X_VL in fillUpExtensionSupport about supporting implicit sign extension
  • For scalable vector vwsll.vi we need to support ISD::SPLAT_VECTOR, see #87249 (a sketch of this case follows the list)
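To illustrate the second bullet (a hypothetical example, not from the PR): a scalable-vector shift by an immediate reaches the combine with an ISD::SPLAT_VECTOR shift amount, which isn't looked through yet, so nothing folds here.

; Still expected to select vzext.vf8 + vsll.vi rather than using
; vwsll.vi for the final widening step, until ISD::SPLAT_VECTOR is
; handled (see #87249).
define <vscale x 2 x i64> @vwsll_vi_sketch(<vscale x 2 x i8> %a) {
  %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
  %z = shl <vscale x 2 x i64> %x, splat (i64 2)
  ret <vscale x 2 x i64> %z
}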

Full diff: https://github.com/llvm/llvm-project/pull/87620.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+37-9)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll (+51-35)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll (+12-21)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 279d8a435a04ca..02632aeedae2c2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13543,6 +13543,7 @@ enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
 /// mul | mul_vl -> vwmul(u) | vwmul_su
+/// shl | shl_vl -> vwsll
 /// fadd -> vfwadd | vfwadd_w
 /// fsub -> vfwsub | vfwsub_w
 /// fmul -> vfwmul
@@ -13721,6 +13722,9 @@ struct NodeExtensionHelper {
     case ISD::MUL:
     case RISCVISD::MUL_VL:
       return RISCVISD::VWMULU_VL;
+    case ISD::SHL:
+    case RISCVISD::SHL_VL:
+      return RISCVISD::VWSLL_VL;
     default:
       llvm_unreachable("Unexpected opcode");
     }
@@ -13871,7 +13875,8 @@ struct NodeExtensionHelper {
   }
 
   /// Check if \p Root supports any extension folding combines.
-  static bool isSupportedRoot(const SDNode *Root) {
+  static bool isSupportedRoot(const SDNode *Root,
+                              const RISCVSubtarget &Subtarget) {
     switch (Root->getOpcode()) {
     case ISD::ADD:
     case ISD::SUB:
@@ -13897,6 +13902,11 @@ struct NodeExtensionHelper {
     case RISCVISD::VFWADD_W_VL:
     case RISCVISD::VFWSUB_W_VL:
       return true;
+    case ISD::SHL:
+      return Root->getValueType(0).isScalableVector() &&
+             Subtarget.hasStdExtZvbb();
+    case RISCVISD::SHL_VL:
+      return Subtarget.hasStdExtZvbb();
     default:
       return false;
     }
@@ -13905,8 +13915,9 @@ struct NodeExtensionHelper {
   /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
   NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                       const RISCVSubtarget &Subtarget) {
-    assert(isSupportedRoot(Root) && "Trying to build an helper with an "
-                                    "unsupported root");
+    assert(isSupportedRoot(Root, Subtarget) &&
+           "Trying to build an helper with an "
+           "unsupported root");
     assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
     assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
     OrigOperand = Root->getOperand(OperandIdx);
@@ -13958,12 +13969,13 @@ struct NodeExtensionHelper {
   static std::pair<SDValue, SDValue>
   getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
-    assert(isSupportedRoot(Root) && "Unexpected root");
+    assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
     switch (Root->getOpcode()) {
     case ISD::ADD:
     case ISD::SUB:
     case ISD::MUL:
-    case ISD::OR: {
+    case ISD::OR:
+    case ISD::SHL: {
       SDLoc DL(Root);
       MVT VT = Root->getSimpleValueType(0);
       return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
@@ -14001,6 +14013,8 @@ struct NodeExtensionHelper {
     case RISCVISD::VWSUBU_W_VL:
     case RISCVISD::FSUB_VL:
     case RISCVISD::VFWSUB_W_VL:
+    case ISD::SHL:
+    case RISCVISD::SHL_VL:
       return false;
     default:
       llvm_unreachable("Unexpected opcode");
@@ -14054,6 +14068,7 @@ struct CombineResult {
     case ISD::SUB:
     case ISD::MUL:
     case ISD::OR:
+    case ISD::SHL:
       Merge = DAG.getUNDEF(Root->getValueType(0));
       break;
     }
@@ -14224,6 +14239,11 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
     // mul -> vwmulsu
     Strategies.push_back(canFoldToVW_SU);
     break;
+  case ISD::SHL:
+  case RISCVISD::SHL_VL:
+    // shl -> vwsll
+    Strategies.push_back(canFoldToVWWithZEXT);
+    break;
   case RISCVISD::VWADD_W_VL:
   case RISCVISD::VWSUB_W_VL:
     // vwadd_w|vwsub_w -> vwadd|vwsub
@@ -14251,6 +14271,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
 /// mul | mul_vl -> vwmul(u) | vwmul_su
+/// shl | shl_vl -> vwsll
 /// fadd_vl ->  vfwadd | vfwadd_w
 /// fsub_vl ->  vfwsub | vfwsub_w
 /// fmul_vl ->  vfwmul
@@ -14265,7 +14286,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
   if (DCI.isBeforeLegalize())
     return SDValue();
 
-  if (!NodeExtensionHelper::isSupportedRoot(N))
+  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
     return SDValue();
 
   SmallVector<SDNode *> Worklist;
@@ -14276,7 +14297,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
 
   while (!Worklist.empty()) {
     SDNode *Root = Worklist.pop_back_val();
-    if (!NodeExtensionHelper::isSupportedRoot(Root))
+    if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
       return SDValue();
 
     NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
@@ -16371,9 +16392,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                               VPSN->getMemOperand(), IndexType);
     break;
   }
+  case RISCVISD::SHL_VL:
+    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+      return V;
+    [[fallthrough]];
   case RISCVISD::SRA_VL:
-  case RISCVISD::SRL_VL:
-  case RISCVISD::SHL_VL: {
+  case RISCVISD::SRL_VL: {
     SDValue ShAmt = N->getOperand(1);
     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
@@ -16393,6 +16417,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     [[fallthrough]];
   case ISD::SRL:
   case ISD::SHL: {
+    if (N->getOpcode() == ISD::SHL) {
+      if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+        return V;
+    }
     SDValue ShAmt = N->getOperand(1);
     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
index 83d1d1b3f94c7c..9a3ed58d0ec297 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64
 
 ; ==============================================================================
 ; i32 -> i64
@@ -111,8 +111,7 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0
 ; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -371,8 +370,7 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0
 ; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i16> poison, i16 %b, i32 0
@@ -499,6 +497,27 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) {
 }
 
 define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
+; CHECK-ZVBB32-LABEL: vwsll_vx_i64_v16i16:
+; CHECK-ZVBB32:       # %bb.0:
+; CHECK-ZVBB32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-ZVBB32-NEXT:    vmv.v.x v16, a0
+; CHECK-ZVBB32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-ZVBB32-NEXT:    vrgather.vi v24, v16, 0
+; CHECK-ZVBB32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vzext.vf2 v10, v8
+; CHECK-ZVBB32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-ZVBB32-NEXT:    vnsrl.wi v12, v24, 0
+; CHECK-ZVBB32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-ZVBB32-NEXT:    vsll.vv v8, v10, v8
+; CHECK-ZVBB32-NEXT:    ret
+;
+; CHECK-ZVBB64-LABEL: vwsll_vx_i64_v16i16:
+; CHECK-ZVBB64:       # %bb.0:
+; CHECK-ZVBB64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ZVBB64-NEXT:    vwsll.vx v10, v8, a0
+; CHECK-ZVBB64-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT:    ret
   %head = insertelement <8 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <16 x i32> zeroinitializer
   %x = zext <16 x i8> %a to <16 x i16>
@@ -593,8 +612,7 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0
 ; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -661,10 +679,10 @@ define <4 x i64> @vwsll_vv_v4i64_v4i8_zext(<4 x i8> %a, <4 x i8> %b) {
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vzext.vf4 v11, v9
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i8> %a to <4 x i64>
   %y = zext <4 x i8> %b to <4 x i64>
@@ -735,11 +753,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_zext(<4 x i8> %a, i32 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -790,12 +805,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_zext(<4 x i8> %a, i16 %b) {
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -846,12 +858,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_zext(<4 x i8> %a, i8 %b) {
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -869,12 +878,19 @@ define <4 x i64> @vwsll_vi_v4i64_v4i8(<4 x i8> %a) {
 ; CHECK-NEXT:    vsll.vi v8, v10, 2
 ; CHECK-NEXT:    ret
 ;
-; CHECK-ZVBB-LABEL: vwsll_vi_v4i64_v4i8:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vsll.vi v8, v10, 2
-; CHECK-ZVBB-NEXT:    ret
+; CHECK-ZVBB32-LABEL: vwsll_vi_v4i64_v4i8:
+; CHECK-ZVBB32:       # %bb.0:
+; CHECK-ZVBB32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vzext.vf8 v10, v8
+; CHECK-ZVBB32-NEXT:    vsll.vi v8, v10, 2
+; CHECK-ZVBB32-NEXT:    ret
+;
+; CHECK-ZVBB64-LABEL: vwsll_vi_v4i64_v4i8:
+; CHECK-ZVBB64:       # %bb.0:
+; CHECK-ZVBB64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB64-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB64-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB64-NEXT:    ret
   %x = zext <4 x i8> %a to <4 x i64>
   %z = shl <4 x i64> %x, splat (i64 2)
   ret <4 x i64> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 082de2e7bf77bf..72fc9c918f22c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -665,10 +665,10 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, <v
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vzext.vf4 v11, v9
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
   %y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64>
@@ -739,11 +739,8 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -794,12 +791,9 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -850,12 +844,9 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a,
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer

@dtcxzyw dtcxzyw (Member) left a comment
LGTM.

If the subtarget has +zvbb then we can attempt folding shl and shl_vl to vwsll nodes.

There are a few test cases where we still don't pick up the vwsll:
- For fixed vector vwsll.vi on RV32, see the FIXME for VMV_V_X_VL in fillUpExtensionSupport about supporting implicit sign extension
- For scalable vector vwsll.vi we need to support ISD::SPLAT_VECTOR, see llvm#87249
@lukel97 lukel97 force-pushed the combineBinOp_VLToVWBinOp_VL-vwsll branch from 5554883 to 515f591 on April 8, 2024 at 22:58
@topperc topperc (Collaborator) left a comment
LGTM

@lukel97 lukel97 merged commit 9c66036 into llvm:main Apr 9, 2024
3 of 4 checks passed