From a2018effea48b9526ab17feb58f30319a10894d8 Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Fri, 14 Jun 2024 22:28:38 +0800
Subject: [PATCH 1/7] [SelectionDAG][RISCV] Add pre-commit tests.

---
 llvm/test/CodeGen/RISCV/pr94265.ll | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/pr94265.ll
diff --git a/llvm/test/CodeGen/RISCV/pr94265.ll b/llvm/test/CodeGen/RISCV/pr94265.ll
new file mode 100644
index 0000000000000..b1dff117eb17c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr94265.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32-- -mattr=+v | FileCheck -check-prefix=RV32I %s
+; RUN: llc < %s -mtriple=riscv64-- -mattr=+v | FileCheck -check-prefix=RV64I %s
+
+define <8 x i16> @PR94265(<8 x i32> %a0) #0 {
+; RV32I-LABEL: PR94265:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32I-NEXT:    vsra.vi v10, v8, 31
+; RV32I-NEXT:    vsrl.vi v10, v10, 26
+; RV32I-NEXT:    vadd.vv v8, v8, v10
+; RV32I-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; RV32I-NEXT:    vnsrl.wi v10, v8, 0
+; RV32I-NEXT:    vsll.vi v8, v10, 4
+; RV32I-NEXT:    li a0, -1024
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: PR94265:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64I-NEXT:    vsra.vi v10, v8, 31
+; RV64I-NEXT:    vsrl.vi v10, v10, 26
+; RV64I-NEXT:    vadd.vv v8, v8, v10
+; RV64I-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; RV64I-NEXT:    vnsrl.wi v10, v8, 0
+; RV64I-NEXT:    vsll.vi v8, v10, 4
+; RV64I-NEXT:    li a0, -1024
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    ret
+  %t1 = sdiv <8 x i32> %a0, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
+  %t2 = trunc <8 x i32> %t1 to <8 x i16>
+  %t3 = shl <8 x i16> %t2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+  ret <8 x i16> %t3
+}

From 2c04c2327e14a5301b01c6eb6fd0f9aac71e2a05 Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Fri, 14 Jun 2024 22:40:06 +0800
Subject: [PATCH 2/7] [SelectionDAG][RISCV] Add isTypeDesirableForOp with
 NewVT+OldVT, fix issue#94265

---
 llvm/include/llvm/CodeGen/TargetLowering.h       | 14 ++++++++++++++
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  4 +++-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      |  8 ++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.h        |  2 ++
 llvm/test/CodeGen/RISCV/pr94265.ll               | 12 ++++--------
 5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 3074ece787a08..f0e20e4372b8d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4339,6 +4339,20 @@ class TargetLowering : public TargetLoweringBase {
     return isTypeLegal(VT);
   }
 
+  /// Same as isTypeDesirableForOp(unsigned Opc, EVT VT), but also check if
+  /// the target is 'desirable' to truncate or extend OldVT to NewVT only using
+  /// the given node type, without the need of explicit trunc or ext. e.g. On
+  /// RISC-V Vector extension, vnsrl.wi can directly convert <n x i32> to <n x
+  /// i16> when shifting, with no extra trunc operations needed.
+  virtual bool isTypeDesirableForOp(unsigned Opc, EVT NewVT, EVT OldVT) const {
+    // Fallback to isTypeDesirableForOp(unsigned Opc, EVT VT).
+    if (NewVT == OldVT) {
+      return isTypeDesirableForOp(Opc, NewVT);
+    }
+    // Most of instructions are not desirable, so return false by default.
+    return false;
+  }
+
   /// Return true if it is profitable for dag combiner to transform a floating
   /// point op of specified opcode to a equivalent op of an integer
   /// type. e.g. f32 load -> i32 load can be profitable on ARM.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 623d2e0a047ef..373aeac5e7317 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2597,7 +2597,9 @@ bool TargetLowering::SimplifyDemandedBits(
         HighBits.lshrInPlace(ShVal);
         HighBits = HighBits.trunc(BitWidth);
 
-        if (!(HighBits & DemandedBits)) {
+        if (!isTypeDesirableForOp(ISD::SRL, Op.getValueType(),
+                                  Src.getValueType()) &&
+            !(HighBits & DemandedBits)) {
           // None of the shifted in bits are needed.  Add a truncate of the
           // shift input, then shift it.
           SDValue NewShAmt =
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b1b27f03252e0..694e0b0dff1a3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17462,6 +17462,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   return true;
 }
 
+bool RISCVTargetLowering::isTypeDesirableForOp(unsigned Opc, EVT NewVT,
+                                               EVT OldVT) const {
+  if (Subtarget.hasStdExtV() && NewVT.isVector() && OldVT.isVector()) {
+    return true;
+  }
+  return TargetLowering::isTypeDesirableForOp(Opc, NewVT, OldVT);
+}
+
 bool RISCVTargetLowering::targetShrinkDemandedConstant(
     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
     TargetLoweringOpt &TLO) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3b8eb3c88901a..353836783ccfb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -708,6 +708,8 @@ class RISCVTargetLowering : public TargetLowering {
   bool isDesirableToCommuteWithShift(const SDNode *N,
                                      CombineLevel Level) const override;
 
+  bool isTypeDesirableForOp(unsigned Opc, EVT NewVT, EVT OldVT) const override;
+
   /// If a physical register, this returns the register that receives the
   /// exception address on entry to an EH pad.
   Register
diff --git a/llvm/test/CodeGen/RISCV/pr94265.ll b/llvm/test/CodeGen/RISCV/pr94265.ll
index b1dff117eb17c..cb41e22381d19 100644
--- a/llvm/test/CodeGen/RISCV/pr94265.ll
+++ b/llvm/test/CodeGen/RISCV/pr94265.ll
@@ -10,10 +10,8 @@ define <8 x i16> @PR94265(<8 x i32> %a0) #0 {
 ; RV32I-NEXT:    vsrl.vi v10, v10, 26
 ; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32I-NEXT:    vnsrl.wi v10, v8, 0
-; RV32I-NEXT:    vsll.vi v8, v10, 4
-; RV32I-NEXT:    li a0, -1024
-; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    vnsrl.wi v10, v8, 6
+; RV32I-NEXT:    vsll.vi v8, v10, 10
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: PR94265:
@@ -23,10 +21,8 @@ define <8 x i16> @PR94265(<8 x i32> %a0) #0 {
 ; RV64I-NEXT:    vsrl.vi v10, v10, 26
 ; RV64I-NEXT:    vadd.vv v8, v8, v10
 ; RV64I-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64I-NEXT:    vnsrl.wi v10, v8, 0
-; RV64I-NEXT:    vsll.vi v8, v10, 4
-; RV64I-NEXT:    li a0, -1024
-; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    vnsrl.wi v10, v8, 6
+; RV64I-NEXT:    vsll.vi v8, v10, 10
 ; RV64I-NEXT:    ret
   %t1 = sdiv <8 x i32> %a0, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
   %t2 = trunc <8 x i32> %t1 to <8 x i16>

From f6ab73cb4fb9f0fe40637f616e0f585cfa3ae534 Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Fri, 28 Jun 2024 21:47:26 +0800
Subject: [PATCH 3/7] rename new func to isTypeDesirableForOpWithCast

---
 llvm/include/llvm/CodeGen/TargetLowering.h       | 3 ++-
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++--
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      | 6 +++---
 llvm/lib/Target/RISCV/RISCVISelLowering.h        | 3 ++-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f0e20e4372b8d..c94c0b1f9a4e7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4344,7 +4344,8 @@ class TargetLowering : public TargetLoweringBase {
   /// the given node type, without the need of explicit trunc or ext. e.g. On
   /// RISC-V Vector extension, vnsrl.wi can directly convert <n x i32> to <n x
   /// i16> when shifting, with no extra trunc operations needed.
-  virtual bool isTypeDesirableForOp(unsigned Opc, EVT NewVT, EVT OldVT) const {
+  virtual bool isTypeDesirableForOpWithCast(unsigned Opc, EVT NewVT,
+                                            EVT OldVT) const {
     // Fallback to isTypeDesirableForOp(unsigned Opc, EVT VT).
     if (NewVT == OldVT) {
       return isTypeDesirableForOp(Opc, NewVT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 373aeac5e7317..1a8748fa3d131 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2597,8 +2597,8 @@ bool TargetLowering::SimplifyDemandedBits(
         HighBits.lshrInPlace(ShVal);
         HighBits = HighBits.trunc(BitWidth);
 
-        if (!isTypeDesirableForOp(ISD::SRL, Op.getValueType(),
-                                  Src.getValueType()) &&
+        if (!isTypeDesirableForOpWithCast(ISD::SRL, Op.getValueType(),
+                                          Src.getValueType()) &&
             !(HighBits & DemandedBits)) {
           // None of the shifted in bits are needed.  Add a truncate of the
           // shift input, then shift it.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 694e0b0dff1a3..b1a3684835343 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17462,12 +17462,12 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   return true;
 }
 
-bool RISCVTargetLowering::isTypeDesirableForOp(unsigned Opc, EVT NewVT,
-                                               EVT OldVT) const {
+bool RISCVTargetLowering::isTypeDesirableForOpWithCast(unsigned Opc, EVT NewVT,
+                                                       EVT OldVT) const {
   if (Subtarget.hasStdExtV() && NewVT.isVector() && OldVT.isVector()) {
     return true;
   }
-  return TargetLowering::isTypeDesirableForOp(Opc, NewVT, OldVT);
+  return TargetLowering::isTypeDesirableForOpWithCast(Opc, NewVT, OldVT);
 }
 
 bool RISCVTargetLowering::targetShrinkDemandedConstant(
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 353836783ccfb..b79f8ca67bcd5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -708,7 +708,8 @@ class RISCVTargetLowering : public TargetLowering {
   bool isDesirableToCommuteWithShift(const SDNode *N,
                                      CombineLevel Level) const override;
 
-  bool isTypeDesirableForOp(unsigned Opc, EVT NewVT, EVT OldVT) const override;
+  bool isTypeDesirableForOpWithCast(unsigned Opc, EVT NewVT,
+                                    EVT OldVT) const override;
 
   /// If a physical register, this returns the register that receives the
   /// exception address on entry to an EH pad.

From 6286d17512d4f95fdf4c616d45e60c11a60b3d39 Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Sun, 30 Jun 2024 03:30:24 +0800
Subject: [PATCH 4/7] remove new func, use overridden isTruncateFree instead

---
 llvm/include/llvm/CodeGen/TargetLowering.h       | 15 ---------------
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  3 +--
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      | 16 ++++++++--------
 llvm/lib/Target/RISCV/RISCVISelLowering.h        |  4 +---
 4 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c94c0b1f9a4e7..3074ece787a08 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4339,21 +4339,6 @@ class TargetLowering : public TargetLoweringBase {
     return isTypeLegal(VT);
   }
 
-  /// Same as isTypeDesirableForOp(unsigned Opc, EVT VT), but also check if
-  /// the target is 'desirable' to truncate or extend OldVT to NewVT only using
-  /// the given node type, without the need of explicit trunc or ext. e.g. On
-  /// RISC-V Vector extension, vnsrl.wi can directly convert <n x i32> to <n x
-  /// i16> when shifting, with no extra trunc operations needed.
-  virtual bool isTypeDesirableForOpWithCast(unsigned Opc, EVT NewVT,
-                                            EVT OldVT) const {
-    // Fallback to isTypeDesirableForOp(unsigned Opc, EVT VT).
-    if (NewVT == OldVT) {
-      return isTypeDesirableForOp(Opc, NewVT);
-    }
-    // Most of instructions are not desirable, so return false by default.
-    return false;
-  }
-
   /// Return true if it is profitable for dag combiner to transform a floating
   /// point op of specified opcode to a equivalent op of an integer
   /// type. e.g. f32 load -> i32 load can be profitable on ARM.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1a8748fa3d131..60cad8f5b30e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2597,8 +2597,7 @@ bool TargetLowering::SimplifyDemandedBits(
         HighBits.lshrInPlace(ShVal);
         HighBits = HighBits.trunc(BitWidth);
 
-        if (!isTypeDesirableForOpWithCast(ISD::SRL, Op.getValueType(),
-                                          Src.getValueType()) &&
+        if (!isTruncateFree(Src, Op.getValueType()) &&
             !(HighBits & DemandedBits)) {
           // None of the shifted in bits are needed.  Add a truncate of the
           // shift input, then shift it.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b1a3684835343..460ee29abd09f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1884,6 +1884,14 @@ bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
   return (SrcBits == 64 && DestBits == 32);
 }
 
+bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
+  // free truncate from vnsrl and vnsra
+  if (Subtarget.hasStdExtV() && (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) && Val.getValueType().isVector() && VT2.isVector()) {
+    return true;
+  }
+  return TargetLowering::isTruncateFree(Val, VT2);
+}
+
 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   // Zexts are free if they can be combined with a load.
   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
@@ -17462,14 +17470,6 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   return true;
 }
 
-bool RISCVTargetLowering::isTypeDesirableForOpWithCast(unsigned Opc, EVT NewVT,
-                                                       EVT OldVT) const {
-  if (Subtarget.hasStdExtV() && NewVT.isVector() && OldVT.isVector()) {
-    return true;
-  }
-  return TargetLowering::isTypeDesirableForOpWithCast(Opc, NewVT, OldVT);
-}
-
 bool RISCVTargetLowering::targetShrinkDemandedConstant(
     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
     TargetLoweringOpt &TLO) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index b79f8ca67bcd5..d66374ec5b171 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -497,6 +497,7 @@ class RISCVTargetLowering : public TargetLowering {
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
+  bool isTruncateFree(SDValue Val, EVT VT2) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
   bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
   bool signExtendConstant(const ConstantInt *CI) const override;
@@ -708,9 +709,6 @@ class RISCVTargetLowering : public TargetLowering {
   bool isDesirableToCommuteWithShift(const SDNode *N,
                                      CombineLevel Level) const override;
 
-  bool isTypeDesirableForOpWithCast(unsigned Opc, EVT NewVT,
-                                    EVT OldVT) const override;
-
   /// If a physical register, this returns the register that receives the
   /// exception address on entry to an EH pad.
   Register

From 261506d939a775c800c4bb3d7eab945336d24acf Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Sun, 30 Jun 2024 03:31:17 +0800
Subject: [PATCH 5/7] format

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 460ee29abd09f..77eef4d0501b5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1886,7 +1886,9 @@ bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
 
 bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
   // free truncate from vnsrl and vnsra
-  if (Subtarget.hasStdExtV() && (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) && Val.getValueType().isVector() && VT2.isVector()) {
+  if (Subtarget.hasStdExtV() &&
+      (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
+      Val.getValueType().isVector() && VT2.isVector()) {
     return true;
   }
   return TargetLowering::isTruncateFree(Val, VT2);

From 4ba7e48a87c3d5ad37edfd23d84dae71c8e8749d Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Thu, 11 Jul 2024 22:03:34 +0800
Subject: [PATCH 6/7] fix failed tests, check size of VT in isTruncateFree

---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 15 ++++++++++++---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      |  7 +++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 60cad8f5b30e0..411a2924b92ac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2586,6 +2586,17 @@ bool TargetLowering::SimplifyDemandedBits(
         break;
 
       if (Src.getNode()->hasOneUse()) {
+        if (isTruncateFree(Src, VT) &&
+            !isTruncateFree(Src.getValueType(), VT)) {
+          // If truncate is only free at trunc(srl), do not turn it into
+          // srl(trunc). We detect this by first checking that the truncate
+          // is free for Src's opcode (srl), then checking that the truncate
+          // is not done by simply referencing a sub-register. In testing, if
+          // both trunc(srl)'s and srl(trunc)'s truncs are free, srl(trunc)
+          // performs better; if only trunc(srl)'s is free, trunc(srl) wins.
+          break;
+        }
+
         std::optional<uint64_t> ShAmtC =
             TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
         if (!ShAmtC || *ShAmtC >= BitWidth)
@@ -2596,9 +2607,7 @@ bool TargetLowering::SimplifyDemandedBits(
             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
         HighBits.lshrInPlace(ShVal);
         HighBits = HighBits.trunc(BitWidth);
-
-        if (!isTruncateFree(Src, Op.getValueType()) &&
-            !(HighBits & DemandedBits)) {
+        if (!(HighBits & DemandedBits)) {
           // None of the shifted in bits are needed.  Add a truncate of the
           // shift input, then shift it.
           SDValue NewShAmt =
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 77eef4d0501b5..3c7eab1d7defe 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1885,11 +1885,14 @@ bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
 }
 
 bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
+  EVT SrcVT = Val.getValueType();
   // free truncate from vnsrl and vnsra
   if (Subtarget.hasStdExtV() &&
       (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
-      Val.getValueType().isVector() && VT2.isVector()) {
-    return true;
+      SrcVT.isVector() && VT2.isVector()) {
+    unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
+    unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
+    return (SrcBits == DestBits * 2);
   }
   return TargetLowering::isTruncateFree(Val, VT2);
 }

From a5a8af65a4efc157e5a9df82c6ecdd1190a8fa4e Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er@users.noreply.github.com>
Date: Fri, 12 Jul 2024 19:17:45 +0800
Subject: [PATCH 7/7] fall back to default when SrcBits != DestBits * 2

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3c7eab1d7defe..bf1df8c00578a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1892,7 +1892,9 @@ bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
       SrcVT.isVector() && VT2.isVector()) {
     unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
     unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
-    return (SrcBits == DestBits * 2);
+    if (SrcBits == DestBits * 2) {
+      return true;
+    }
   }
   return TargetLowering::isTruncateFree(Val, VT2);
 }