[RISCV][ISel] Use vaaddu with rounding mode rnu for ISD::AVGCEILU. #77473
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Chia (sun-jacobi)

Changes

Similar to #76550, but for ISD::AVGCEILU. Specifically, this patch uses vaaddu with rounding mode rnu (i.e. vxrm[1:0] = 0b00) for ISD::AVGCEILU; the IR source and the before/after assembly are reproduced in the commit message further down this page.
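To make the mapping concrete, here is a minimal, hypothetical C++ sketch (not from the patch; the helper names are made up for illustration) of the scalar semantics involved: ISD::AVGFLOORU is the overflow-free (x + y) >> 1 and ISD::AVGCEILU is (x + y + 1) >> 1, which correspond to vaaddu under the vxrm rounding modes rdn (0b10) and rnu (0b00) respectively.

```cpp
#include <cassert>
#include <cstdint>

// Emulate one lane of vaaddu (unsigned averaging add) for 8-bit elements:
// the sum is formed exactly, then shifted right by one, and the discarded
// bit is handled according to the vxrm rounding mode.
enum class VXRM { RNU = 0b00, RDN = 0b10 }; // round-to-nearest-up, round-down

uint8_t vaaddu_lane(uint8_t x, uint8_t y, VXRM mode) {
  unsigned sum = unsigned(x) + unsigned(y);    // 9-bit intermediate, no overflow
  unsigned lost = sum & 1;                     // the bit shifted out
  unsigned round = (mode == VXRM::RNU) ? lost : 0; // rnu adds it back, rdn drops it
  return uint8_t((sum >> 1) + round);
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      // Reference AVGFLOORU / AVGCEILU semantics computed in a wider type.
      unsigned floor_avg = (x + y) / 2;
      unsigned ceil_avg  = (x + y + 1) / 2;
      assert(vaaddu_lane(x, y, VXRM::RDN) == floor_avg);
      assert(vaaddu_lane(x, y, VXRM::RNU) == ceil_avg);
    }
}
```

This is why the change can reuse the existing PseudoVAADDU patterns and only switch the rounding-mode operand between the floor and ceil cases.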
Patch is 43.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77473.diff

6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a5b33e8e293a17..1a5886f6d00f2a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
@@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -5467,6 +5467,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SSUBSAT)
OP_CASE(USUBSAT)
OP_CASE(AVGFLOORU)
+ OP_CASE(AVGCEILU)
OP_CASE(FADD)
OP_CASE(FSUB)
OP_CASE(FMUL)
@@ -5571,7 +5572,7 @@ static bool hasMergeOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -5597,7 +5598,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -6462,6 +6463,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return SplitVectorOp(Op, DAG);
[[fallthrough]];
case ISD::AVGFLOORU:
+ case ISD::AVGCEILU:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -18599,6 +18601,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
NODE_NAME_CASE(AVGFLOORU_VL)
+ NODE_NAME_CASE(AVGCEILU_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5d51fe168b04de..0d14e5b757bdd1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -255,6 +255,8 @@ enum NodeType : unsigned {
// Averaging adds of unsigned integers.
AVGFLOORU_VL,
+ // Rounding averaging adds of unsigned integers.
+ AVGCEILU_VL,
MULHS_VL,
MULHU_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 4f87c36506e520..8ebd8b89c11929 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
}
}
+multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- }
-}
+defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
+defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
// 15. Vector Mask Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index d60ff4b5fab018..1deb9a709463e8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>
def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> {
}
}
+multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
-}
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
class VPatTruncSatClipMaxMinBase<string inst,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
index f6bdeda946c40a..954edf872aff8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8:
+define <8 x i8> @vaaddu_vv_v8i8_floor(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -17,8 +17,8 @@ define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
-; CHECK-LABEL: vaaddu_vx_v8i8:
+define <8 x i8> @vaaddu_vx_v8i8_floor(<8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i8_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -37,8 +37,8 @@ define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
}
-define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8_sexti16:
+define <8 x i8> @vaaddu_vv_v8i8_floor_sexti16(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor_sexti16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vwadd.vv v10, v8, v9
@@ -52,8 +52,8 @@ define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8_zexti32:
+define <8 x i8> @vaaddu_vv_v8i8_floor_zexti32(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor_zexti32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -67,8 +67,8 @@ define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8_lshr2:
+define <8 x i8> @vaaddu_vv_v8i8_floor_lshr2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor_lshr2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
@@ -82,8 +82,8 @@ define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i16:
+define <8 x i16> @vaaddu_vv_v8i16_floor(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i16_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -97,8 +97,8 @@ define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
ret <8 x i16> %ret
}
-define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
-; CHECK-LABEL: vaaddu_vx_v8i16:
+define <8 x i16> @vaaddu_vx_v8i16_floor(<8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i16_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -116,8 +116,8 @@ define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
ret <8 x i16> %ret
}
-define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i32:
+define <8 x i32> @vaaddu_vv_v8i32_floor(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i32_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -131,8 +131,8 @@ define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
ret <8 x i32> %ret
}
-define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
-; CHECK-LABEL: vaaddu_vx_v8i32:
+define <8 x i32> @vaaddu_vx_v8i32_floor(<8 x i32> %x, i32 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i32_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -150,8 +150,8 @@ define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
ret <8 x i32> %ret
}
-define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i64:
+define <8 x i64> @vaaddu_vv_v8i64_floor(<8 x i64> %x, <8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i64_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -165,8 +165,8 @@ define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
ret <8 x i64> %ret
}
-define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i1:
+define <8 x i1> @vaaddu_vv_v8i1_floor(<8 x i1> %x, <8 x i1> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i1_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
@@ -186,8 +186,8 @@ define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
ret <8 x i1> %ret
}
-define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
-; RV32-LABEL: vaaddu_vx_v8i64:
+define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) {
+; RV32-LABEL: vaaddu_vx_v8i64_floor:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -201,7 +201,7 @@ define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: vaaddu_vx_v8i64:
+; RV64-LABEL: vaaddu_vx_v8i64_floor:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: csrwi vxrm, 2
@@ -218,3 +218,258 @@ define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
%ret = trunc <8 x i128> %div to <8 x i64>
ret <8 x i64> %ret
}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vx_v8i8_ceil(<8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i8_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
+ %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i8> %ysplat to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %one = insertelement <8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i16> %add1, %splat
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_sexti16(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_sexti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vi v8, v10, 1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 1
+; CHECK-NEXT: ret
+ %xzv = sext <8 x i8> %x to <8 x i16>
+ %yzv = sext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_zexti32(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i32>
+ %yzv = zext <8 x i8> %y to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_lshr2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vi v8, v10, 2
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 2
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %div = lshr <8 x i16> %add1, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_add2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_add2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: li a0, 2
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i16> @vaaddu_vv_v8i16_ceil(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i16_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: v...
[truncated]
LGTM
This patch is inspired by LLVM patches:
llvm/llvm-project#76550
llvm/llvm-project#77473

Use vaaddu for AVG vectorization.

Before this patch:

	vsetivli zero,8,e8,mf2,ta,ma
	vle8.v v3,0(a1)
	vle8.v v2,0(a2)
	vwaddu.vv v1,v3,v2
	vsetvli zero,zero,e16,m1,ta,ma
	vadd.vi v1,v1,1
	vsetvli zero,zero,e8,mf2,ta,ma
	vnsrl.wi v1,v1,1
	vse8.v v1,0(a0)
	ret

After this patch:

	vsetivli zero,8,e8,mf2,ta,ma
	csrwi vxrm,0
	vle8.v v1,0(a1)
	vle8.v v2,0(a2)
	vaaddu.vv v1,v1,v2
	vse8.v v1,0(a0)
	ret

Note on signed averaging addition: based on the rvv spec, there is also a variant for signed averaging addition called vaadd. But AFAIU, no matter in which rounding mode, we cannot achieve the semantic of signed averaging addition through vaadd. Thus this patch only introduces vaaddu. More details in:
riscvarchive/riscv-v-spec#935
riscvarchive/riscv-v-spec#934

Tested on both RV32 and RV64, no regression. Ok for trunk?

gcc/ChangeLog:

	* config/riscv/autovec.md (<u>avg<v_double_trunc>3_floor): Remove.
	(avg<v_double_trunc>3_floor): New pattern.
	(<u>avg<v_double_trunc>3_ceil): Remove.
	(avg<v_double_trunc>3_ceil): New pattern.
	(uavg<mode>3_floor): Ditto.
	(uavg<mode>3_ceil): Ditto.
	* config/riscv/riscv-protos.h (enum insn_flags): Add for average addition.
	(enum insn_type): Ditto.
	* config/riscv/riscv-v.cc: Ditto.
	* config/riscv/vector-iterators.md (ashiftrt): Remove.
	(ASHIFTRT): Ditto.
	* config/riscv/vector.md: Add VLS modes.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls/avg-1.c: Adapt test.
	* gcc.target/riscv/rvv/autovec/vls/avg-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-6.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: Ditto.
…lvm#77473)

Similar to llvm#76550, but for `ISD::AVGCEILU`. Specifically, this patch aims to use `vaaddu` with rounding mode rnu (i.e `vxrm[1:0] = 0b00`) for `ISD::AVGCEILU`.

### Source code
```
define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
  %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
  %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
  %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
  %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
  %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %add1 = add nuw nsw <vscale x 8 x i16> %add, %splat
  %div = lshr <vscale x 8 x i16> %add1, %splat
  %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
  ret <vscale x 8 x i8> %ret
}
```

### Before this patch
```
vaaddu_vv_nxv8i8_ceil:
	vsetvli	a0, zero, e8, m1, ta, ma
	vwaddu.vv	v10, v8, v9
	vsetvli	zero, zero, e16, m2, ta, ma
	vadd.vi	v10, v10, 1
	vsetvli	zero, zero, e8, m1, ta, ma
	vnsrl.wi	v8, v10, 1
	ret
```

### After this patch
```
vaaddu_vv_nxv8i8_ceil:
	vsetvli	a0, zero, e8, m1, ta, ma
	csrwi	vxrm, 0
	vaaddu.vv	v8, v8, v9
	ret
```
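As a usage-level illustration (my own example, not taken from the PR), a C++ loop of the following shape is the kind of source that typically gets recognized as an unsigned ceiling average and vectorized through ISD::AVGCEILU, which this patch now lowers to `csrwi vxrm, 0` plus `vaaddu.vv`:

```cpp
#include <cstdint>

// Rounding (ceiling) average of two u8 arrays. Widening to unsigned keeps
// a[i] + b[i] + 1 from overflowing 8 bits; an optimizing compiler can
// narrow this back to a single averaging-add per element.
void avg_ceil_u8(uint8_t *dst, const uint8_t *a, const uint8_t *b, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = static_cast<uint8_t>((static_cast<unsigned>(a[i]) + b[i] + 1) >> 1);
}
```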
Similar to #76550, but for `ISD::AVGCEILU`. Specifically, this patch aims to use `vaaddu` with rounding mode rnu (i.e. `vxrm[1:0] = 0b00`) for `ISD::AVGCEILU`; the source code and the before/after output are as shown in the commit message above.