-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[RISCV][llvm] Support VFADD, VFSUB, VFMUL codegen for Zvfbfa #170612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Support both fixed-length vectors and scalable vectors. Note: VP version is not gonna be supported for trivial instructions since they're going to be removed soon.
|
@llvm/pr-subscribers-backend-risc-v Author: Brandon Wu (4vtomat) ChangesSupport both fixed-length vectors and scalable vectors. Note: VP version is not gonna be supported for trivial instructions Patch is 129.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170612.diff 10 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab2652eac3823..5942236a1ce8f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -90,8 +90,9 @@ static cl::opt<bool>
// TODO: Support more ops
static const unsigned ZvfbfaVPOps[] = {
ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN, ISD::EXPERIMENTAL_VP_SPLAT};
-static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN,
- ISD::SPLAT_VECTOR};
+static const unsigned ZvfbfaOps[] = {
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
+ ISD::FADD, ISD::FSUB, ISD::FMUL};
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
@@ -1090,6 +1091,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECREDUCE_FMINIMUM,
ISD::VECREDUCE_FMAXIMUM};
+ // TODO: support more ops.
+ static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FMINIMUMNUM,
+ ISD::FMAXIMUMNUM,
+ ISD::FDIV,
+ ISD::FMA,
+ ISD::FSQRT,
+ ISD::FCEIL,
+ ISD::FTRUNC,
+ ISD::FFLOOR,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FRINT,
+ ISD::FNEARBYINT,
+ ISD::IS_FPCLASS,
+ ISD::SETCC,
+ ISD::FMAXIMUM,
+ ISD::FMINIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA,
+ ISD::VECREDUCE_FMIN,
+ ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_FMAXIMUM};
+
// TODO: support more vp ops.
static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
ISD::VP_FADD,
@@ -1294,11 +1325,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
if (getLMUL(VT) == RISCVVType::LMUL_8) {
- setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfbfaPromoteOps, VT, Custom);
setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
} else {
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
}
};
@@ -1615,7 +1646,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: could split the f16 vector into two vectors and do promotion.
if (!isTypeLegal(F32VecVT))
continue;
- setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+
+ if (Subtarget.hasStdExtZvfbfa())
+ setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
+ else
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
continue;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index ffb2ac0756da4..7faac137fd41d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -523,6 +523,71 @@ multiclass VPatConversionVF_WF_BF16<string intrinsic, string instruction,
}
}
+multiclass VPatBinaryFPSDNode_VV_VF_RM_BF16<SDPatternOperator vop,
+ string instruction_name> {
+ foreach vti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinarySDNode_VV_RM<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass, isSEWAware=1>;
+ def : VPatBinarySDNode_VF_RM<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Scalar,
+ vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
+ vti.ScalarRegClass, isSEWAware=1>;
+ }
+ }
+}
+
+multiclass VPatBinaryFPSDNode_R_VF_RM_BF16<SDPatternOperator vop, string instruction_name> {
+ foreach fvti = AllBF16Vectors in
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)),
+ (fvti.Vector fvti.RegClass:$rs1))),
+ (!cast<Instruction>(
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs1,
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
+multiclass VPatBinaryFPVL_VV_VF_RM_BF16<SDPatternOperator vop, string instruction_name> {
+ foreach vti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinaryVL_V_RM<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.RegClass, isSEWAware=1>;
+ def : VPatBinaryVL_VF_RM<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.ScalarRegClass, isSEWAware=1>;
+ }
+ }
+}
+
+multiclass VPatBinaryFPVL_R_VF_RM_BF16<SDPatternOperator vop, string instruction_name> {
+ foreach fvti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
+ fvti.RegClass:$rs1,
+ (fvti.Vector fvti.RegClass:$passthru),
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
+ fvti.RegClass:$passthru,
+ fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ (fvti.Mask VMV0:$vm),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
let Predicates = [HasStdExtZvfbfa] in {
defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD_ALT",
AllBF16Vectors, isSEWAware = 1>;
@@ -783,4 +848,14 @@ let Predicates = [HasStdExtZvfbfa] in {
TAIL_AGNOSTIC)>;
}
}
+
+ defm : VPatBinaryFPSDNode_VV_VF_RM_BF16<any_fadd, "PseudoVFADD_ALT">;
+ defm : VPatBinaryFPSDNode_VV_VF_RM_BF16<any_fsub, "PseudoVFSUB_ALT">;
+ defm : VPatBinaryFPSDNode_VV_VF_RM_BF16<any_fmul, "PseudoVFMUL_ALT">;
+ defm : VPatBinaryFPSDNode_R_VF_RM_BF16<any_fsub, "PseudoVFRSUB_ALT">;
+
+ defm : VPatBinaryFPVL_VV_VF_RM_BF16<any_riscv_fadd_vl, "PseudoVFADD_ALT">;
+ defm : VPatBinaryFPVL_VV_VF_RM_BF16<any_riscv_fsub_vl, "PseudoVFSUB_ALT">;
+ defm : VPatBinaryFPVL_VV_VF_RM_BF16<any_riscv_fmul_vl, "PseudoVFMUL_ALT">;
+ defm : VPatBinaryFPVL_R_VF_RM_BF16<any_riscv_fsub_vl, "PseudoVFRSUB_ALT">;
} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-sdnode.ll
new file mode 100644
index 0000000000000..14432bd9b1a45
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-sdnode.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x bfloat> @vfadd_vv_v1bf16(<1 x bfloat> %va, <1 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fadd <1 x bfloat> %va, %vb
+ ret <1 x bfloat> %vc
+}
+
+define <1 x bfloat> @vfadd_vf_v1bf16(<1 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <1 x bfloat> %head, <1 x bfloat> poison, <1 x i32> zeroinitializer
+ %vc = fadd <1 x bfloat> %va, %splat
+ ret <1 x bfloat> %vc
+}
+
+define <2 x bfloat> @vfadd_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fadd <2 x bfloat> %va, %vb
+ ret <2 x bfloat> %vc
+}
+
+define <2 x bfloat> @vfadd_vf_v2bf16(<2 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %vc = fadd <2 x bfloat> %va, %splat
+ ret <2 x bfloat> %vc
+}
+
+define <4 x bfloat> @vfadd_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fadd <4 x bfloat> %va, %vb
+ ret <4 x bfloat> %vc
+}
+
+define <4 x bfloat> @vfadd_vf_v4bf16(<4 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %vc = fadd <4 x bfloat> %va, %splat
+ ret <4 x bfloat> %vc
+}
+
+define <8 x bfloat> @vfadd_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fadd <8 x bfloat> %va, %vb
+ ret <8 x bfloat> %vc
+}
+
+define <8 x bfloat> @vfadd_vf_v8bf16(<8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %vc = fadd <8 x bfloat> %va, %splat
+ ret <8 x bfloat> %vc
+}
+
+define <16 x bfloat> @vfadd_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %vc = fadd <16 x bfloat> %va, %vb
+ ret <16 x bfloat> %vc
+}
+
+define <16 x bfloat> @vfadd_vf_v16bf16(<16 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %vc = fadd <16 x bfloat> %va, %splat
+ ret <16 x bfloat> %vc
+}
+
+define <32 x bfloat> @vfadd_vv_v32bf16(<32 x bfloat> %va, <32 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %vc = fadd <32 x bfloat> %va, %vb
+ ret <32 x bfloat> %vc
+}
+
+define <32 x bfloat> @vfadd_vf_v32bf16(<32 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <32 x bfloat> %head, <32 x bfloat> poison, <32 x i32> zeroinitializer
+ %vc = fadd <32 x bfloat> %va, %splat
+ ret <32 x bfloat> %vc
+}
+
+define <64 x bfloat> @vfadd_vv_v64bf16(<64 x bfloat> %va, <64 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_v64bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %vc = fadd <64 x bfloat> %va, %vb
+ ret <64 x bfloat> %vc
+}
+
+define <64 x bfloat> @vfadd_vf_v64bf16(<64 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_v64bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <64 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <64 x bfloat> %head, <64 x bfloat> poison, <64 x i32> zeroinitializer
+ %vc = fadd <64 x bfloat> %va, %splat
+ ret <64 x bfloat> %vc
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-sdnode.ll
new file mode 100644
index 0000000000000..8dca21f85c5f9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-sdnode.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x bfloat> @vfmul_vv_v1bf16(<1 x bfloat> %va, <1 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fmul <1 x bfloat> %va, %vb
+ ret <1 x bfloat> %vc
+}
+
+define <1 x bfloat> @vfmul_vf_v1bf16(<1 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <1 x bfloat> %head, <1 x bfloat> poison, <1 x i32> zeroinitializer
+ %vc = fmul <1 x bfloat> %va, %splat
+ ret <1 x bfloat> %vc
+}
+
+define <2 x bfloat> @vfmul_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fmul <2 x bfloat> %va, %vb
+ ret <2 x bfloat> %vc
+}
+
+define <2 x bfloat> @vfmul_vf_v2bf16(<2 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %vc = fmul <2 x bfloat> %va, %splat
+ ret <2 x bfloat> %vc
+}
+
+define <4 x bfloat> @vfmul_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fmul <4 x bfloat> %va, %vb
+ ret <4 x bfloat> %vc
+}
+
+define <4 x bfloat> @vfmul_vf_v4bf16(<4 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %vc = fmul <4 x bfloat> %va, %splat
+ ret <4 x bfloat> %vc
+}
+
+define <8 x bfloat> @vfmul_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %vc = fmul <8 x bfloat> %va, %vb
+ ret <8 x bfloat> %vc
+}
+
+define <8 x bfloat> @vfmul_vf_v8bf16(<8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %vc = fmul <8 x bfloat> %va, %splat
+ ret <8 x bfloat> %vc
+}
+
+define <16 x bfloat> @vfmul_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %vc = fmul <16 x bfloat> %va, %vb
+ ret <16 x bfloat> %vc
+}
+
+define <16 x bfloat> @vfmul_vf_v16bf16(<16 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %vc = fmul <16 x bfloat> %va, %splat
+ ret <16 x bfloat> %vc
+}
+
+define <32 x bfloat> @vfmul_vv_v32bf16(<32 x bfloat> %va, <32 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %vc = fmul <32 x bfloat> %va, %vb
+ ret <32 x bfloat> %vc
+}
+
+define <32 x bfloat> @vfmul_vf_v32bf16(<32 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK...
[truncated]
|
| ISD::VECREDUCE_FMINIMUM, | ||
| ISD::VECREDUCE_FMAXIMUM}; | ||
|
|
||
| // TODO: support more ops. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to check, this list should be come smaller over time right? Should the comment say something like
| // TODO: support more ops. | |
| // TODO: Make more of these ops legal. |
| } | ||
| } | ||
|
|
||
| multiclass VPatBinaryFPSDNode_VV_VF_RM_BF16<SDPatternOperator vop, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of creating new pattern multiclasses, can we move them into VPatBinaryFPSDNode_VV_VF_RM? That way we don't need to defm everything again
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I actually thought about it and found I also defined new ones in previous patches so I just go with it lol
But I think you're right, we should not define a new one so that it's easier to maintain
lukel97
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| bit isSEWAware = 0> { | ||
| foreach vti = AllFloatVectors in { | ||
| bit isSEWAware = 0, bit isBF16 = 0> { | ||
| foreach vti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, I meant as in VPatBinaryFPSDNode_VV_VF_RM would generate both patterns for AllFloatVectors and AllBF16Vectors at the same time. That way you wouldn't need to define the patterns in a separate file.
But I see that you've defined all the patterns in one place for zvfbfa anyway, so maybe we can revisit this later
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh I think it should be separate pattern definition since there’s “_ALT” suffix in pseudo instruction for bf16 types
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can just append _ALT to instruction_name when isBF16 is true
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep you are right!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you used a single instantiation, the isBF16 argument wouldn't exist. So you'd have to get it from fvti.
We also need to fix the Predicates. Currently it's relying on the let Predicates in RISCVInstrInfoZvfbf.td overriding the Predicates that are defined inside the class.
| } | ||
|
|
||
| defm : VPatBinaryFPSDNode_VV_VF_RM<any_fadd, "PseudoVFADD_ALT", | ||
| /*isSEWAware*/ 1, /*isBF16*/ 1>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| /*isSEWAware*/ 1, /*isBF16*/ 1>; | |
| isSEWAware=1, isBF16=1>; |
You don't need these comments, you can use named arguments. :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, thats right
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| ; RUN: --check-prefixes=CHECK,ZVFHMIN | ||
| ; RUN: llc -mtriple=riscv64 -mattr=+zvfh,+experimental-zvfbfa,+v \ | ||
| ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ | ||
| ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+experimental-zvfbfa,+v \ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why here is +zvfh, but in vfmul-sdnode.ll/vfsub-sdnode.ll is +zvfhmin
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had a patch that support fpround and fpextend previously which only update this sd-node test case. When doing this patch I tried to not change unrelated stuffs to make test case hard to read lol
It should be zvfhmin I guess so that people won't get confused, that can be my followup NFC patch
…0612) Support both fixed-length vectors and scalable vectors. Note: VP version is not gonna be supported for trivial instructions since they're going to be removed soon.
Support both fixed-length vectors and scalable vectors.
Note: VP version is not gonna be supported for trivial instructions
since they're going to be removed soon.