[llvm][RISCV] Support Zvfbfa codegen for fneg, fabs and copysign #166944
Conversation
This is the first patch for Zvfbfa codegen; I'm going to break it down into several patches to make it easier to review. The codegen supports both scalable and fixed-length vectors, for both native operations and VP intrinsics.
@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

Changes: This is the first patch for Zvfbfa codegen; I'm going to break it down into several patches to make it easier to review. The codegen supports both scalable and fixed-length vectors, for both native operations and VP intrinsics.

Patch is 204.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166944.diff

14 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d3372c5f6..4bb83d1f600fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,12 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {
+ ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1214,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
+ // Sets common actions for zvfbfa; some of the instructions are supported
+ // natively so that we don't need to promote them.
+ const auto SetZvfbfaActions = [&](MVT VT) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+ ISD::VECTOR_COMPRESS},
+ VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+ setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+ MVT EltVT = VT.getVectorElementType();
+ if (isTypeLegal(EltVT))
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+ ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
+ else
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+ EltVT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+ ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+ ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+ ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+ ISD::VP_SCATTER},
+ VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+ // Expand FP operations that need libcalls.
+ setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+ // Custom split nxv32[b]f16 since nxv32f32 is not legal.
+ if (getLMUL(VT) == RISCVVType::LMUL_8) {
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+ } else {
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+ }
+ };
+
if (Subtarget.hasVInstructionsF16()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
@@ -1222,7 +1283,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasVInstructionsBF16Minimal()) {
+ if (Subtarget.hasVInstructionsBF16()) {
+ for (MVT VT : BF16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ SetZvfbfaActions(VT);
+ }
+ } else if (Subtarget.hasVInstructionsBF16Minimal()) {
for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
continue;
@@ -1501,6 +1568,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
}
+ if (Subtarget.hasStdExtZvfbfa()) {
+ setOperationAction(ZvfbfaOps, VT, Custom);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+ }
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
@@ -7245,7 +7316,13 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
return (Op.getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
- Op.getValueType() == MVT::nxv32bf16;
+ (Op.getValueType() == MVT::nxv32bf16 &&
+ Subtarget.hasVInstructionsBF16Minimal() &&
+ (!Subtarget.hasVInstructionsBF16() ||
+ (std::find(std::begin(ZvfbfaOps), std::end(ZvfbfaOps),
+ Op.getOpcode()) == std::end(ZvfbfaOps) &&
+ std::find(std::begin(ZvfbfaVPOps), std::end(ZvfbfaVPOps),
+ Op.getOpcode()) == std::end(ZvfbfaVPOps))));
}
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75983b1f..49f1c92750f3a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
-}
+
+ foreach vti = AllBF16Vectors in {
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg vti.RegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (riscv_fneg_vl vti.RegClass:$rs2,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ srcvalue,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (SplatFPOp vti.ScalarRegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+ }
} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
new file mode 100644
index 0000000000000..9cfed6a659c64
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs)
+ ret <2 x bfloat> %r
+}
+
+define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs)
+ ret <4 x bfloat> %r
+}
+
+define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs)
+ ret <8 x bfloat> %r
+}
+
+define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs)
+ ret <16 x bfloat> %r
+}
+
+define <32 x bfloat> @copysign_v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %r = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs)
+ ret <32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
index a2178e1c571da..967ca092fe3c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -1,8 +1,180 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+
+declare <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat>, <4 x bfloat>, <4 x i1>, i32)
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat>, <8 x bfloat>, <8 x i1>, i32)
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat>, <16 x bfloat>, <16 x i1>, i32)
+
+define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x ...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Review comment on llvm/lib/Target/RISCV/RISCVISelLowering.cpp:

>        (Op.getValueType() == MVT::nxv32bf16 &&
>         Subtarget.hasVInstructionsBF16Minimal() &&
>         (!Subtarget.hasVInstructionsBF16() ||
>          (std::find(std::begin(ZvfbfaOps), std::end(ZvfbfaOps),

llvm::contains
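A minimal sketch of the suggested cleanup, assuming the reviewer is pointing at LLVM's llvm::is_contained helper from llvm/ADT/STLExtras.h (the exact helper name is an assumption here; the surrounding condition mirrors the patch above):

#include "llvm/ADT/STLExtras.h" // provides llvm::is_contained

// Hypothetical rewrite of the std::find checks: llvm::is_contained(R, X) is
// equivalent to std::find(std::begin(R), std::end(R), X) != std::end(R), so
// the negated calls read as "the opcode is in neither Zvfbfa op table".
return (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) ||
       (Op.getValueType() == MVT::nxv32bf16 &&
        Subtarget.hasVInstructionsBF16Minimal() &&
        (!Subtarget.hasVInstructionsBF16() ||
         (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
          !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));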
Review comment on llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll:

> ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \
> ; RUN:   -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
>
> declare <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x i1>, i32)

we no longer need to declare intrinsics
topperc left a comment: LGTM

tclin914 left a comment: LGTM
Suggested change on llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td:

> (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
>   (vti.Vector (IMPLICIT_DEF)),