-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Add codegen support for Zvfbfmin #87911
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Jianjian Guan (jacquesguan) ChangesPatch is 30.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87911.diff 9 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 279d8a435a04ca..16ab0c13e5c73c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1080,6 +1080,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+ // TODO: Could we merge some code with zvfhmin?
+ if (Subtarget.hasVInstructionsBF16()) {
+ for (MVT VT : BF16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ // TODO: Promote to fp32.
+ }
+ }
+
if (Subtarget.hasVInstructionsF32()) {
for (MVT VT : F32VecVTs) {
if (!isTypeLegal(VT))
@@ -1295,6 +1306,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
}
+ if (VT.getVectorElementType() == MVT::bf16 &&
+ Subtarget.hasVInstructionsBF16()) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ // TODO: Promote to fp32.
+ continue;
+ }
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
@@ -2549,6 +2568,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
if (!Subtarget.hasVInstructionsF16Minimal())
return false;
break;
+ case MVT::bf16:
+ if (!Subtarget.hasVInstructionsBF16())
+ return false;
+ break;
case MVT::f32:
if (!Subtarget.hasVInstructionsF32())
return false;
@@ -2600,6 +2623,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
case MVT::i16:
case MVT::i32:
case MVT::i64:
+ case MVT::bf16:
case MVT::f16:
case MVT::f32:
case MVT::f64: {
@@ -8086,9 +8110,12 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
- bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
- SrcVT.getVectorElementType() != MVT::f16);
- bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+ bool IsDirectExtend =
+ IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+ (SrcVT.getVectorElementType() != MVT::f16 &&
+ SrcVT.getVectorElementType() != MVT::bf16));
+ bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
+ VT.getVectorElementType() != MVT::bf16) ||
SrcVT.getVectorElementType() != MVT::f64);
bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index da761ae856706a..12c521ddeece95 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1450,6 +1450,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVInstructionsBF16] in
+ def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+ (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fwti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index b721dcd9898854..e07c72c2fdf6d9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2668,6 +2668,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVInstructionsBF16] in
+ def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+ (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
// 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
@@ -2712,6 +2726,22 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
}
}
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVInstructionsBF16] in
+ def : Pat<(fvti.Vector (any_riscv_fpround_vl
+ (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
index 5de28a0d722d0e..72193d762b0703 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32)
@@ -120,3 +120,53 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze
%v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl)
ret <32 x double> %v
}
+
+declare <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x float> @vfpext_v2bf16_v2f32(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x float> %v
+}
+
+define <2 x float> @vfpext_v2bf16_v2f32_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x float> %v
+}
+
+declare <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x double> @vfpext_v2bf16_v2f64(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x double> %v
+}
+
+define <2 x double> @vfpext_v2bf16_v2f64_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
index 0d5b59b087b40f..1fea30eef43dff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)
@@ -122,3 +122,53 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32
%v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl)
ret <32 x float> %v
}
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x bfloat> %v
+}
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
index d805a103aafd37..b002b8e7656687 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
@@ -167,3 +167,115 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
%evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
}
+
+define <vscale x 1 x float> @vfpext_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
+ ret <vscale x 1 x float> %evec
+}
+
+define <vscale x 1 x double> @vfpext_nxv1bf16_nxv1f64(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x double>
+ ret <vscale x 1 x double> %evec
+}
+
+define <vscale x 2 x float> @vfpext_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x float>
+ ret <vscale x 2 x float> %evec
+}
+
+define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x double>
+ ret <vscale x 2 x double> %evec
+}
+
+define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x float>
+ ret <vscale x 4 x float> %evec
+}
+
+define <vscale x 4 x double> @vfpext_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x double>
+ ret <vscale x 4 x double> %evec
+}
+
+define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x float>
+ ret <vscale x 8 x float> %evec
+}
+
+define <vscale x 8 x double> @vfpext_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x double>
+ ret <vscale x 8 x double> %evec
+}
+
+define <vscale x 16 x float> @vfpext_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 16 x bfloat> %va to <vscale x 16 x float>
+ ret <vscale x 16 x float> %evec
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
index 5cfa98916a2de0..aaaf4ad4607119 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple...
[truncated]
|
Do you plan to add load/store support? |
120efbb
to
5599c78
Compare
Added load/store support. |
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, | ||
VT, Custom); | ||
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); | ||
// TODO: Promote to fp32. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like F16Minimal has support for STRICT_FP_ROUND, STRICT_FP_EXTEND, VP_MERGE, VP_SELECT, SELECT, SELECT_CC, SINT_TO_FP, UINT_TO_FP, VP_SINT_TO_FP, VP_UINT_TO_FP, and SPLAT_VECTOR
but these are not included for BF16. Do you plan to add support for these?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Zvfbfmin does not have any integer conversion instructions so SINT_TO_FP, UINT_TO_FP, VP_SINT_TO_FP, VP_UINT_TO_FP
can be removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm curious what SPLAT_VECTOR for F16Minimal does since the scalar FP16 type isn't guaranteed legal with F16Minimal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SPLAT_VECTOR
and select/merge with splat depends on Zvfh
, so I plan to add these in a separate patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm curious what SPLAT_VECTOR for F16Minimal does since the scalar FP16 type isn't guaranteed legal with F16Minimal.
I create a pr #88275 to add the constraint for SPLAT_VECTOR
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the comments in this thread, we have been able to trim down the original list. We are left with consideration of STRICT_FP_ROUND, STRICT_FP_EXTEND
. Are we able to address those opcodes here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the comments in this thread, we have been able to trim down the original list. We are left with consideration of
STRICT_FP_ROUND, STRICT_FP_EXTEND
. Are we able to address those opcodes here?
Added STRICT_FP_ROUND
, STRICT_FP_EXTEND
now.
; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s | ||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s | ||
; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s | ||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need to add experimental-zfbfmin
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed +experimental-zfbfmin
now. It's unused for legal type.
; | ||
; RV64-LABEL: v4bf16: | ||
; RV64: # %bb.0: | ||
; RV64-NEXT: ld a0, 0(a0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use a type that doesn't convert to i64 on RV64
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
cd0f02a
to
0f52ad8
Compare
Ping, any more comment? |
@@ -181,3 +181,13 @@ define <16 x i64> @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) { | |||
%v = load <16 x i64>, ptr %p | |||
ret <16 x i64> %v | |||
} | |||
|
|||
define <8 x bfloat> @load_v6bf16(ptr %p) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
v6 -> v8
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); | ||
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, | ||
Custom); | ||
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are concat, insert, extract, and scalar_to_vector tested? It's not immediately obvious from the test names
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added test cases for insert_vectors and extract_vectors, and removed scalar_to_vector now since it should need bf16 scalar extension. For concat_vectors, I didn't find the direct case of it.
0f52ad8
to
93997ed
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch adds basic codegen support for Zvfbfmin extension.