diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 2c0a02ae396c7..9cd6817c1b7a7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -91,8 +91,10 @@ static cl::opt static const unsigned ZvfbfaVPOps[] = { ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN}; static const unsigned ZvfbfaOps[] = { - ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR, - ISD::FADD, ISD::FSUB, ISD::FMUL}; + ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR, + ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMINNUM, + ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUM, + ISD::FMAXIMUM}; RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) @@ -1087,11 +1089,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VECREDUCE_FMAXIMUM}; // TODO: Make more of these ops legal. - static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM, - ISD::FMAXNUM, - ISD::FMINIMUMNUM, - ISD::FMAXIMUMNUM, - ISD::FDIV, + static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV, ISD::FMA, ISD::FSQRT, ISD::FCEIL, @@ -1103,8 +1101,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, - ISD::FMAXIMUM, - ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, @@ -1297,6 +1293,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS}, VT, Custom); + setOperationAction( + {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT, + Legal); + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 14ad7ca0eb35a..b3cc33d31761d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -201,12 +201,15 @@ class VPatBinarySDNode_VF_RM { - foreach vti = AllFloatVectors in { + foreach vti = AllFloatAndBF16Vectors in { let Predicates = GetVTypePredicates.Predicates in { - def : VPatBinarySDNode_VV; - def : VPatBinarySDNode_VF; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 9273ce094eb0a..4c41667560a98 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1043,13 +1043,16 @@ class VPatBinaryVL_VF_RM { - foreach vti = AllFloatVectors in { + foreach vti = AllFloatAndBF16Vectors in { let Predicates = GetVTypePredicates.Predicates in { - def : VPatBinaryVL_V; - def : VPatBinaryVL_VF; @@ -1199,7 +1202,7 @@ multiclass VPatIntegerSetCCVL_VI_Swappable { - foreach fvti = AllFloatVectors in { + foreach fvti = AllFloatAndBF16Vectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1), fvti.RegClass:$rs2, @@ -1207,7 +1210,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV(inst_name#"_VV_"#fvti.LMul.MX#"_MASK") + (!cast(inst_name# + !if(!eq(fvti.Scalar, bf16), "_ALT", "")# + "_VV_"#fvti.LMul.MX#"_MASK") VR:$passthru, fvti.RegClass:$rs1, fvti.RegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MU)>; @@ -1217,7 +1222,9 @@ multiclass 
VPatFPSetCCVL_VV_VF_FV(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") + (!cast(inst_name# + !if(!eq(fvti.Scalar, bf16), "_ALT", "")# + "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") VR:$passthru, fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MU)>; @@ -1227,7 +1234,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") + (!cast(swapped_op_inst_name# + !if(!eq(fvti.Scalar, bf16), "_ALT", "")# + "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") VR:$passthru, fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MU)>; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index 6ee2e204bcfe3..0df73330027e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -1,12 +1,140 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN + +define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, 
ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFBFMIN-NEXT: vfmax.vv v10, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFBFMIN-NEXT: vfmax.vv v12, v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v16bf16_vv: +; ZVFBFA: # %bb.0: +; 
ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v12 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.maximum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) + ret <16 x bfloat> %v +} define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll index c8cea368f905e..b5976bf972278 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define <2 x double> @max_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: max_v2f64: @@ -199,3 +203,113 @@ entry: %c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b) ret <16 x half> %c } + +define <2 x bfloat> @max_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) { +; ZVFBFMIN-LABEL: max_v2bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: max_v2bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret +entry: + %c = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) + ret <2 x bfloat> %c +} + +define <4 x bfloat> @max_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) { +; ZVFBFMIN-LABEL: max_v4bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: max_v4bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret +entry: + %c = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %c +} + +define <8 x bfloat> @max_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) { +; ZVFBFMIN-LABEL: max_v8bf16: +; ZVFBFMIN: # 
%bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: max_v8bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret +entry: + %c = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %c +} + +define <9 x bfloat> @max_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) { +; ZVFBFMIN-LABEL: max_v9bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: max_v9bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret +entry: + %c = call <9 x bfloat> @llvm.maximumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) + ret <9 x bfloat> %c +} + +define <16 x bfloat> @max_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) { +; ZVFBFMIN-LABEL: max_v16bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: max_v16bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret +entry: + %c = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) + ret <16 x bfloat> %c +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index a95177a1de9a6..a6b523d99fd1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -1,12 +1,140 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: 
llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN + +define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.minimum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.minimum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: 
vmerge.vvm v8, v10, v12, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFBFMIN-NEXT: vfmin.vv v10, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFBFMIN-NEXT: vfmin.vv v12, v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v12 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.minimum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) + ret <16 x bfloat> %v +} define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll index 36114d56aa0d6..3409d17f7e0a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define <2 x double> @min_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: min_v2f64: @@ -199,3 +203,113 @@ entry: %c = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b) ret <16 x half> %c } + +define <2 x bfloat> @min_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) { +; ZVFBFMIN-LABEL: min_v2bf16: +; ZVFBFMIN: # %bb.0: # 
%entry +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: min_v2bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret +entry: + %c = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) + ret <2 x bfloat> %c +} + +define <4 x bfloat> @min_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) { +; ZVFBFMIN-LABEL: min_v4bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: min_v4bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret +entry: + %c = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %c +} + +define <8 x bfloat> @min_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) { +; ZVFBFMIN-LABEL: min_v8bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: min_v8bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret +entry: + %c = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %c +} + +define <9 x bfloat> @min_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) { +; ZVFBFMIN-LABEL: min_v9bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: min_v9bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret +entry: + %c = call <9 x bfloat> @llvm.minimumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) + ret <9 x bfloat> %c +} + +define <16 x bfloat> @min_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) { +; ZVFBFMIN-LABEL: min_v16bf16: +; ZVFBFMIN: # %bb.0: # %entry +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: min_v16bf16: +; ZVFBFA: # %bb.0: # %entry +; ZVFBFA-NEXT: vsetivli zero, 16, 
e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret +entry: + %c = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) + ret <16 x bfloat> %c +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll index 44362efa1fe83..93854c1a8b9bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll @@ -1,12 +1,300 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN + +define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfmax_v2bf16_vf(<2 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v2bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; 
+; ZVFBFA-LABEL: vfmax_v2bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer + %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %splat) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfmax_v2bf16_fv(<2 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v2bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v8, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v2bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer + %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %splat, <2 x bfloat> %a) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfmax_v4bf16_vf(<4 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v4bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v4bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer + %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %splat) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfmax_v4bf16_fv(<4 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v4bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v8, 
v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v4bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer + %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %splat, <4 x bfloat> %a) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfmax_v8bf16_vf(<8 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v8bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v12, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v10, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v8bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer + %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %splat) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfmax_v8bf16_fv(<8 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v8bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v12, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v10, v8, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v8bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer + %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %splat, <8 x bfloat> %a) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmax_v16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; 
ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfmax_v16bf16_vf(<16 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v16bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v16, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v16bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer + %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %splat) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfmax_v16bf16_fv(<16 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmax_v16bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v16, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v8, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_v16bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer + %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %splat, <16 x bfloat> %a) + ret <16 x bfloat> %v +} define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv: @@ -25,6 +313,7 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret +; %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll index c9bb99d6cb3d6..d79f91af1df85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll @@ -1,12 +1,300 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc 
-mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN + +define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfmin_v2bf16_vf(<2 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v2bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v2bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer + %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %splat) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfmin_v2bf16_fv(<2 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v2bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v8, v10 +; ZVFBFMIN-NEXT: vsetvli 
zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v2bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer + %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %splat, <2 x bfloat> %a) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfmin_v4bf16_vf(<4 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v4bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v4bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer + %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %splat) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfmin_v4bf16_fv(<4 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v4bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v9, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v8, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v4bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer + %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %splat, <4 x bfloat> %a) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 
+; ZVFBFMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfmin_v8bf16_vf(<8 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v8bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v12, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v10, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v8bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer + %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %splat) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfmin_v8bf16_fv(<8 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v8bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v12, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v10, v8, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v8bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer + %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %splat, <8 x bfloat> %a) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) { +; ZVFBFMIN-LABEL: vfmin_v16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfmin_v16bf16_vf(<16 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v16bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v16, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; 
ZVFBFMIN-NEXT: vfmin.vv v12, v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v16bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer + %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %splat) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfmin_v16bf16_fv(<16 x bfloat> %a, bfloat %b) { +; ZVFBFMIN-LABEL: vfmin_v16bf16_fv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v16, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v12, v8, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_v16bf16_fv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret + %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer + %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %splat, <16 x bfloat> %a) + ret <16 x bfloat> %v +} define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv: diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index fd70f95ed53c6..f30b2a60f0d1b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -1,188 +1,249 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | 
FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \ +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define @vfmax_nxv1bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv1bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v9, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv1bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv1bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.maximum.nxv1bf16( %a, %b) ret %v } define @vfmax_nxv2bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv2bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v9, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; 
ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.maximum.nxv2bf16( %a, %b) ret %v } define @vfmax_nxv4bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv4bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 -; CHECK-NEXT: vfmax.vv v10, v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFBFMIN-NEXT: vfmax.vv v10, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.maximum.nxv4bf16( %a, %b) ret %v } define @vfmax_nxv8bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv8bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 -; CHECK-NEXT: vfmax.vv v12, v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; 
ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFBFMIN-NEXT: vfmax.vv v12, v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call @llvm.maximum.nxv8bf16( %a, %b) ret %v } define @vfmax_nxv16bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv16bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vfmax.vv v16, v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFBFMIN-NEXT: vmfeq.vv v7, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFBFMIN-NEXT: vmv1r.v v0, v7 +; ZVFBFMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFBFMIN-NEXT: vfmax.vv v16, v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFA-NEXT: vmerge.vvm v8, v12, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call @llvm.maximum.nxv16bf16( %a, %b) ret %v } define @vfmax_nxv32bf16_vv( %a, %b) nounwind { -; CHECK-LABEL: vfmax_nxv32bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv8r.v v0, v16 -; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v3, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmv1r.v v0, v3 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, 
(a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmax.vv v16, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv32bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 4 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vmv8r.v v0, v16 +; ZVFBFMIN-NEXT: vmv8r.v v24, v8 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v24 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmfeq.vv v3, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vmv1r.v v0, v3 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v4 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v8, v8, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v20 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmfeq.vv v7, v24, v24 +; ZVFBFMIN-NEXT: vmerge.vvm v16, v8, v24, v0 +; ZVFBFMIN-NEXT: vmv1r.v v0, v7 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v24, v8, v0 +; ZVFBFMIN-NEXT: vfmax.vv v16, v8, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: 
vfncvtbf16.f.f.w v8, v24 +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 4 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv32bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16 +; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFBFA-NEXT: vmv1r.v v0, v7 +; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFBFA-NEXT: vfmax.vv v8, v8, v24 +; ZVFBFA-NEXT: ret %v = call @llvm.maximum.nxv32bf16( %a, %b) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll index dce5004d03e16..bedbe80ffad6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll @@ -1,43 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define @vfmax_vv_nxv1bf16( %va, %vb) { -; CHECK-LABEL: vfmax_vv_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmax.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vv_nxv1bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %vc = call @llvm.maximumnum.nxv1bf16( %va, %vb) ret %vc } define @vfmax_vf_nxv1bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_vf_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmax.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vf_nxv1bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vf_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv1bf16( %va, %splat) @@ -45,31 +63,43 @@ define @vfmax_vf_nxv1bf16( %va, bfloa } define @vfmax_vv_nxv2bf16( %va, %vb) { -; CHECK-LABEL: vfmax_vv_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmax.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vv_nxv2bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %vc = call @llvm.maximumnum.nxv2bf16( %va, %vb) ret %vc } define @vfmax_vf_nxv2bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_vf_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmax.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vf_nxv2bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vf_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv2bf16( %va, %splat) @@ -77,31 +107,43 @@ define @vfmax_vf_nxv2bf16( %va, bfloa } define @vfmax_vv_nxv4bf16( %va, %vb) { -; CHECK-LABEL: vfmax_vv_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmax.vv v10, 
v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vv_nxv4bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %vc = call @llvm.maximumnum.nxv4bf16( %va, %vb) ret %vc } define @vfmax_vf_nxv4bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_vf_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmax.vf v10, v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vf_nxv4bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v10, v10, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vf_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv4bf16( %va, %splat) @@ -109,31 +151,43 @@ define @vfmax_vf_nxv4bf16( %va, bfloa } define @vfmax_vv_nxv8bf16( %va, %vb) { -; CHECK-LABEL: vfmax_vv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmax.vv v12, v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vv_nxv8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %vc = call @llvm.maximumnum.nxv8bf16( %va, %vb) ret %vc } define @vfmax_vf_nxv8bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_vf_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmax.vf v12, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vf_nxv8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 
+; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v12, v12, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vf_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv8bf16( %va, %splat) @@ -141,16 +195,22 @@ define @vfmax_vf_nxv8bf16( %va, bfloa } define @vfmax_fv_nxv8bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_fv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmax.vf v12, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_fv_nxv8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v12, v12, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_fv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv8bf16( %splat, %va) @@ -158,31 +218,43 @@ define @vfmax_fv_nxv8bf16( %va, bfloa } define @vfmax_vv_nxv16bf16( %va, %vb) { -; CHECK-LABEL: vfmax_vv_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vv_nxv16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %vc = call @llvm.maximumnum.nxv16bf16( %va, %vb) ret %vc } define @vfmax_vf_nxv16bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_vf_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vf v16, v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vf_nxv16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; 
ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v16, v16, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vf_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv16bf16( %va, %splat) @@ -190,78 +262,90 @@ define @vfmax_vf_nxv16bf16( %va, bf } define @vfmax_vv_nxv32bf16( %va, %vb) { -; CHECK-LABEL: vfmax_vv_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v0, v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vv_nxv32bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v0, v0, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v16, v16, v24 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %vc = call 
@llvm.maximumnum.nxv32bf16( %va, %vb) ret %vc } define @vfmax_vf_nxv32bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmax_vf_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v0, v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_vf_nxv32bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: addi a1, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v0, v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_vf_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.maximumnum.nxv32bf16( %va, %splat) diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 339f97a73ee52..8edc7d0d0e55b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -1,188 +1,249 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \ +; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define @vfmin_nxv1bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv1bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v9, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv1bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, 
e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv1bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.minimum.nxv1bf16( %a, %b) ret %v } define @vfmin_nxv2bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv2bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v9, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFBFMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.minimum.nxv2bf16( %a, %b) ret %v } define @vfmin_nxv4bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv4bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 -; CHECK-NEXT: vfmin.vv v10, v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v10, v10 +; 
ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFBFMIN-NEXT: vfmin.vv v10, v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9 +; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.minimum.nxv4bf16( %a, %b) ret %v } define @vfmin_nxv8bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv8bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 -; CHECK-NEXT: vfmin.vv v12, v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFBFMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFBFMIN-NEXT: vfmin.vv v12, v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10 +; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call @llvm.minimum.nxv8bf16( %a, %b) ret %v } define @vfmin_nxv16bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv16bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vfmin.vv v16, v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFBFMIN-NEXT: vmfeq.vv v7, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v8, 
v24, v16, v0 +; ZVFBFMIN-NEXT: vmv1r.v v0, v7 +; ZVFBFMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFBFMIN-NEXT: vfmin.vv v16, v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12 +; ZVFBFA-NEXT: vmerge.vvm v8, v12, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call @llvm.minimum.nxv16bf16( %a, %b) ret %v } define @vfmin_nxv32bf16_vv( %a, %b) nounwind { -; CHECK-LABEL: vfmin_nxv32bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv8r.v v0, v16 -; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v3, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmv1r.v v0, v3 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmin.vv v16, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv32bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 4 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vmv8r.v v0, v16 +; ZVFBFMIN-NEXT: vmv8r.v v24, v8 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v24 +; ZVFBFMIN-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmfeq.vv v3, v16, v16 +; ZVFBFMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vmv1r.v v0, v3 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v4 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v8, v8, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v8, v20 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFMIN-NEXT: vmfeq.vv v7, v24, v24 +; ZVFBFMIN-NEXT: vmerge.vvm v16, v8, v24, v0 +; ZVFBFMIN-NEXT: vmv1r.v v0, v7 +; ZVFBFMIN-NEXT: vmerge.vvm v8, v24, v8, v0 +; ZVFBFMIN-NEXT: vfmin.vv v16, v8, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add a0, sp, a0 +; ZVFBFMIN-NEXT: addi a0, a0, 16 +; ZVFBFMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v24 +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 4 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv32bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8 +; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16 +; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFBFA-NEXT: vmv1r.v v0, v7 +; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFBFA-NEXT: vfmin.vv v8, v8, v24 +; ZVFBFA-NEXT: ret %v = call @llvm.minimum.nxv32bf16( %a, %b) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll index a52625d9e8ef4..431c4c29be306 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll @@ -1,43 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; 
RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define @vfmin_vv_nxv1bf16( %va, %vb) { -; CHECK-LABEL: vfmin_vv_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmin.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vv_nxv1bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %vc = call @llvm.minimumnum.nxv1bf16( %va, %vb) ret %vc } define @vfmin_vf_nxv1bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_vf_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmin.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vf_nxv1bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vf_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv1bf16( %va, %splat) @@ -45,31 +63,43 @@ define @vfmin_vf_nxv1bf16( %va, bfloa } define @vfmin_vv_nxv2bf16( %va, %vb) { -; CHECK-LABEL: vfmin_vv_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmin.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vv_nxv2bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; 
ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %vc = call @llvm.minimumnum.nxv2bf16( %va, %vb) ret %vc } define @vfmin_vf_nxv2bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_vf_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmin.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vf_nxv2bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vf_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv2bf16( %va, %splat) @@ -77,31 +107,43 @@ define @vfmin_vf_nxv2bf16( %va, bfloa } define @vfmin_vv_nxv4bf16( %va, %vb) { -; CHECK-LABEL: vfmin_vv_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmin.vv v10, v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vv_nxv4bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %vc = call @llvm.minimumnum.nxv4bf16( %va, %vb) ret %vc } define @vfmin_vf_nxv4bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_vf_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmin.vf v10, v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vf_nxv4bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v10, v10, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vf_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: 
ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv4bf16( %va, %splat) @@ -109,31 +151,43 @@ define @vfmin_vf_nxv4bf16( %va, bfloa } define @vfmin_vv_nxv8bf16( %va, %vb) { -; CHECK-LABEL: vfmin_vv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmin.vv v12, v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vv_nxv8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %vc = call @llvm.minimumnum.nxv8bf16( %va, %vb) ret %vc } define @vfmin_vf_nxv8bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_vf_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmin.vf v12, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vf_nxv8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v12, v12, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vf_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv8bf16( %va, %splat) @@ -141,16 +195,22 @@ define @vfmin_vf_nxv8bf16( %va, bfloa } define @vfmin_fv_nxv8bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_fv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmin.vf v12, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_fv_nxv8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v12, v12, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_fv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = 
shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv8bf16( %splat, %va) @@ -158,31 +218,43 @@ define @vfmin_fv_nxv8bf16( %va, bfloa } define @vfmin_vv_nxv16bf16( %va, %vb) { -; CHECK-LABEL: vfmin_vv_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vv_nxv16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %vc = call @llvm.minimumnum.nxv16bf16( %va, %vb) ret %vc } define @vfmin_vf_nxv16bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_vf_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vf v16, v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vf_nxv16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v16, v16, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vf_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv16bf16( %va, %splat) @@ -190,78 +262,90 @@ define @vfmin_vf_nxv16bf16( %va, bf } define @vfmin_vv_nxv32bf16( %va, %vb) { -; CHECK-LABEL: vfmin_vv_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v0, v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w 
v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vv_nxv32bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v0, v0, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v16, v16, v24 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %vc = call @llvm.minimumnum.nxv32bf16( %va, %vb) ret %vc } define @vfmin_vf_nxv32bf16( %va, bfloat %b) { -; CHECK-LABEL: vfmin_vf_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v0, v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_vf_nxv32bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli 
a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: addi a1, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v0, v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_vf_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = call @llvm.minimumnum.nxv32bf16( %va, %splat) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll index 63bfe1dfad5fc..8a569d41ac25c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll @@ -1,43 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define @vfmax_nxv1bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv1bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; 
CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmax.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv1bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv1bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.maxnum.nxv1bf16( %a, %b) ret %v } define @vfmax_nxv1bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmax_nxv1bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmax.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv1bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv1bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.maxnum.nxv1bf16( %a, %splat) @@ -45,31 +63,43 @@ define @vfmax_nxv1bf16_vf( %a, bfloat } define @vfmax_nxv2bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv2bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmax.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.maxnum.nxv2bf16( %a, %b) ret %v } define @vfmax_nxv2bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmax_nxv2bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmax.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv2bf16_vf: +; ZVFBFMIN: # %bb.0: 
+; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv2bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.maxnum.nxv2bf16( %a, %splat) @@ -77,31 +107,43 @@ define @vfmax_nxv2bf16_vf( %a, bfloat } define @vfmax_nxv4bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv4bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmax.vv v10, v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.maxnum.nxv4bf16( %a, %b) ret %v } define @vfmax_nxv4bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmax_nxv4bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmax.vf v10, v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv4bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v10, v10, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv4bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.maxnum.nxv4bf16( %a, %splat) @@ -109,31 +151,43 @@ define @vfmax_nxv4bf16_vf( %a, bfloat } define @vfmax_nxv8bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv8bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmax.vv v12, v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 
+; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.maxnum.nxv8bf16( %a, %b) ret %v } define @vfmax_nxv8bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmax_nxv8bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmax.vf v12, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv8bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v12, v12, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv8bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.maxnum.nxv8bf16( %a, %splat) @@ -141,31 +195,43 @@ define @vfmax_nxv8bf16_vf( %a, bfloat } define @vfmax_nxv16bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv16bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call @llvm.maxnum.nxv16bf16( %a, %b) ret %v } define @vfmax_nxv16bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmax_nxv16bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vf v16, v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv16bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vf v16, v16, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; 
ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv16bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.maxnum.nxv16bf16( %a, %splat) @@ -173,78 +239,90 @@ define @vfmax_nxv16bf16_vf( %a, bfl } define @vfmax_nxv32bf16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv32bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v0, v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv32bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v0, v0, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v16, v16, v24 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv32bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmax.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call @llvm.maxnum.nxv32bf16( %a, %b) ret %v } define @vfmax_nxv32bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmax_nxv32bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli 
a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v0, v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmax_nxv32bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: addi a1, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v0, v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmax.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmax_nxv32bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.maxnum.nxv32bf16( %a, %splat) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll index bb435c9d0114f..d88c13d2a5752 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll @@ -1,43 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; 
RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFBFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA,ZVFHMIN define @vfmin_nxv1bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv1bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmin.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv1bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv1bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.minnum.nxv1bf16( %a, %b) ret %v } define @vfmin_nxv1bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmin_nxv1bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmin.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv1bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv1bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.minnum.nxv1bf16( %a, %splat) @@ -45,31 +63,43 @@ define @vfmin_nxv1bf16_vf( %a, bfloat } define @vfmin_nxv2bf16_vv( %a, %b) { 
-; CHECK-LABEL: vfmin_nxv2bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmin.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv2bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv2bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.minnum.nxv2bf16( %a, %b) ret %v } define @vfmin_nxv2bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmin_nxv2bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmin.vf v9, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv2bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v9, v9, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v9 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv2bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.minnum.nxv2bf16( %a, %splat) @@ -77,31 +107,43 @@ define @vfmin_nxv2bf16_vf( %a, bfloat } define @vfmin_nxv4bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv4bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmin.vv v10, v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv4bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv4bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.minnum.nxv4bf16( %a, %b) ret %v } define @vfmin_nxv4bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmin_nxv4bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: 
vfmin.vf v10, v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv4bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v10, v10, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv4bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.minnum.nxv4bf16( %a, %splat) @@ -109,31 +151,43 @@ define @vfmin_nxv4bf16_vf( %a, bfloat } define @vfmin_nxv8bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv8bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmin.vv v12, v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv8bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv8bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.minnum.nxv8bf16( %a, %b) ret %v } define @vfmin_nxv8bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmin_nxv8bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmin.vf v12, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv8bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v12, v12, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv8bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.minnum.nxv8bf16( %a, %splat) @@ -141,31 +195,43 @@ define @vfmin_nxv8bf16_vf( %a, bfloat } define @vfmin_nxv16bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv16bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: 
vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv16bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v16, v24, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv16bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call @llvm.minnum.nxv16bf16( %a, %b) ret %v } define @vfmin_nxv16bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmin_nxv16bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vf v16, v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv16bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vf v16, v16, fa5 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v16 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv16bf16_vf: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %v = call @llvm.minnum.nxv16bf16( %a, %splat) @@ -173,78 +239,90 @@ define @vfmin_nxv16bf16_vf( %a, bfl } define @vfmin_nxv32bf16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv32bf16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v0, v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v16, v24 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv32bf16_vv: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 
0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16 +; ZVFBFMIN-NEXT: addi a0, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v0, v0, v8 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmin.vv v16, v16, v24 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: add sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFBFMIN-NEXT: addi sp, sp, 16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfmin_nxv32bf16_vv: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmin.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call @llvm.minnum.nxv32bf16( %a, %b) ret %v } define @vfmin_nxv32bf16_vf( %a, bfloat %b) { -; CHECK-LABEL: vfmin_nxv32bf16_vf: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v0, v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; ZVFBFMIN-LABEL: vfmin_nxv32bf16_vf: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: addi sp, sp, -16 +; ZVFBFMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFMIN-NEXT: csrr a0, vlenb +; ZVFBFMIN-NEXT: slli a0, a0, 3 +; ZVFBFMIN-NEXT: sub sp, sp, a0 +; ZVFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFMIN-NEXT: fmv.x.h a0, fa0 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFBFMIN-NEXT: addi a1, sp, 16 +; ZVFBFMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 +; ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: 
vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFBFMIN-NEXT:    addi a0, sp, 16
+; ZVFBFMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFMIN-NEXT:    vfmin.vv v0, v8, v0
+; ZVFBFMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFBFMIN-NEXT:    csrr a0, vlenb
+; ZVFBFMIN-NEXT:    slli a0, a0, 3
+; ZVFBFMIN-NEXT:    add sp, sp, a0
+; ZVFBFMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFMIN-NEXT:    addi sp, sp, 16
+; ZVFBFMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)