Skip to content

Commit

Permalink
[RISCV] Support fmaximum/fminimum for fp16 vector when only Zvfhmin enabled (#67393)
Browse files Browse the repository at this point in the history

This patch promotes fmaximum/fminimum for fp16 vector to float
operation.
  • Loading branch information
tclin914 committed Sep 27, 2023
1 parent 7eb2531 commit d6b1b99
Show file tree
Hide file tree
Showing 5 changed files with 1,036 additions and 327 deletions.
7 changes: 6 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR, ISD::SETCC};
ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR, ISD::SETCC,
ISD::FMAXIMUM, ISD::FMINIMUM};

// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
Expand Down Expand Up @@ -5659,6 +5660,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
if (Op.getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16()))
return SplitVectorOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::FP_EXTEND: {
SDLoc DL(Op);
Expand Down
263 changes: 194 additions & 69 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
Original file line number Diff line number Diff line change
@@ -1,73 +1,141 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>)

define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: vfmax_v2f16_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v2f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}

declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)

define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: vfmax_v4f16_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v4f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v4f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}

declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)

define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vfmax_v8f16_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v8f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv.v.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v8f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %v
}

declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)

define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: vfmax_v16f16_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v16f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v12, v10, v10
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v14
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v16f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %v
}
Expand Down Expand Up @@ -220,46 +288,103 @@ define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwi
}

define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: vfmax_v2f16_vv_nnan:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v2f16_vv_nnan:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfmax.vv v8, v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}

; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: vfmax_v2f16_vv_nnana:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vfadd.vv v10, v8, v8
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmfeq.vv v8, v10, v10
; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
; CHECK-NEXT: vfmax.vv v8, v11, v8
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfadd.vv v10, v8, v8
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmfeq.vv v8, v10, v10
; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v8
; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFH-NEXT: vfmax.vv v8, v11, v8
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%c = fadd nnan <2 x half> %a, %a
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
ret <2 x half> %v
}

; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: vfmax_v2f16_vv_nnanb:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vfadd.vv v10, v9, v9
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v9, v10, v10
; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfadd.vv v10, v9, v9
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v9, v10, v10
; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v9
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%c = fadd nnan <2 x half> %b, %b
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
ret <2 x half> %v
Expand Down

0 comments on commit d6b1b99

Please sign in to comment.