Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM] MVE minnm and maxnm instructions
This adds the patterns for minnm and maxnm from the fminnum and fmaxnum nodes, similar to scalar types. Original patch by Simon Tatham. Differential Revision: https://reviews.llvm.org/D63870. llvm-svn: 366002.
- Loading branch information
1 parent
2a7f520
commit ec8af0d
Showing
3 changed files
with
119 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE | ||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP | ||
|
||
; Test: lane-wise floating-point maximum of two <4 x float> vectors. | ||
; The IR is a fast-math ogt compare feeding a select that keeps %src2 in the | ||
; lanes where %src2 > %src1 and %src1 in all other lanes. | ||
; Expected output for the +mve.fp run: one vector vmaxnm.f32 on q registers. | ||
; Expected output for the +mve,+fullfp16 run: four scalar vmaxnm.f32 ops, one | ||
; per s-register lane, then a vmov that copies the result from q2 into q0. | ||
define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) { | ||
; CHECK-MVE-LABEL: maxnm_float32_t: | ||
; CHECK-MVE: @ %bb.0: @ %entry | ||
; CHECK-MVE-NEXT: vmaxnm.f32 s11, s7, s3 | ||
; CHECK-MVE-NEXT: vmaxnm.f32 s10, s6, s2 | ||
; CHECK-MVE-NEXT: vmaxnm.f32 s9, s5, s1 | ||
; CHECK-MVE-NEXT: vmaxnm.f32 s8, s4, s0 | ||
; CHECK-MVE-NEXT: vmov q0, q2 | ||
; CHECK-MVE-NEXT: bx lr | ||
; | ||
; CHECK-MVEFP-LABEL: maxnm_float32_t: | ||
; CHECK-MVEFP: @ %bb.0: @ %entry | ||
; CHECK-MVEFP-NEXT: vmaxnm.f32 q0, q1, q0 | ||
; CHECK-MVEFP-NEXT: bx lr | ||
entry: | ||
; Compare-and-select form of max: the select's true operand is the compare's | ||
; first operand (%src2), so the greater lane is chosen. | ||
%cmp = fcmp fast ogt <4 x float> %src2, %src1 | ||
%0 = select <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1 | ||
ret <4 x float> %0 | ||
} |
|
||
; Test: lane-wise floating-point minimum of two <8 x half> vectors. | ||
; The IR is a fast-math ogt compare feeding a select that keeps %src1 in the | ||
; lanes where %src2 > %src1 and %src2 in all other lanes. | ||
; Expected output for the +mve.fp run: one vector vminnm.f16 on q registers. | ||
; Expected output for the +mve,+fullfp16 run: each of the 8 lanes is extracted | ||
; with vmov.u16, moved into an s register, combined with a scalar vminnm.f16, | ||
; and inserted into q2 with vmov.16; q2 is finally copied into q0. | ||
define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half> %src2) { | ||
; CHECK-MVE-LABEL: minnm_float16_t: | ||
; CHECK-MVE: @ %bb.0: @ %entry | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] | ||
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] | ||
; CHECK-MVE-NEXT: vmov s8, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[0] | ||
; CHECK-MVE-NEXT: vmov s10, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r2, q1[1] | ||
; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 | ||
; CHECK-MVE-NEXT: vmov s10, r2 | ||
; CHECK-MVE-NEXT: vmov r0, s8 | ||
; CHECK-MVE-NEXT: vmov s8, r1 | ||
; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 | ||
; CHECK-MVE-NEXT: vmov r1, s8 | ||
; CHECK-MVE-NEXT: vmov.16 q2[0], r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] | ||
; CHECK-MVE-NEXT: vmov.16 q2[1], r1 | ||
; CHECK-MVE-NEXT: vmov s12, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[2] | ||
; CHECK-MVE-NEXT: vmov s14, r0 | ||
; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 | ||
; CHECK-MVE-NEXT: vmov r0, s12 | ||
; CHECK-MVE-NEXT: vmov.16 q2[2], r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] | ||
; CHECK-MVE-NEXT: vmov s12, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[3] | ||
; CHECK-MVE-NEXT: vmov s14, r0 | ||
; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 | ||
; CHECK-MVE-NEXT: vmov r0, s12 | ||
; CHECK-MVE-NEXT: vmov.16 q2[3], r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] | ||
; CHECK-MVE-NEXT: vmov s12, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[4] | ||
; CHECK-MVE-NEXT: vmov s14, r0 | ||
; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 | ||
; CHECK-MVE-NEXT: vmov r0, s12 | ||
; CHECK-MVE-NEXT: vmov.16 q2[4], r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] | ||
; CHECK-MVE-NEXT: vmov s12, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[5] | ||
; CHECK-MVE-NEXT: vmov s14, r0 | ||
; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 | ||
; CHECK-MVE-NEXT: vmov r0, s12 | ||
; CHECK-MVE-NEXT: vmov.16 q2[5], r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] | ||
; CHECK-MVE-NEXT: vmov s12, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[6] | ||
; CHECK-MVE-NEXT: vmov s14, r0 | ||
; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 | ||
; CHECK-MVE-NEXT: vmov r0, s12 | ||
; CHECK-MVE-NEXT: vmov.16 q2[6], r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] | ||
; CHECK-MVE-NEXT: vmov s0, r0 | ||
; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] | ||
; CHECK-MVE-NEXT: vmov s2, r0 | ||
; CHECK-MVE-NEXT: vminnm.f16 s0, s2, s0 | ||
; CHECK-MVE-NEXT: vmov r0, s0 | ||
; CHECK-MVE-NEXT: vmov.16 q2[7], r0 | ||
; CHECK-MVE-NEXT: vmov q0, q2 | ||
; CHECK-MVE-NEXT: bx lr | ||
; | ||
; CHECK-MVEFP-LABEL: minnm_float16_t: | ||
; CHECK-MVEFP: @ %bb.0: @ %entry | ||
; CHECK-MVEFP-NEXT: vminnm.f16 q0, q1, q0 | ||
; CHECK-MVEFP-NEXT: bx lr | ||
entry: | ||
; Compare-and-select form of min: same ogt compare as the max test, but the | ||
; select arms are swapped so the lesser lane is chosen. | ||
%cmp = fcmp fast ogt <8 x half> %src2, %src1 | ||
%0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2 | ||
ret <8 x half> %0 | ||
} |