Skip to content

Commit

Permalink
[ARM] Patterns for vector conversion between half and float
Browse files Browse the repository at this point in the history
These patterns were omitted because clang only allows converting between
these types using intrinsics, but other front-ends or optimisation
passes may want to use them.

Differential revision: https://reviews.llvm.org/D119354
  • Loading branch information
ostannard committed Feb 10, 2022
1 parent 4efde1e commit a766201
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Expand Up @@ -6946,6 +6946,9 @@ def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
v4f32, v4i16, int_arm_neon_vcvthf2fp>,
Requires<[HasNEON, HasFP16]>;

// Select the NEON half <-> single vector conversion instructions for the
// generic fpround / fpextend nodes, so that plain IR fptrunc / fpext between
// <4 x float> and <4 x half> does not have to go through the intrinsics.
// The patterns carry the same predicates that VCVTh2f is declared with above,
// so they cannot match on a subtarget that lacks the instructions.
// NOTE(review): VCVTf2h is assumed to require the same predicates - confirm
// against its definition.
def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>,
      Requires<[HasNEON, HasFP16]>;
def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>,
      Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/CodeGen/ARM/fp16-vector-cvt.ll
@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armv8a-none-none-eabihf -mattr=fullfp16 < %s | FileCheck %s

; Truncates a <4 x float> argument to <4 x half> and returns it.
; The autogenerated assertions below expect the whole conversion to be
; selected as one vcvt.f16.f32 from q0 into d0, followed by the return.
define <4 x half> @fptrunc_vector_f32_f16(<4 x float> %a) {
; CHECK-LABEL: fptrunc_vector_f32_f16:
; CHECK: @ %bb.0: @ %bb
; CHECK-NEXT: vcvt.f16.f32 d0, q0
; CHECK-NEXT: bx lr
bb:
%z = fptrunc <4 x float> %a to <4 x half>
ret <4 x half> %z
}

; Truncates a <4 x double> argument to <4 x half> and returns it.
; The autogenerated assertions below expect each of the four lanes to be
; converted on its own with a scalar vcvtb.f16.f64, moved to a core register,
; and inserted into d0 with vmov.16; no single vector conversion is expected
; for this source type.
define <4 x half> @fptrunc_vector_f64_f16(<4 x double> %a) {
; CHECK-LABEL: fptrunc_vector_f64_f16:
; CHECK: @ %bb.0: @ %bb
; CHECK-NEXT: vcvtb.f16.f64 s0, d0
; CHECK-NEXT: vcvtb.f16.f64 s8, d1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vcvtb.f16.f64 s2, d2
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.16 d0[0], r1
; CHECK-NEXT: vmov.16 d0[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vcvtb.f16.f64 s2, d3
; CHECK-NEXT: vmov.16 d0[2], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.16 d0[3], r0
; CHECK-NEXT: bx lr
bb:
%z = fptrunc <4 x double> %a to <4 x half>
ret <4 x half> %z
}

; Extends a <4 x half> argument to <4 x float> and returns it.
; The autogenerated assertions below expect the whole conversion to be
; selected as one vcvt.f32.f16 from d0 into q0, followed by the return.
define <4 x float> @fpext_vector_f16_f32(<4 x half> %a) {
; CHECK-LABEL: fpext_vector_f16_f32:
; CHECK: @ %bb.0: @ %bb
; CHECK-NEXT: vcvt.f32.f16 q0, d0
; CHECK-NEXT: bx lr
bb:
%z = fpext <4 x half> %a to <4 x float>
ret <4 x float> %z
}

; Extends a <4 x half> argument to <4 x double> and returns it.
; The autogenerated assertions below expect two vmovx.f16 moves (from s0 and
; s1), then four scalar vcvtb.f64.f16 conversions, one per lane, and a final
; vorr that copies q8 into q0; no single vector conversion is expected for
; this destination type.
define <4 x double> @fpext_vector_f16_f64(<4 x half> %a) {
; CHECK-LABEL: fpext_vector_f16_f64:
; CHECK: @ %bb.0: @ %bb
; CHECK-NEXT: vmovx.f16 s4, s0
; CHECK-NEXT: vmovx.f16 s2, s1
; CHECK-NEXT: vcvtb.f64.f16 d17, s4
; CHECK-NEXT: vcvtb.f64.f16 d3, s2
; CHECK-NEXT: vcvtb.f64.f16 d16, s0
; CHECK-NEXT: vcvtb.f64.f16 d2, s1
; CHECK-NEXT: vorr q0, q8, q8
; CHECK-NEXT: bx lr
bb:
%z = fpext <4 x half> %a to <4 x double>
ret <4 x double> %z
}

0 comments on commit a766201

Please sign in to comment.