diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a6b92593c4958..355128786c5b9 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -20347,13 +20347,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint( case 'w': if (VT == MVT::Other) break; - if (VT == MVT::f16) - return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::bf16) - return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPRRegClass); @@ -20363,7 +20357,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint( case 'x': if (VT == MVT::Other) break; - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_8RegClass); @@ -20373,13 +20367,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint( case 't': if (VT == MVT::Other) break; - if (VT == MVT::f16) - return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::bf16) - return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::f32 || VT == MVT::i32) + if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_VFP2RegClass); diff --git a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll index 9840e3f63c5ac..554e5ba72c5d4 100644 --- a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll +++ b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll @@ -1,21 +1,34 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; No FP16/BF16 -; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP -; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP -; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD -; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD ; With FP16, Without BF16 -; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP -; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP -; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD -; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-HARD +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-HARD ; With FP16/BF16 -; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP -; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP -; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD -; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-SOFTFP +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-SOFTFP +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-HARD +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-HARD ; This test ensures that we can use `w` and `t` constraints to allocate ; S-registers for 16-bit FP inputs and outputs for inline assembly, with either @@ -41,6 +54,8 @@ define half @half_t(half %x) nounwind { ; FP16-SOFTFP-LABEL: half_t: ; FP16-SOFTFP: @ %bb.0: @ %entry ; FP16-SOFTFP-NEXT: vmov.f16 s0, r0 +; FP16-SOFTFP-NEXT: vmov.f16 r0, s0 +; FP16-SOFTFP-NEXT: vmov s0, r0 ; FP16-SOFTFP-NEXT: @APP ; FP16-SOFTFP-NEXT: vmov.f32 s0, s0 ; FP16-SOFTFP-NEXT: @NO_APP @@ -49,10 +64,52 @@ define half @half_t(half %x) nounwind { ; ; FP16-HARD-LABEL: half_t: ; FP16-HARD: @ %bb.0: @ %entry +; FP16-HARD-NEXT: vmov.f16 r0, s0 +; FP16-HARD-NEXT: vmov s0, r0 ; FP16-HARD-NEXT: @APP ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: half_t: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: half_t: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: half_t: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: vmov.f16 r0, s0 +; BF16-HARD-NEXT: vmov s0, r0 +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: half_t: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call half asm "vmov $0, $1", "=t,t"(half %x) ret half %0 @@ -78,6 +135,8 @@ define half @half_w(half %x) nounwind { ; FP16-SOFTFP-LABEL: half_w: ; FP16-SOFTFP: @ %bb.0: @ %entry ; FP16-SOFTFP-NEXT: vmov.f16 s0, r0 +; FP16-SOFTFP-NEXT: vmov.f16 r0, s0 +; FP16-SOFTFP-NEXT: vmov s0, r0 ; FP16-SOFTFP-NEXT: @APP ; FP16-SOFTFP-NEXT: vmov.f32 s0, s0 ; FP16-SOFTFP-NEXT: @NO_APP @@ -86,15 +145,138 @@ define half @half_w(half %x) nounwind { ; ; FP16-HARD-LABEL: half_w: ; FP16-HARD: @ %bb.0: @ %entry +; FP16-HARD-NEXT: vmov.f16 r0, s0 +; FP16-HARD-NEXT: vmov s0, r0 ; FP16-HARD-NEXT: @APP ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: half_w: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: half_w: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: half_w: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: vmov.f16 r0, s0 +; BF16-HARD-NEXT: vmov s0, r0 +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: half_w: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call half asm "vmov $0, $1", "=w,w"(half %x) ret half %0 } +define half @half_x(half %x) nounwind { +; NO-FP16-SOFTFP-LABEL: half_x: +; NO-FP16-SOFTFP: @ %bb.0: @ %entry +; NO-FP16-SOFTFP-NEXT: vmov s0, r0 +; NO-FP16-SOFTFP-NEXT: @APP +; NO-FP16-SOFTFP-NEXT: vmov.f32 s0, s0 +; NO-FP16-SOFTFP-NEXT: @NO_APP +; NO-FP16-SOFTFP-NEXT: vmov r0, s0 +; NO-FP16-SOFTFP-NEXT: bx lr +; +; NO-FP16-HARD-LABEL: half_x: +; NO-FP16-HARD: @ %bb.0: @ %entry +; NO-FP16-HARD-NEXT: @APP +; NO-FP16-HARD-NEXT: vmov.f32 s0, s0 +; NO-FP16-HARD-NEXT: @NO_APP +; NO-FP16-HARD-NEXT: bx lr +; +; FP16-SOFTFP-LABEL: half_x: +; FP16-SOFTFP: @ %bb.0: @ %entry +; FP16-SOFTFP-NEXT: vmov.f16 s0, r0 +; FP16-SOFTFP-NEXT: vmov.f16 r0, s0 +; FP16-SOFTFP-NEXT: vmov s0, r0 +; FP16-SOFTFP-NEXT: @APP +; FP16-SOFTFP-NEXT: vmov.f32 s0, s0 +; FP16-SOFTFP-NEXT: @NO_APP +; FP16-SOFTFP-NEXT: vmov r0, s0 +; FP16-SOFTFP-NEXT: bx lr +; +; FP16-HARD-LABEL: half_x: +; FP16-HARD: @ %bb.0: @ %entry +; FP16-HARD-NEXT: vmov.f16 r0, s0 +; FP16-HARD-NEXT: vmov s0, r0 +; FP16-HARD-NEXT: @APP +; FP16-HARD-NEXT: vmov.f32 s0, s0 +; FP16-HARD-NEXT: @NO_APP +; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: half_x: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: half_x: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: half_x: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: vmov.f16 r0, s0 +; BF16-HARD-NEXT: vmov s0, r0 +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: half_x: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr +entry: + %0 = tail call half asm "vmov $0, $1", "=x,x"(half %x) + ret half %0 +} + define bfloat @bf16_t(bfloat %x) nounwind { ; NO-FP16-SOFTFP-LABEL: bf16_t: ; NO-FP16-SOFTFP: @ %bb.0: @ %entry @@ -127,6 +309,42 @@ define bfloat @bf16_t(bfloat %x) nounwind { ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: bf16_t: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: bf16_t: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: bf16_t: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: bf16_t: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call bfloat asm "vmov $0, $1", "=t,t"(bfloat %x) ret bfloat %0 @@ -164,10 +382,116 @@ define bfloat @bf16_w(bfloat %x) nounwind { ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: bf16_w: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: bf16_w: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: bf16_w: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: bf16_w: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call bfloat asm "vmov $0, $1", "=w,w"(bfloat %x) ret bfloat %0 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; BF16-HARD: {{.*}} -; BF16-SOFTFP: {{.*}} + +define bfloat @bf16_x(bfloat %x) nounwind { +; NO-FP16-SOFTFP-LABEL: bf16_x: +; NO-FP16-SOFTFP: @ %bb.0: @ %entry +; NO-FP16-SOFTFP-NEXT: vmov s0, r0 +; NO-FP16-SOFTFP-NEXT: @APP +; NO-FP16-SOFTFP-NEXT: vmov.f32 s0, s0 +; NO-FP16-SOFTFP-NEXT: @NO_APP +; NO-FP16-SOFTFP-NEXT: vmov r0, s0 +; NO-FP16-SOFTFP-NEXT: bx lr +; +; NO-FP16-HARD-LABEL: bf16_x: +; NO-FP16-HARD: @ %bb.0: @ %entry +; NO-FP16-HARD-NEXT: @APP +; NO-FP16-HARD-NEXT: vmov.f32 s0, s0 +; NO-FP16-HARD-NEXT: @NO_APP +; NO-FP16-HARD-NEXT: bx lr +; +; FP16-SOFTFP-LABEL: bf16_x: +; FP16-SOFTFP: @ %bb.0: @ %entry +; FP16-SOFTFP-NEXT: vmov s0, r0 +; FP16-SOFTFP-NEXT: @APP +; FP16-SOFTFP-NEXT: vmov.f32 s0, s0 +; FP16-SOFTFP-NEXT: @NO_APP +; FP16-SOFTFP-NEXT: vmov r0, s0 +; FP16-SOFTFP-NEXT: bx lr +; +; FP16-HARD-LABEL: bf16_x: +; FP16-HARD: @ %bb.0: @ %entry +; FP16-HARD-NEXT: @APP +; FP16-HARD-NEXT: vmov.f32 s0, s0 +; FP16-HARD-NEXT: @NO_APP +; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: bf16_x: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: bf16_x: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: bf16_x: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: bf16_x: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr +entry: + %0 = tail call bfloat asm "vmov $0, $1", "=x,x"(bfloat %x) + ret bfloat %0 +}