[ARM] Support fp16/bf16 using t constraint

fp16 and bf16 values can be used in GCC's inline assembly with the "t" constraint, which means "VFP floating-point registers s0-s31"; fp16 and bf16 values are stored in S registers too. This change ensures that LLVM is compatible with GCC for programs that use fp16 or bf16 values with the "t" constraint. Fixes #57753. Differential Revision: https://reviews.llvm.org/D134553
llvm · Sep 28, 2022 · ff4027d · ff4027d
1 parent 8bd5440
commit ff4027d
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 0 deletions.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20163,6 +20163,8 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
     case 't':
       if (VT == MVT::Other)
         break;
+      if (VT == MVT::f16 || VT == MVT::bf16)
+        return RCPair(0U, &ARM::HPRRegClass);
       if (VT == MVT::f32 || VT == MVT::i32)
         return RCPair(0U, &ARM::SPRRegClass);
       if (VT.getSizeInBits() == 64)

diff --git a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s
+
+
+define arm_aapcscc half @f(half %x) nounwind {
+; CHECK-LABEL: f:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f16 s0, r0
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    vsqrt.f16 s0, s0
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call half asm "vsqrt.f16 $0, $1", "=t,t"(half %x)
+  ret half %0
+}
+
+define arm_aapcscc bfloat @h(bfloat %x) nounwind {
+; CHECK-LABEL: h:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f16 s0, r0
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    vmov.f32 s0, s0
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    vmov.f16 r0, s0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call bfloat asm "vmov.f32 $0, $1", "=t,t"(bfloat %x)
+  ret bfloat %0
+}