From fa0ff5778aa7302010a16696b75e980b28f63f51 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye@intel.com>
Date: Wed, 10 Aug 2022 08:59:21 +0800
Subject: [PATCH] [X86][BF16] Enable __bf16 for x86 targets.

X86 psABI has updated to support __bf16 type, the ABI of which is the
same as FP16. See https://discourse.llvm.org/t/patch-add-optional-bfloat16-support/63149

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D130964

(cherry picked from commit e4888a37d36780872d685c68ef8b26b2e14d6d39)
---
 clang/docs/LanguageExtensions.rst        |   4 +
 clang/lib/Basic/Targets/X86.cpp          |   2 +
 clang/lib/Basic/Targets/X86.h            |   4 +
 clang/lib/CodeGen/TargetInfo.cpp         |  13 +-
 clang/test/CodeGen/X86/bfloat-abi.c      | 149 +++++++++++++++++++++++
 clang/test/CodeGen/X86/bfloat-half-abi.c | 149 +++++++++++++++++++++++
 clang/test/CodeGen/X86/bfloat-mangle.cpp |   5 +
 clang/test/Sema/vector-decl-crash.c      |   2 +-
 llvm/include/llvm/IR/Type.h              |   5 +
 9 files changed, 326 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/bfloat-abi.c
 create mode 100644 clang/test/CodeGen/X86/bfloat-half-abi.c
 create mode 100644 clang/test/CodeGen/X86/bfloat-mangle.cpp
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 1bac2aee84bd90..259983271f345e 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -756,6 +756,10 @@ performing the operation, and then truncating to ``_Float16``.
 ``__bf16`` is purely a storage format; it is currently only supported on the following targets:
 * 32-bit ARM
 * 64-bit ARM (AArch64)
+* X86 (see below)
+
+On X86 targets, ``__bf16`` is supported as long as SSE2 is available, which
+includes all 64-bit and all recent 32-bit processors.
 
 The ``__bf16`` type is only available when supported in hardware.
 
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 69afdf8a3584cb..0f5c366816df9d 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -358,6 +358,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
 
     HasFloat16 = SSELevel >= SSE2;
 
+    HasBFloat16 = SSELevel >= SSE2;
+
     MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
                                       .Case("+3dnowa", AMD3DNowAthlon)
                                       .Case("+3dnow", AMD3DNow)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 0affa58b2f4c0f..ed0864aec6d2d2 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -156,6 +156,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
 public:
   X86TargetInfo(const llvm::Triple &Triple, const TargetOptions &)
       : TargetInfo(Triple) {
+    BFloat16Width = BFloat16Align = 16;
+    BFloat16Format = &llvm::APFloat::BFloat();
     LongDoubleFormat = &llvm::APFloat::x87DoubleExtended();
     AddrSpaceMap = &X86AddrSpaceMap;
     HasStrictFP = true;
@@ -396,6 +398,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   uint64_t getPointerAlignV(unsigned AddrSpace) const override {
     return getPointerWidthV(AddrSpace);
   }
+
+  const char *getBFloat16Mangling() const override { return "u6__bf16"; };
 };
 
 // X86-32 generic target
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d1ee61eab9d66d..195ad8cdc13eac 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -2871,7 +2871,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
     } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
       Current = Integer;
     } else if (k == BuiltinType::Float || k == BuiltinType::Double ||
-               k == BuiltinType::Float16) {
+               k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
       Current = SSE;
     } else if (k == BuiltinType::LongDouble) {
       const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
@@ -3002,7 +3002,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
         Current = Integer;
       else if (Size <= 128)
         Lo = Hi = Integer;
-    } else if (ET->isFloat16Type() || ET == getContext().FloatTy) {
+    } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
+               ET->isBFloat16Type()) {
       Current = SSE;
     } else if (ET == getContext().DoubleTy) {
       Lo = Hi = SSE;
@@ -3474,9 +3475,9 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
   if (SourceSize > T0Size)
       T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
   if (T1 == nullptr) {
-    // Check if IRType is a half + float. float type will be in IROffset+4 due
+    // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due
     // to its alignment.
-    if (T0->isHalfTy() && SourceSize > 4)
+    if (T0->is16bitFPTy() && SourceSize > 4)
       T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
     // If we can't get a second FP type, return a simple half or float.
     // avx512fp16-abi.c:pr51813_2 shows it works to return float for
@@ -3488,7 +3489,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
   if (T0->isFloatTy() && T1->isFloatTy())
     return llvm::FixedVectorType::get(T0, 2);
 
-  if (T0->isHalfTy() && T1->isHalfTy()) {
+  if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
     llvm::Type *T2 = nullptr;
     if (SourceSize > 4)
       T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
@@ -3497,7 +3498,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
     return llvm::FixedVectorType::get(T0, 4);
   }
 
-  if (T0->isHalfTy() || T1->isHalfTy())
+  if (T0->is16bitFPTy() || T1->is16bitFPTy())
     return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
 
   return llvm::Type::getDoubleTy(getVMContext());
diff --git a/clang/test/CodeGen/X86/bfloat-abi.c b/clang/test/CodeGen/X86/bfloat-abi.c
new file mode 100644
index 00000000000000..42250791848ac7
--- /dev/null
+++ b/clang/test/CodeGen/X86/bfloat-abi.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK
+
+struct bfloat1 {
+  __bf16 a;
+};
+
+struct bfloat1 h1(__bf16 a) {
+  // CHECK: define{{.*}}bfloat @
+  struct bfloat1 x;
+  x.a = a;
+  return x;
+}
+
+struct bfloat2 {
+  __bf16 a;
+  __bf16 b;
+};
+
+struct bfloat2 h2(__bf16 a, __bf16 b) {
+  // CHECK: define{{.*}}<2 x bfloat> @
+  struct bfloat2 x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct bfloat3 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+};
+
+struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct bfloat4 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+};
+
+struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat4 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct floatbfloat {
+  float a;
+  __bf16 b;
+};
+
+struct floatbfloat fh(float a, __bf16 b) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct floatbfloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct floatbfloat2 {
+  float a;
+  __bf16 b;
+  __bf16 c;
+};
+
+struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct floatbfloat2 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct bfloatfloat {
+  __bf16 a;
+  float b;
+};
+
+struct bfloatfloat hf(__bf16 a, float b) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct bfloatfloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct bfloat2float {
+  __bf16 a;
+  __bf16 b;
+  float c;
+};
+
+struct bfloat2float h2f(__bf16 a, __bf16 b, float c) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat2float x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct floatbfloat3 {
+  float a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+};
+
+struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) {
+  // CHECK: define{{.*}}{ <4 x half>, bfloat } @
+  struct floatbfloat3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct bfloat5 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+  __bf16 e;
+};
+
+struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) {
+  // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @
+  struct bfloat5 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  x.e = e;
+  return x;
+}
diff --git a/clang/test/CodeGen/X86/bfloat-half-abi.c b/clang/test/CodeGen/X86/bfloat-half-abi.c
new file mode 100644
index 00000000000000..42250791848ac7
--- /dev/null
+++ b/clang/test/CodeGen/X86/bfloat-half-abi.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK
+
+struct bfloat1 {
+  __bf16 a;
+};
+
+struct bfloat1 h1(__bf16 a) {
+  // CHECK: define{{.*}}bfloat @
+  struct bfloat1 x;
+  x.a = a;
+  return x;
+}
+
+struct bfloat2 {
+  __bf16 a;
+  __bf16 b;
+};
+
+struct bfloat2 h2(__bf16 a, __bf16 b) {
+  // CHECK: define{{.*}}<2 x bfloat> @
+  struct bfloat2 x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct bfloat3 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+};
+
+struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct bfloat4 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+};
+
+struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat4 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct floatbfloat {
+  float a;
+  __bf16 b;
+};
+
+struct floatbfloat fh(float a, __bf16 b) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct floatbfloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct floatbfloat2 {
+  float a;
+  __bf16 b;
+  __bf16 c;
+};
+
+struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct floatbfloat2 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct bfloatfloat {
+  __bf16 a;
+  float b;
+};
+
+struct bfloatfloat hf(__bf16 a, float b) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct bfloatfloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct bfloat2float {
+  __bf16 a;
+  __bf16 b;
+  float c;
+};
+
+struct bfloat2float h2f(__bf16 a, __bf16 b, float c) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat2float x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct floatbfloat3 {
+  float a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+};
+
+struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) {
+  // CHECK: define{{.*}}{ <4 x half>, bfloat } @
+  struct floatbfloat3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct bfloat5 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+  __bf16 e;
+};
+
+struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) {
+  // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @
+  struct bfloat5 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  x.e = e;
+  return x;
+}
diff --git a/clang/test/CodeGen/X86/bfloat-mangle.cpp b/clang/test/CodeGen/X86/bfloat-mangle.cpp
new file mode 100644
index 00000000000000..2892a76d8d910f
--- /dev/null
+++ b/clang/test/CodeGen/X86/bfloat-mangle.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define {{.*}}void @_Z3foou6__bf16(bfloat noundef %b)
+void foo(__bf16 b) {}
diff --git a/clang/test/Sema/vector-decl-crash.c b/clang/test/Sema/vector-decl-crash.c
index 5e4b098fee2d37..fafe34133de43d 100644
--- a/clang/test/Sema/vector-decl-crash.c
+++ b/clang/test/Sema/vector-decl-crash.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify -triple x86_64-unknown-unknown
+// RUN: %clang_cc1 %s -fsyntax-only -verify -triple riscv64-unknown-unknown
 
 // GH50171
 // This would previously crash when __bf16 was not a supported type.
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 51263c6b8fccc5..a7f22324571b70 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -144,6 +144,11 @@ class Type {
   /// Return true if this is 'bfloat', a 16-bit bfloat type.
   bool isBFloatTy() const { return getTypeID() == BFloatTyID; }
 
+  /// Return true if this is a 16-bit float type.
+  bool is16bitFPTy() const {
+    return getTypeID() == BFloatTyID || getTypeID() == HalfTyID;
+  }
+
   /// Return true if this is 'float', a 32-bit IEEE fp type.
   bool isFloatTy() const { return getTypeID() == FloatTyID; }