From fa0ff5778aa7302010a16696b75e980b28f63f51 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Wed, 10 Aug 2022 08:59:21 +0800 Subject: [PATCH] [X86][BF16] Enable __bf16 for x86 targets. The X86 psABI has been updated to support the __bf16 type, the ABI of which is the same as FP16. See https://discourse.llvm.org/t/patch-add-optional-bfloat16-support/63149 Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D130964 (cherry picked from commit e4888a37d36780872d685c68ef8b26b2e14d6d39) --- clang/docs/LanguageExtensions.rst | 4 + clang/lib/Basic/Targets/X86.cpp | 2 + clang/lib/Basic/Targets/X86.h | 4 + clang/lib/CodeGen/TargetInfo.cpp | 13 +- clang/test/CodeGen/X86/bfloat-abi.c | 149 +++++++++++++++++++++++ clang/test/CodeGen/X86/bfloat-half-abi.c | 149 +++++++++++++++++++++++ clang/test/CodeGen/X86/bfloat-mangle.cpp | 5 + clang/test/Sema/vector-decl-crash.c | 2 +- llvm/include/llvm/IR/Type.h | 5 + 9 files changed, 326 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/X86/bfloat-abi.c create mode 100644 clang/test/CodeGen/X86/bfloat-half-abi.c create mode 100644 clang/test/CodeGen/X86/bfloat-mangle.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 1bac2aee84bd90..259983271f345e 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -756,6 +756,10 @@ performing the operation, and then truncating to ``_Float16``. ``__bf16`` is purely a storage format; it is currently only supported on the following targets: * 32-bit ARM * 64-bit ARM (AArch64) +* X86 (see below) + +On X86 targets, ``__bf16`` is supported as long as SSE2 is available, which +includes all 64-bit and all recent 32-bit processors. The ``__bf16`` type is only available when supported in hardware. 
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 69afdf8a3584cb..0f5c366816df9d 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -358,6 +358,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasFloat16 = SSELevel >= SSE2; + HasBFloat16 = SSELevel >= SSE2; + MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch(Feature) .Case("+3dnowa", AMD3DNowAthlon) .Case("+3dnow", AMD3DNow) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 0affa58b2f4c0f..ed0864aec6d2d2 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -156,6 +156,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { public: X86TargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { + BFloat16Width = BFloat16Align = 16; + BFloat16Format = &llvm::APFloat::BFloat(); LongDoubleFormat = &llvm::APFloat::x87DoubleExtended(); AddrSpaceMap = &X86AddrSpaceMap; HasStrictFP = true; @@ -396,6 +398,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { uint64_t getPointerAlignV(unsigned AddrSpace) const override { return getPointerWidthV(AddrSpace); } + + const char *getBFloat16Mangling() const override { return "u6__bf16"; }; }; // X86-32 generic target diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index d1ee61eab9d66d..195ad8cdc13eac 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -2871,7 +2871,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { Current = Integer; } else if (k == BuiltinType::Float || k == BuiltinType::Double || - k == BuiltinType::Float16) { + k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); @@ 
-3002,7 +3002,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, Current = Integer; else if (Size <= 128) Lo = Hi = Integer; - } else if (ET->isFloat16Type() || ET == getContext().FloatTy) { + } else if (ET->isFloat16Type() || ET == getContext().FloatTy || + ET->isBFloat16Type()) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; @@ -3474,9 +3475,9 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, if (SourceSize > T0Size) T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); if (T1 == nullptr) { - // Check if IRType is a half + float. float type will be in IROffset+4 due + // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due // to its alignment. - if (T0->isHalfTy() && SourceSize > 4) + if (T0->is16bitFPTy() && SourceSize > 4) T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); // If we can't get a second FP type, return a simple half or float. // avx512fp16-abi.c:pr51813_2 shows it works to return float for @@ -3488,7 +3489,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, if (T0->isFloatTy() && T1->isFloatTy()) return llvm::FixedVectorType::get(T0, 2); - if (T0->isHalfTy() && T1->isHalfTy()) { + if (T0->is16bitFPTy() && T1->is16bitFPTy()) { llvm::Type *T2 = nullptr; if (SourceSize > 4) T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); @@ -3497,7 +3498,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, return llvm::FixedVectorType::get(T0, 4); } - if (T0->isHalfTy() || T1->isHalfTy()) + if (T0->is16bitFPTy() || T1->is16bitFPTy()) return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); return llvm::Type::getDoubleTy(getVMContext()); diff --git a/clang/test/CodeGen/X86/bfloat-abi.c b/clang/test/CodeGen/X86/bfloat-abi.c new file mode 100644 index 00000000000000..42250791848ac7 --- /dev/null +++ b/clang/test/CodeGen/X86/bfloat-abi.c @@ -0,0 +1,149 @@ +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 < %s | 
FileCheck %s --check-prefixes=CHECK + +struct bfloat1 { + __bf16 a; +}; + +struct bfloat1 h1(__bf16 a) { + // CHECK: define{{.*}}bfloat @ + struct bfloat1 x; + x.a = a; + return x; +} + +struct bfloat2 { + __bf16 a; + __bf16 b; +}; + +struct bfloat2 h2(__bf16 a, __bf16 b) { + // CHECK: define{{.*}}<2 x bfloat> @ + struct bfloat2 x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat3 { + __bf16 a; + __bf16 b; + __bf16 c; +}; + +struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) { + // CHECK: define{{.*}}<4 x bfloat> @ + struct bfloat3 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloat4 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) { + // CHECK: define{{.*}}<4 x bfloat> @ + struct bfloat4 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct floatbfloat { + float a; + __bf16 b; +}; + +struct floatbfloat fh(float a, __bf16 b) { + // CHECK: define{{.*}}<4 x half> @ + struct floatbfloat x; + x.a = a; + x.b = b; + return x; +} + +struct floatbfloat2 { + float a; + __bf16 b; + __bf16 c; +}; + +struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) { + // CHECK: define{{.*}}<4 x half> @ + struct floatbfloat2 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloatfloat { + __bf16 a; + float b; +}; + +struct bfloatfloat hf(__bf16 a, float b) { + // CHECK: define{{.*}}<4 x half> @ + struct bfloatfloat x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat2float { + __bf16 a; + __bf16 b; + float c; +}; + +struct bfloat2float h2f(__bf16 a, __bf16 b, float c) { + // CHECK: define{{.*}}<4 x bfloat> @ + struct bfloat2float x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct floatbfloat3 { + float a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) { + // CHECK: define{{.*}}{ <4 x half>, bfloat } @ + struct floatbfloat3 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct bfloat5 { + 
__bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; + __bf16 e; +}; + +struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) { + // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @ + struct bfloat5 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + x.e = e; + return x; +} diff --git a/clang/test/CodeGen/X86/bfloat-half-abi.c b/clang/test/CodeGen/X86/bfloat-half-abi.c new file mode 100644 index 00000000000000..42250791848ac7 --- /dev/null +++ b/clang/test/CodeGen/X86/bfloat-half-abi.c @@ -0,0 +1,149 @@ +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK + +struct bfloat1 { + __bf16 a; +}; + +struct bfloat1 h1(__bf16 a) { + // CHECK: define{{.*}}bfloat @ + struct bfloat1 x; + x.a = a; + return x; +} + +struct bfloat2 { + __bf16 a; + __bf16 b; +}; + +struct bfloat2 h2(__bf16 a, __bf16 b) { + // CHECK: define{{.*}}<2 x bfloat> @ + struct bfloat2 x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat3 { + __bf16 a; + __bf16 b; + __bf16 c; +}; + +struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) { + // CHECK: define{{.*}}<4 x bfloat> @ + struct bfloat3 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloat4 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) { + // CHECK: define{{.*}}<4 x bfloat> @ + struct bfloat4 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct floatbfloat { + float a; + __bf16 b; +}; + +struct floatbfloat fh(float a, __bf16 b) { + // CHECK: define{{.*}}<4 x half> @ + struct floatbfloat x; + x.a = a; + x.b = b; + return x; +} + +struct floatbfloat2 { + float a; + __bf16 b; + __bf16 c; +}; + +struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) { + // CHECK: define{{.*}}<4 x half> @ + struct floatbfloat2 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloatfloat { + __bf16 a; + float b; +}; + +struct bfloatfloat hf(__bf16 a, float b) { + // CHECK: define{{.*}}<4 x half> @ + 
struct bfloatfloat x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat2float { + __bf16 a; + __bf16 b; + float c; +}; + +struct bfloat2float h2f(__bf16 a, __bf16 b, float c) { + // CHECK: define{{.*}}<4 x bfloat> @ + struct bfloat2float x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct floatbfloat3 { + float a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) { + // CHECK: define{{.*}}{ <4 x half>, bfloat } @ + struct floatbfloat3 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct bfloat5 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; + __bf16 e; +}; + +struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) { + // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @ + struct bfloat5 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + x.e = e; + return x; +} diff --git a/clang/test/CodeGen/X86/bfloat-mangle.cpp b/clang/test/CodeGen/X86/bfloat-mangle.cpp new file mode 100644 index 00000000000000..2892a76d8d910f --- /dev/null +++ b/clang/test/CodeGen/X86/bfloat-mangle.cpp @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -triple i386-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s + +// CHECK: define {{.*}}void @_Z3foou6__bf16(bfloat noundef %b) +void foo(__bf16 b) {} diff --git a/clang/test/Sema/vector-decl-crash.c b/clang/test/Sema/vector-decl-crash.c index 5e4b098fee2d37..fafe34133de43d 100644 --- a/clang/test/Sema/vector-decl-crash.c +++ b/clang/test/Sema/vector-decl-crash.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -fsyntax-only -verify -triple x86_64-unknown-unknown +// RUN: %clang_cc1 %s -fsyntax-only -verify -triple riscv64-unknown-unknown // GH50171 // This would previously crash when __bf16 was not a supported type. 
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 51263c6b8fccc5..a7f22324571b70 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -144,6 +144,11 @@ class Type { /// Return true if this is 'bfloat', a 16-bit bfloat type. bool isBFloatTy() const { return getTypeID() == BFloatTyID; } + /// Return true if this is a 16-bit float type. + bool is16bitFPTy() const { + return getTypeID() == BFloatTyID || getTypeID() == HalfTyID; + } + /// Return true if this is 'float', a 32-bit IEEE fp type. bool isFloatTy() const { return getTypeID() == FloatTyID; }