[AArch64] Add FP8 Neon intrinsics for dot-product #119911
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-ir

Author: Momchil Velikov (momchil-velikov)

Changes

This patch adds the following intrinsics:

float16x4_t vdot_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpm)
float16x8_t vdotq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm)
float16x4_t vdot_lane_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x4_t vdot_laneq_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)

along with the corresponding f32 variants (vdot[q]_f32_mf8_fpm and their lane/laneq forms), guarded by the fp8dot2 and fp8dot4 target features respectively.
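As a quick illustration of how the new intrinsics are meant to be used, here is a hypothetical sketch (not code from this patch; the function name and lane index are made up, and it assumes an FP8-capable toolchain providing <arm_neon.h>, mfloat8x8_t, fpm_t and the fp8dot2 feature):

```c
// Illustrative sketch only, not from the patch: assumes FP8-capable headers
// (mfloat8x8_t, fpm_t) and a target with the fp8dot2 feature enabled.
#include <arm_neon.h>

float16x4_t fp8_dot_accumulate(float16x4_t acc, mfloat8x8_t vn, mfloat8x8_t vm,
                               fpm_t fpm) {
  // 2-way dot product of FP8 element pairs, accumulated into f16 lanes;
  // the FP8 formats/scaling come from the fpm operand (the intrinsic
  // programs FPMR before the dot product, per the CodeGen changes below).
  acc = vdot_f16_mf8_fpm(acc, vn, vm, fpm);
  // Indexed form: the lane index is an immediate, 0..3 for this 64-bit variant.
  acc = vdot_lane_f16_mf8_fpm(acc, vn, vm, 0, fpm);
  return acc;
}
```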
Patch is 107.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119911.diff 25 Files Affected:
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index f0eee77c73ef06d..cacd3d75ffcd8de 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2404,6 +2404,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
/// SVE vector or predicate, excluding tuple types such as svint32x4_t.
bool isSveVLSBuiltinType() const;
+ /// Determines if this is a *builtin* NEON vector type, a type not built with
+ /// `neon_vector_type`
+ bool isNeonVectorBuiltinType() const;
+
/// Returns the representative type for the element of an SVE builtin type.
/// This is used to represent fixed-length SVE vectors created with the
/// 'arm_sve_vector_bits' type attribute as VectorType.
@@ -2518,6 +2522,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
bool isFloat32Type() const;
bool isDoubleType() const;
bool isBFloat16Type() const;
+ bool isMFloat8Type() const;
bool isFloat128Type() const;
bool isIbm128Type() const;
bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8532,6 +8537,10 @@ inline bool Type::isBFloat16Type() const {
return isSpecificBuiltinType(BuiltinType::BFloat16);
}
+inline bool Type::isMFloat8Type() const {
+ return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
inline bool Type::isFloat128Type() const {
return isSpecificBuiltinType(BuiltinType::Float128);
}
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee7..6b704b386536c9b 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
// - IsBF true for vector of brain float elements.
//===----------------------------------------------------------------------===//
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ SVE_TYPE(Name, Id, SingletonId)
+#endif
+
#ifndef SVE_VECTOR_TYPE
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true)
#endif
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
+ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, false)
+#endif
+
#ifndef SVE_VECTOR_TYPE_FLOAT
#define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, true, false)
@@ -125,8 +135,7 @@ SVE_VECTOR_TYPE_FLOAT("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty
SVE_VECTOR_TYPE_BFLOAT("__SVBfloat16_t", "__SVBfloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, 1)
-// This is a 8 bits opaque type.
-SVE_VECTOR_TYPE_INT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1, false)
+SVE_VECTOR_TYPE_MFLOAT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1)
//
// x2
@@ -148,7 +157,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, Sv
SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 8, 16, 2)
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2)
//
// x3
@@ -170,7 +179,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, Sv
SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 8, 16, 3)
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3)
//
// x4
@@ -192,7 +201,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, Sv
SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 8, 16, 4)
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4)
SVE_PREDICATE_TYPE_ALL("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16, 1)
SVE_PREDICATE_TYPE_ALL("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 16, 2)
@@ -200,11 +209,13 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T
SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
-AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1)
+SVE_SCALAR_TYPE("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 8)
+
AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1)
AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1)
#undef SVE_VECTOR_TYPE
+#undef SVE_VECTOR_TYPE_MFLOAT
#undef SVE_VECTOR_TYPE_BFLOAT
#undef SVE_VECTOR_TYPE_FLOAT
#undef SVE_VECTOR_TYPE_INT
@@ -213,4 +224,5 @@ AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloa
#undef SVE_OPAQUE_TYPE
#undef AARCH64_VECTOR_TYPE_MFLOAT
#undef AARCH64_VECTOR_TYPE
+#undef SVE_SCALAR_TYPE
#undef SVE_TYPE
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 0a245e2077f68ff..a7440a6d7826c90 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10545,6 +10545,9 @@ def err_vec_builtin_incompatible_vector : Error<
def err_vsx_builtin_nonconstant_argument : Error<
"argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
+def err_shufflevector_incompatible_index_vector : Error<
+ "second argument for __builtin_shufflevector must be integer vector "
+ "with length equal to the length of the first argument">;
def err_shufflevector_nonconstant_argument : Error<
"index for __builtin_shufflevector must be a constant integer">;
def err_shufflevector_argument_too_large : Error<
@@ -10552,6 +10555,8 @@ def err_shufflevector_argument_too_large : Error<
"of vector elements">;
def err_shufflevector_minus_one_is_undefined_behavior_constexpr : Error<
"index for __builtin_shufflevector not within the bounds of the input vectors; index of -1 found at position %0 is not permitted in a constexpr context">;
+def err_shufflevector_unsupported_result_vector_type : Error<
+ "unsupported vector type for the result">;
def err_convertvector_non_vector : Error<
"first argument to __builtin_convertvector must be a vector">;
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index ef89fa4358dfeb0..7c2e68b3f7a195c 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2125,6 +2125,51 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
}
}
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,bf16,neon" in {
+ def VBF1CVT_BF16_MF8 : VInst<"vcvt1_bf16_mf8_fpm", "(QB).V", "m">;
+ def VBF1CVT_LOW_BF16_MF8 : VInst<"vcvt1_low_bf16_mf8_fpm", "B.V", "Qm">;
+ def VBF2CVTL_BF16_MF8 : VInst<"vcvt2_bf16_mf8_fpm", "(QB).V", "m">;
+ def VBF2CVTL_LOW_BF16_MF8 : VInst<"vcvt2_low_bf16_mf8_fpm", "B.V", "Qm">;
+ def VBF1CVTL2_HIGH_BF16_MF8 : VInst<"vcvt1_high_bf16_mf8_fpm", "B.V", "Qm">;
+ def VBF2CVTL2_HIGH_BF16_MF8 : VInst<"vcvt2_high_bf16_mf8_fpm", "B.V", "Qm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
+ def VF1CVT_F16_MF8 : VInst<"vcvt1_f16_mf8_fpm", "(>QF).V", "m">;
+ def VF1CVT_LOW_F16_MF8 : VInst<"vcvt1_low_f16_mf8_fpm", "(>F).V", "Qm">;
+ def VF2CVTL_F16_MF8 : VInst<"vcvt2_f16_mf8_fpm", "(>QF).V", "m">;
+ def VF2CVTL_LOW_F16_MF8 : VInst<"vcvt2_low_f16_mf8_fpm", "(>F).V", "Qm">;
+ def VF1CVTL2_HIGH_F16_MF8 : VInst<"vcvt1_high_f16_mf8_fpm", "(>F).V", "Qm">;
+ def VF2CVTL2_HIGH_F16_MF8 : VInst<"vcvt2_high_f16_mf8_fpm", "(>F).V", "Qm">;
+
+ def VCVTN_LOW_F8_F32 : VInst<"vcvt_mf8_f32_fpm", ".(>>QF)(>>QF)V", "m">;
+ def VCVTN_HIGH_F8_F32 : VInst<"vcvt_high_mf8_f32_fpm", ".(q)(>>F)(>>F)V", "Qm">;
+ def VCVTN_F8_F16 : VInst<"vcvt_mf8_f16_fpm", ".(>F)(>F)V", "m">;
+ def VCVTNQ_F8_F16 : VInst<"vcvtq_mf8_f16_fpm", ".(>F)(>F)V", "Qm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot2,neon" in {
+ def VDOT_F16_MF8 : VInst<"vdot_f16_mf8_fpm", "(>F)(>F)..V", "m">;
+ def VDOTQ_F16_MF8 : VInst<"vdotq_f16_mf8_fpm", "(>F)(>F)..V", "Qm">;
+
+ def VDOT_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F)..IV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+ def VDOT_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F).QIV", "m", [ImmCheck<3, ImmCheck0_7, 0>]>;
+
+ def VDOTQ_LANE_F16_MF8 : VInst<"vdotq_lane_f16_mf8_fpm", "(>F)(>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+ def VDOTQ_LANEQ_F16_MF8 : VInst<"vdotq_laneq_f16_mf8_fpm", "(>F)(>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_7, 0>]>;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot4,neon" in {
+ def VDOT_F32_MF8 : VInst<"vdot_f32_mf8_fpm", "(>>F)(>>F)..V", "m">;
+ def VDOTQ_F32_MF8 : VInst<"vdotq_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+
+ def VDOT_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F)..IV", "m", [ImmCheck<3, ImmCheck0_1, 0>]>;
+ def VDOT_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F).QIV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+
+ def VDOTQ_LANE_F32_MF8 : VInst<"vdotq_lane_f32_mf8_fpm", "(>>F)(>>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_1, 0>]>;
+ def VDOTQ_LANEQ_F32_MF8 : VInst<"vdotq_laneq_f32_mf8_fpm", "(>>F)(>>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+}
+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
@@ -2134,4 +2179,4 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
// fscale
def FSCALE_V128 : WInst<"vscale", "..(.S)", "QdQfQh">;
def FSCALE_V64 : WInst<"vscale", "(.q)(.q)(.qS)", "fh">;
-}
\ No newline at end of file
+}
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index fd800e5a6278e4a..b9b9d509c22512b 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -243,6 +243,7 @@ def OP_UNAVAILABLE : Operation {
// B: change to BFloat16
// P: change to polynomial category.
// p: change polynomial to equivalent integer category. Otherwise nop.
+// V: change to fpm_t
//
// >: double element width (vector size unchanged).
// <: half element width (vector size unchanged).
@@ -301,6 +302,7 @@ class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class VInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 6ec927e13a7552e..904df6f6163bc03 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2275,6 +2275,11 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
Width = NumEls * ElBits * NF; \
Align = NumEls * ElBits; \
break;
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ case BuiltinType::Id: \
+ Width = Bits; \
+ Align = Bits; \
+ break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
@@ -4395,15 +4400,18 @@ ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
ElBits, NF) \
case BuiltinType::Id: \
return {BFloat16Ty, llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
+ ElBits, NF) \
+ case BuiltinType::Id: \
+ return {MFloat8Ty, llvm::ElementCount::getScalable(NumEls), NF};
#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
case BuiltinType::Id: \
return {BoolTy, llvm::ElementCount::getScalable(NumEls), NF};
#define AARCH64_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
ElBits, NF) \
case BuiltinType::Id: \
- return {getIntTypeForBitwidth(ElBits, false), \
- llvm::ElementCount::getFixed(NumEls), NF};
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
+ return {MFloat8Ty, llvm::ElementCount::getFixed(NumEls), NF};
+#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \
@@ -4465,11 +4473,16 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) { \
return SingletonId; \
}
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
+ ElBits, NF) \
+ if (EltTy->isMFloat8Type() && EltTySize == ElBits && \
+ NumElts == (NumEls * NF) && NumFields == 1) { \
+ return SingletonId; \
+ }
#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
if (EltTy->isBooleanType() && NumElts == (NumEls * NF) && NumFields == 1) \
return SingletonId;
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId)
+#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
} else if (Target->hasRISCVVTypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
@@ -12177,8 +12190,15 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
- // TODO: No way to make AltiVec vectors in builtins yet.
- Type = Context.getVectorType(ElementType, NumElements, VectorKind::Generic);
+ if (ElementType == Context.MFloat8Ty) {
+ assert((NumElements == 8 || NumElements == 16) &&
+ "Invalid number of elements");
+ Type = NumElements == 8 ? Context.MFloat8x8Ty : Context.MFloat8x16Ty;
+ } else {
+ // TODO: No way to make AltiVec vectors in builtins yet.
+ Type =
+ Context.getVectorType(ElementType, NumElements, VectorKind::Generic);
+ }
break;
}
case 'E': {
@@ -12234,6 +12254,9 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
case 'p':
Type = Context.getProcessIDType();
break;
+ case 'm':
+ Type = Context.MFloat8Ty;
+ break;
}
// If there are modifiers and if we're allowed to parse them, go for it.
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 47aa9b40dab845b..9404f9fd9b151d5 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3438,6 +3438,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
type_name = MangledName; \
Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \
break;
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ case BuiltinType::Id: \
+ type_name = MangledName; \
+ Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \
+ break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 976361d07b68bf5..1c21fad75253d81 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2527,9 +2527,7 @@ bool Type::isSVESizelessBuiltinType() const {
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
case BuiltinType::Id: \
return true;
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
- case BuiltinType::Id: \
- return false;
+#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
default:
return false;
@@ -2578,6 +2576,19 @@ bool Type::isSveVLSBuiltinType() const {
return false;
}
+bool Type::isNeonVectorBuiltinType() const {
+ if (const BuiltinType *BT = getAs<BuiltinType>()) {
+ switch (BT->getKind()) {
+ case BuiltinType::MFloat8x8:
+ case BuiltinType::MFloat8x16:
+ return true;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
QualType Type::getSizelessVectorEltType(const ASTContext &Ctx) const {
assert(isSizelessVectorType() && "Must be sizeless vector type");
// Currently supports SVE and RVV
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 49a4c1ecc825e74..1ab04663eaaeca9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6868,6 +6868,32 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
return Builder.CreateCall(F, Ops, name);
}
+Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
+ SmallVectorImpl<Value *> &Ops,
+ Value *FPM, const char *name) {
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
+ return EmitNeonCall(F, Ops, name);
+}
+
+llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
+ unsigned IID, bool ExtendLane, llvm::Type *RetTy,
+ SmallVectorImpl<llvm::Value *> &Ops, unsigned ICEArguments,
+ const CallExpr *E, const char *name) {
+
+ const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
+ RetTy->getPrimitiveSizeInBits();
+ llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
+ Ops[1]->getType()};
+ if (ExtendLane) {
+ auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
+ Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
+ Builder.getInt64(0));
+ }
+ llvm::Value *FPM =
+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
+ return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
+}
+
Value *CodeGenFunction::EmitNeonShiftVector(Val...
[truncated]
You can test this locally with the following command:

git-clang-format --diff a2995cb4bb21ba2fe6277bbcd24b8ab1b357e12d d7896c34fc681c020b39a5a6d7ddfc403d915d20 --extensions cpp,c,h -- clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c clang/test/CodeGen/AArch64/fp8-cast.c clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c clang/test/Sema/aarch64-fp8-cast.c clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c clang/test/Sema/builtin-shufflevector.c clang/include/clang/AST/Type.h clang/include/clang/Sema/Sema.h clang/lib/AST/ASTContext.cpp clang/lib/AST/ItaniumMangle.cpp clang/lib/AST/Type.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/CodeGen/CodeGenTypes.cpp clang/lib/CodeGen/Targets/AArch64.cpp clang/lib/Sema/SemaCast.cpp clang/lib/Sema/SemaChecking.cpp clang/lib/Sema/SemaExpr.cpp clang/utils/TableGen/NeonEmitter.cpp clang/utils/TableGen/SveEmitter.cpp

View the diff from clang-format here.

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 69ce28ecb6..32033dcc6e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14148,8 +14148,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
Builder.getInt64(0));
- return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2,
- Ty, Ops[1]->getType(), false, Ops, E, "vfcvtn2");
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
+ Ops[1]->getType(), false, Ops, E, "vfcvtn2");
}
case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
Force-pushed from 3e4db7f to cde75ee
Force-pushed from cde75ee to 54d4664
LGTM
Force-pushed from 54d4664 to e1a3e8f
✅ With the latest revision this PR passed the Python code formatter.
The commit message is missing the f32_mf8 intrinsics.
* The FP8 scalar type (`__mfp8`) was described as a vector type.
* The FP8 vector types were described/assumed to have integer element type (the element type ought to be `__mfp8`).
* Add support for the `m` type specifier (denoting `__mfp8`) in `DecodeTypeFromStr` and create SVE builtin prototypes using the specifier, instead of `int8_t`.
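A minimal sketch of what this type-model change implies (illustrative only, assuming an AArch64 target where the FP8 builtin types exist; this is not a test from the patch):

```c
// Sketch only: __mfp8 is now a 1-byte scalar builtin type (Width = Align = 8
// bits) rather than a 1-element vector, and the fixed-length Neon FP8 vectors
// are built from __mfp8 elements.
_Static_assert(sizeof(__mfp8) == 1, "FP8 scalar occupies one byte");
_Static_assert(_Alignof(__mfp8) == 1, "FP8 scalar is byte-aligned");
_Static_assert(sizeof(__MFloat8x8_t) == 8, "64-bit FP8 Neon vector");
_Static_assert(sizeof(__MFloat8x16_t) == 16, "128-bit FP8 Neon vector");
```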
…shufflevector

The Neon vector types for FP8 (`__MFloat8x8_t` and `__MFloat8x16_t`) are implemented as builtin types and need a special case in `__builtin_shufflevector`. A hypothetical sketch of the case this enables is shown below.
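The following sketch is illustrative only (not a test from this patch): it assumes an FP8-capable <arm_neon.h> and that the two-argument, index-vector form of the builtin accepts the FP8 Neon builtin types after this change.

```c
// Sketch only: assumes mfloat8x8_t is available and that
// __builtin_shufflevector's two-argument (index-vector) form now handles
// the FP8 Neon builtin types.
#include <arm_neon.h>

mfloat8x8_t reverse_fp8(mfloat8x8_t v) {
  // The second argument must be an integer vector with the same number of
  // elements as the first; otherwise Sema reports
  // err_shufflevector_incompatible_index_vector.
  uint8x8_t idx = {7, 6, 5, 4, 3, 2, 1, 0};
  return __builtin_shufflevector(v, idx);
}
```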
This patch adds the following intrinsics:

float16x4_t vdot_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpm)
float16x8_t vdotq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm)
float16x4_t vdot_lane_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x4_t vdot_laneq_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
Force-pushed from e1a3e8f to d7896c3
This patch adds the following intrinsics: