-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang] Add support for scalable vectors in __builtin_reduce_* functions #87750
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Lawrence Benson (lawben) ChangesCurrently, a lot of This PR adds scalable vector support for:
Note: For all except Full diff: https://github.com/llvm/llvm-project/pull/87750.diff 5 Files Affected:
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 99f45d518c7960..a9f888a037109b 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2172,6 +2172,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
/// 'riscv_rvv_vector_bits' type attribute as VectorType.
QualType getRVVEltType(const ASTContext &Ctx) const;
+ /// Returns the representative type for the element of a sizeless vector
+ /// builtin type.
+ QualType getSizelessVectorEltType(const ASTContext &Ctx) const;
+
/// Types are partitioned into 3 broad categories (C99 6.2.5p1):
/// object types, function types, and incomplete types.
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index cb22c91a12aa89..dcba47de0cc7ae 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2510,6 +2510,18 @@ bool Type::isSveVLSBuiltinType() const {
return false;
}
+QualType Type::getSizelessVectorEltType(const ASTContext &Ctx) const {
+ assert(isSizelessVectorType() && "Must be sizeless vector type");
+ // Currently supports SVE and RVV
+ if (isSVESizelessBuiltinType())
+ return getSveEltType(Ctx);
+
+ if (isRVVSizelessBuiltinType())
+ return getRVVEltType(Ctx);
+
+ llvm_unreachable("Unhandled type");
+}
+
QualType Type::getSveEltType(const ASTContext &Ctx) const {
assert(isSveVLSBuiltinType() && "unsupported type!");
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2537e715b63ee4..e76a211242fdd7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3868,9 +3868,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_reduce_max: {
- auto GetIntrinsicID = [](QualType QT) {
+ auto GetIntrinsicID = [this](QualType QT) {
if (auto *VecTy = QT->getAs<VectorType>())
QT = VecTy->getElementType();
+ else if (QT->isSizelessVectorType())
+ QT = QT->getSizelessVectorEltType(CGM.getContext());
+
if (QT->isSignedIntegerType())
return llvm::Intrinsic::vector_reduce_smax;
if (QT->isUnsignedIntegerType())
@@ -3883,9 +3886,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_reduce_min: {
- auto GetIntrinsicID = [](QualType QT) {
+ auto GetIntrinsicID = [this](QualType QT) {
if (auto *VecTy = QT->getAs<VectorType>())
QT = VecTy->getElementType();
+ else if (QT->isSizelessVectorType())
+ QT = QT->getSizelessVectorEltType(CGM.getContext());
+
if (QT->isSignedIntegerType())
return llvm::Intrinsic::vector_reduce_smin;
if (QT->isUnsignedIntegerType())
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3dcd18b3afc8b4..6d45dd8bb7ed97 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3166,13 +3166,20 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
const Expr *Arg = TheCall->getArg(0);
const auto *TyA = Arg->getType()->getAs<VectorType>();
- if (!TyA) {
+
+ QualType ElTy;
+ if (TyA)
+ ElTy = TyA->getElementType();
+ else if (Arg->getType()->isSizelessVectorType())
+ ElTy = Arg->getType()->getSizelessVectorEltType(Context);
+
+ if (ElTy.isNull()) {
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector ty*/ 4 << Arg->getType();
return ExprError();
}
- TheCall->setType(TyA->getElementType());
+ TheCall->setType(ElTy);
break;
}
@@ -3188,12 +3195,20 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
const Expr *Arg = TheCall->getArg(0);
const auto *TyA = Arg->getType()->getAs<VectorType>();
- if (!TyA || !TyA->getElementType()->isIntegerType()) {
+
+ QualType ElTy;
+ if (TyA)
+ ElTy = TyA->getElementType();
+ else if (Arg->getType()->isSizelessVectorType())
+ ElTy = Arg->getType()->getSizelessVectorEltType(Context);
+
+ if (ElTy.isNull() || !ElTy->isIntegerType()) {
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of integers */ 6 << Arg->getType();
return ExprError();
}
- TheCall->setType(TyA->getElementType());
+
+ TheCall->setType(ElTy);
break;
}
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index 34f39cea5265ea..acafe9222d59fd 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -1,5 +1,8 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s
+
typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
@@ -134,3 +137,53 @@ void test_builtin_reduce_and(si8 vi1, u4 vu1) {
// CHECK-NEXT: call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[VU1]])
unsigned r3 = __builtin_reduce_and(vu1);
}
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+
+void test_builtin_reduce_SVE(int a, unsigned long long b, short c, float d) {
+ // SVE-LABEL: void @test_builtin_reduce_SVE(
+
+ svint32_t vec_a = svdup_s32(a);
+ svuint64_t vec_b = svdup_u64(b);
+ svint16_t vec_c1 = svdup_s16(c);
+ svuint16_t vec_c2 = svdup_u16(c);
+ svfloat32_t vec_d = svdup_f32(d);
+
+ // SVE: [[VF1:%.+]] = load <vscale x 4 x i32>, ptr %vec_a
+ // SVE-NEXT: call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[VF1]])
+ int r1 = __builtin_reduce_add(vec_a);
+
+ // SVE: [[VF2:%.+]] = load <vscale x 4 x i32>, ptr %vec_a
+ // SVE-NEXT: call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> [[VF2]])
+ int r2 = __builtin_reduce_mul(vec_a);
+
+ // SVE: [[VF3:%.+]] = load <vscale x 2 x i64>, ptr %vec_b
+ // SVE-NEXT: call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> [[VF3]])
+ long long r3 = __builtin_reduce_xor(vec_b);
+
+ // SVE: [[VF4:%.+]] = load <vscale x 2 x i64>, ptr %vec_b
+ // SVE-NEXT: call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[VF4]])
+ long long r4 = __builtin_reduce_or(vec_b);
+
+ // SVE: [[VF5:%.+]] = load <vscale x 2 x i64>, ptr %vec_b
+ // SVE-NEXT: call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[VF5]])
+ long long r5 = __builtin_reduce_and(vec_b);
+
+ // SVE: [[VF6:%.+]] = load <vscale x 8 x i16>, ptr %vec_c1
+ // SVE-NEXT: call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> [[VF6]])
+ short r6 = __builtin_reduce_max(vec_c1);
+
+ // SVE: [[VF7:%.+]] = load <vscale x 8 x i16>, ptr %vec_c2
+ // SVE-NEXT: call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> [[VF7]])
+ unsigned short r7 = __builtin_reduce_min(vec_c2);
+
+ // SVE: [[VF8:%.+]] = load <vscale x 4 x float>, ptr %vec_d
+ // SVE-NEXT: call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[VF8]])
+ float r8 = __builtin_reduce_max(vec_d);
+
+ // SVE: [[VF9:%.+]] = load <vscale x 4 x float>, ptr %vec_d
+ // SVE-NEXT: call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[VF9]])
+ float r9 = __builtin_reduce_min(vec_d);
+}
+#endif
|
Given there isn't any target-independent way to construct such a type, it feels sort of redundant. (A user could easily implement this themselves.) But I can't think of a reason to avoid adding this. Please update documentation and add a release note. |
@efriedma-quic While there is no target independent way to create a type, it is possible to pass this type around. This is the use case I'm thinking of. So if a user has one I'll add the changes to the docs and release note. |
@efriedma-quic @fhahn just a quick ping for this PR. is something still missing? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Currently, a lot of
__builtin_reduce_*
function do not support scalable vectors, i.e., ARM SVE and RISCV V. This PR adds support for them. The main code change is to use a different path to extract the type from the vectors, the rest is the same and LLVM supports the reduce functions forvscale
vectors.This PR adds scalable vector support for:
__builtin_reduce_add
__builtin_reduce_mul
__builtin_reduce_xor
__builtin_reduce_or
__builtin_reduce_and
__builtin_reduce_min
__builtin_reduce_max
Note: For all except
min/max
, the element type must still be an integer value. Adding floating point support foradd
andmul
is still an open TODO.