Skip to content

Commit

Permalink
[Clang] Add __builtin_vectorelements to get number of elements in vec…
Browse files Browse the repository at this point in the history
…tor (#69010)

Adds a new `__builtin_vectorelements()` function which returns the
number of elements for a given vector either at compile-time for
fixed-sized vectors, e.g., created via `__attribute__((vector_size(N)))`
or at runtime via a call to `@llvm.vscale.i32()` for scalable vectors,
e.g., SVE or RISCV V.

The new builtin follows a similar path as `sizeof()`, as it essentially
does the same thing but for the number of elements in vector instead of
the number of bytes. This allows us to re-use a lot of the existing
logic to handle types etc.

A small side addition is `Type::isSizelessVectorType()`, which we need
to distinguish between sizeless vectors (SVE, RISCV V) and sizeless
types (WASM).

This is the [corresponding
discussion](https://discourse.llvm.org/t/new-builtin-function-to-get-number-of-lanes-in-simd-vectors/73911).
  • Loading branch information
lawben committed Oct 19, 2023
1 parent 9ea2fd2 commit de65b6b
Show file tree
Hide file tree
Showing 18 changed files with 283 additions and 8 deletions.
8 changes: 8 additions & 0 deletions clang/docs/LanguageExtensions.rst
Expand Up @@ -619,6 +619,14 @@ Let ``T`` be one of the following types:

For scalar types, consider the operation applied to a vector with a single element.

*Vector Size*
To determine the number of elements in a vector, use ``__builtin_vectorelements()``.
For fixed-sized vectors, e.g., defined via ``__attribute__((vector_size(N)))`` or ARM
NEON's vector types (e.g., ``uint16x8_t``), this returns the constant number of
elements at compile-time. For scalable vectors, e.g., SVE or RISC-V V, the number of
elements is not known at compile-time and is determined at runtime. This builtin can
be used, e.g., to increment the loop-counter in vector-type agnostic loops.

*Elementwise Builtins*

Each builtin returns a vector equivalent to applying the specified operation
Expand Down
6 changes: 6 additions & 0 deletions clang/docs/ReleaseNotes.rst
Expand Up @@ -182,6 +182,12 @@ C23 Feature Support
Non-comprehensive list of changes in this release
-------------------------------------------------

* Clang now has a ``__builtin_vectorelements()`` function that determines the number of elements in a vector.
For fixed-sized vectors, e.g., defined via ``__attribute__((vector_size(N)))`` or ARM NEON's vector types
(e.g., ``uint16x8_t``), this returns the constant number of elements at compile-time.
For scalable vectors, e.g., SVE or RISC-V V, the number of elements is not known at compile-time and is
determined at runtime.

New Compiler Flags
------------------

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/AST/Type.h
Expand Up @@ -2060,6 +2060,9 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
bool isSizelessType() const;
bool isSizelessBuiltinType() const;

/// Returns true for all scalable vector types.
bool isSizelessVectorType() const;

/// Returns true for SVE scalable vector types.
bool isSVESizelessBuiltinType() const;

Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Builtins.def
Expand Up @@ -674,6 +674,7 @@ BUILTIN(__builtin_debugtrap, "v", "n")
BUILTIN(__builtin_unreachable, "v", "nr")
BUILTIN(__builtin_shufflevector, "v." , "nct")
BUILTIN(__builtin_convertvector, "v." , "nct")
BUILTIN(__builtin_vectorelements, "v." , "nct")
BUILTIN(__builtin_alloca, "v*z" , "Fn")
BUILTIN(__builtin_alloca_uninitialized, "v*z", "Fn")
BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn")
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticASTKinds.td
Expand Up @@ -394,6 +394,8 @@ def note_constexpr_unsupported_layout : Note<
"type %0 has unexpected layout">;
def note_constexpr_unsupported_flexible_array : Note<
"flexible array initialization is not yet supported">;
def note_constexpr_non_const_vectorelements : Note<
"cannot determine number of elements for sizeless vectors in a constant expression">;
def err_experimental_clang_interp_failed : Error<
"the experimental clang interpreter failed to evaluate an expression">;

Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Expand Up @@ -10169,8 +10169,8 @@ def err_shufflevector_argument_too_large : Error<

def err_convertvector_non_vector : Error<
"first argument to __builtin_convertvector must be a vector">;
def err_convertvector_non_vector_type : Error<
"second argument to __builtin_convertvector must be a vector type">;
def err_builtin_non_vector_type : Error<
"%0 argument to %1 must be of vector type">;
def err_convertvector_incompatible_vector : Error<
"first two arguments to __builtin_convertvector must have the same number of elements">;

Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/TokenKinds.def
Expand Up @@ -746,6 +746,7 @@ ALIAS("_pascal" , __pascal , KEYBORLAND)

// Clang Extensions.
KEYWORD(__builtin_convertvector , KEYALL)
UNARY_EXPR_OR_TYPE_TRAIT(__builtin_vectorelements, VectorElements, KEYALL)
ALIAS("__char16_t" , char16_t , KEYCXX)
ALIAS("__char32_t" , char32_t , KEYCXX)
KEYWORD(__builtin_bit_cast , KEYALL)
Expand Down
14 changes: 14 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Expand Up @@ -13595,6 +13595,20 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
Info.Ctx.getOpenMPDefaultSimdAlign(E->getArgumentType()))
.getQuantity(),
E);
case UETT_VectorElements: {
QualType Ty = E->getTypeOfArgument();
// If the vector has a fixed size, we can determine the number of elements
// at compile time.
if (Ty->isVectorType())
return Success(Ty->castAs<VectorType>()->getNumElements(), E);

assert(Ty->isSizelessVectorType());
if (Info.InConstantContext)
Info.CCEDiag(E, diag::note_constexpr_non_const_vectorelements)
<< E->getSourceRange();

return false;
}
}

llvm_unreachable("unknown expr/type trait");
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/AST/ItaniumMangle.cpp
Expand Up @@ -5127,6 +5127,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
Diags.Report(DiagID);
return;
}
case UETT_VectorElements: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"cannot yet mangle __builtin_vectorelements expression");
Diags.Report(DiagID);
return;
}
}
break;
}
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/AST/Type.cpp
Expand Up @@ -2369,7 +2369,7 @@ bool Type::isIncompleteType(NamedDecl **Def) const {
}

bool Type::isSizelessBuiltinType() const {
if (isSVESizelessBuiltinType() || isRVVSizelessBuiltinType())
if (isSizelessVectorType())
return true;

if (const BuiltinType *BT = getAs<BuiltinType>()) {
Expand Down Expand Up @@ -2403,6 +2403,10 @@ bool Type::isWebAssemblyTableType() const {

bool Type::isSizelessType() const { return isSizelessBuiltinType(); }

bool Type::isSizelessVectorType() const {
return isSVESizelessBuiltinType() || isRVVSizelessBuiltinType();
}

bool Type::isSVESizelessBuiltinType() const {
if (const BuiltinType *BT = getAs<BuiltinType>()) {
switch (BT->getKind()) {
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGExprScalar.cpp
Expand Up @@ -3083,6 +3083,9 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
E->getTypeOfArgument()->getPointeeType()))
.getQuantity();
return llvm::ConstantInt::get(CGF.SizeTy, Alignment);
} else if (E->getKind() == UETT_VectorElements) {
auto *VecTy = cast<llvm::VectorType>(ConvertType(E->getTypeOfArgument()));
return Builder.CreateElementCount(CGF.SizeTy, VecTy->getElementCount());
}

// If this isn't sizeof(vla), the result must be constant; use the constant
Expand Down
9 changes: 7 additions & 2 deletions clang/lib/Parse/ParseExpr.cpp
Expand Up @@ -1463,6 +1463,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
case tok::kw_vec_step: // unary-expression: OpenCL 'vec_step' expression
// unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')'
case tok::kw___builtin_omp_required_simd_align:
case tok::kw___builtin_vectorelements:
if (NotPrimaryExpression)
*NotPrimaryExpression = true;
AllowSuffix = false;
Expand Down Expand Up @@ -2339,7 +2340,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_typeof_unqual, tok::kw_sizeof,
tok::kw___alignof, tok::kw_alignof, tok::kw__Alignof,
tok::kw_vec_step,
tok::kw___builtin_omp_required_simd_align) &&
tok::kw___builtin_omp_required_simd_align,
tok::kw___builtin_vectorelements) &&
"Not a typeof/sizeof/alignof/vec_step expression!");

ExprResult Operand;
Expand Down Expand Up @@ -2460,7 +2462,8 @@ ExprResult Parser::ParseSYCLUniqueStableNameExpression() {
ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
tok::kw__Alignof, tok::kw_vec_step,
tok::kw___builtin_omp_required_simd_align) &&
tok::kw___builtin_omp_required_simd_align,
tok::kw___builtin_vectorelements) &&
"Not a sizeof/alignof/vec_step expression!");
Token OpTok = Tok;
ConsumeToken();
Expand Down Expand Up @@ -2539,6 +2542,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
ExprKind = UETT_VecStep;
else if (OpTok.is(tok::kw___builtin_omp_required_simd_align))
ExprKind = UETT_OpenMPRequiredSimdAlign;
else if (OpTok.is(tok::kw___builtin_vectorelements))
ExprKind = UETT_VectorElements;

if (isCastExpr)
return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/Sema/SemaChecking.cpp
Expand Up @@ -8752,8 +8752,9 @@ ExprResult Sema::SemaConvertVectorExpr(Expr *E, TypeSourceInfo *TInfo,
diag::err_convertvector_non_vector)
<< E->getSourceRange());
if (!DstTy->isVectorType() && !DstTy->isDependentType())
return ExprError(Diag(BuiltinLoc,
diag::err_convertvector_non_vector_type));
return ExprError(Diag(BuiltinLoc, diag::err_builtin_non_vector_type)
<< "second"
<< "__builtin_convertvector");

if (!SrcTy->isDependentType() && !DstTy->isDependentType()) {
unsigned SrcElts = SrcTy->castAs<VectorType>()->getNumElements();
Expand Down
23 changes: 23 additions & 0 deletions clang/lib/Sema/SemaExpr.cpp
Expand Up @@ -35,6 +35,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TypeTraits.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
Expand Down Expand Up @@ -4353,6 +4354,18 @@ static bool CheckVecStepTraitOperandType(Sema &S, QualType T,
return false;
}

static bool CheckVectorElementsTraitOperandType(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange) {
// builtin_vectorelements supports both fixed-sized and scalable vectors.
if (!T->isVectorType() && !T->isSizelessVectorType())
return S.Diag(Loc, diag::err_builtin_non_vector_type)
<< ""
<< "__builtin_vectorelements" << T << ArgRange;

return false;
}

static bool CheckExtensionTraitOperandType(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange,
Expand Down Expand Up @@ -4454,6 +4467,10 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E,
return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(),
E->getSourceRange());

if (ExprKind == UETT_VectorElements)
return CheckVectorElementsTraitOperandType(*this, ExprTy, E->getExprLoc(),
E->getSourceRange());

// Explicitly list some types as extensions.
if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(),
E->getSourceRange(), ExprKind))
Expand Down Expand Up @@ -4745,6 +4762,10 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType,
if (ExprKind == UETT_VecStep)
return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange);

if (ExprKind == UETT_VectorElements)
return CheckVectorElementsTraitOperandType(*this, ExprType, OpLoc,
ExprRange);

// Explicitly list some types as extensions.
if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange,
ExprKind))
Expand Down Expand Up @@ -4851,6 +4872,8 @@ Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc,
} else if (E->refersToBitField()) { // C99 6.5.3.4p1.
Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0;
isInvalid = true;
} else if (ExprKind == UETT_VectorElements) {
isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_VectorElements);
} else {
isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf);
}
Expand Down
121 changes: 121 additions & 0 deletions clang/test/CodeGen/builtin_vectorelements.c
@@ -0,0 +1,121 @@
// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +neon %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NEON %s
// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,SVE %s
// RUN: %clang_cc1 -O1 -triple riscv64 -target-feature +v %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,RISCV %s

// Note that this does not make sense to check for x86 SIMD types, because
// __m128i, __m256i, and __m512i do not specify the element type. There are no
// "logical" number of elements in them.

typedef int int1 __attribute__((vector_size(4)));
typedef int int4 __attribute__((vector_size(16)));
typedef int int8 __attribute__((vector_size(32)));
typedef int int16 __attribute__((vector_size(64)));
typedef float float2 __attribute__((vector_size(8)));
typedef long extLong4 __attribute__((ext_vector_type(4)));


int test_builtin_vectorelements_int1() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_int1(
// CHECK: ret i32 1
return __builtin_vectorelements(int1);
}

int test_builtin_vectorelements_int4() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_int4(
// CHECK: ret i32 4
return __builtin_vectorelements(int4);
}

int test_builtin_vectorelements_int8() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_int8(
// CHECK: ret i32 8
return __builtin_vectorelements(int8);
}

int test_builtin_vectorelements_int16() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_int16(
// CHECK: ret i32 16
return __builtin_vectorelements(int16);
}

int test_builtin_vectorelements_float2() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_float2(
// CHECK: ret i32 2
return __builtin_vectorelements(float2);
}

int test_builtin_vectorelements_extLong4() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_extLong4(
// CHECK: ret i32 4
return __builtin_vectorelements(extLong4);
}

int test_builtin_vectorelements_multiply_constant() {
// CHECK-LABEL: i32 @test_builtin_vectorelements_multiply_constant(
// CHECK: ret i32 32
return __builtin_vectorelements(int16) * 2;
}


#if defined(__ARM_NEON)
#include <arm_neon.h>

int test_builtin_vectorelements_neon32x4() {
// NEON: i32 @test_builtin_vectorelements_neon32x4(
// NEON: ret i32 4
return __builtin_vectorelements(uint32x4_t);
}

int test_builtin_vectorelements_neon64x1() {
// NEON: i32 @test_builtin_vectorelements_neon64x1(
// NEON: ret i32 1
return __builtin_vectorelements(uint64x1_t);
}
#endif

#if defined(__ARM_FEATURE_SVE)
#include <arm_sve.h>

long test_builtin_vectorelements_sve32() {
// SVE: i64 @test_builtin_vectorelements_sve32(
// SVE: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
// SVE: [[RES:%.+]] = mul i64 [[VSCALE]], 4
// SVE: ret i64 [[RES]]
return __builtin_vectorelements(svuint32_t);
}

long test_builtin_vectorelements_sve8() {
// SVE: i64 @test_builtin_vectorelements_sve8(
// SVE: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
// SVE: [[RES:%.+]] = mul i64 [[VSCALE]], 16
// SVE: ret i64 [[RES]]
return __builtin_vectorelements(svuint8_t);
}
#endif

#if defined(__riscv)
#include <riscv_vector.h>

long test_builtin_vectorelements_riscv8() {
// RISCV: i64 @test_builtin_vectorelements_riscv8(
// RISCV: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
// RISCV: [[RES:%.+]] = mul i64 [[VSCALE]], 8
// RISCV: ret i64 [[RES]]
return __builtin_vectorelements(vuint8m1_t);
}

long test_builtin_vectorelements_riscv64() {
// RISCV: i64 @test_builtin_vectorelements_riscv64(
// RISCV: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
// RISCV: ret i64 [[VSCALE]]
return __builtin_vectorelements(vuint64m1_t);
}

long test_builtin_vectorelements_riscv32m2() {
// RISCV: i64 @test_builtin_vectorelements_riscv32m2(
// RISCV: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
// RISCV: [[RES:%.+]] = mul i64 [[VSCALE]], 4
// RISCV: ret i64 [[RES]]
return __builtin_vectorelements(vuint32m2_t);
}
#endif
23 changes: 23 additions & 0 deletions clang/test/Sema/builtin_vectorelements.c
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -triple aarch64 -fsyntax-only -verify -disable-llvm-passes %s

void test_builtin_vectorelements() {
__builtin_vectorelements(int); // expected-error {{argument to __builtin_vectorelements must be of vector type}}
__builtin_vectorelements(float); // expected-error {{argument to __builtin_vectorelements must be of vector type}}
__builtin_vectorelements(long*); // expected-error {{argument to __builtin_vectorelements must be of vector type}}

int a;
__builtin_vectorelements(a); // expected-error {{argument to __builtin_vectorelements must be of vector type}}

typedef int veci4 __attribute__((vector_size(16)));
(void) __builtin_vectorelements(veci4);

veci4 vec;
(void) __builtin_vectorelements(vec);

typedef veci4 some_other_vec;
(void) __builtin_vectorelements(some_other_vec);

struct Foo { int a; };
__builtin_vectorelements(struct Foo); // expected-error {{argument to __builtin_vectorelements must be of vector type}}
}

0 comments on commit de65b6b

Please sign in to comment.