[Clang] Support generic bit counting builtins on fixed boolean vectors #154203
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen
Author: Joseph Huber (jhuber6)
Changes
Summary:
Patch is 31.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154203.diff
4 Files Affected:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 40c56501b0c14..8eaa0c617ba4c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13385,6 +13385,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
case Builtin::BI__lzcnt:
case Builtin::BI__lzcnt64: {
+ // TODO: Handle boolean vectors in constexpr contexts.
+ if (E->getArg(0)->getType()->isExtVectorBoolType())
+ return false;
+
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
@@ -13463,6 +13467,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_ctzll:
case Builtin::BI__builtin_ctzs:
case Builtin::BI__builtin_ctzg: {
+ // TODO: Handle boolean vectors in constexpr contexts.
+ if (E->getArg(0)->getType()->isExtVectorBoolType())
+ return false;
+
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
@@ -13673,6 +13681,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__popcnt16: // Microsoft variants of popcount
case Builtin::BI__popcnt:
case Builtin::BI__popcnt64: {
+ // TODO: Handle boolean vectors in constexpr contexts.
+ if (E->getArg(0)->getType()->isExtVectorBoolType())
+ return false;
+
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a1f2a874f010d..f0b9c34420ea4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1693,6 +1693,22 @@ getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
llvm_unreachable("invalid interlocking");
}
+static llvm::Value *EmitIntegerExpr(CodeGenFunction &CGF, const Expr *E) {
+ llvm::Value *ArgValue = CGF.EmitScalarExpr(E);
+ llvm::Type *ArgType = ArgValue->getType();
+
+ if (auto *VT = dyn_cast<llvm::FixedVectorType>(ArgType);
+ VT && VT->getElementType()->isIntegerTy(1)) {
+ llvm::Type *StorageType = CGF.ConvertTypeForMem(E->getType());
+ ArgValue = CGF.emitBoolVecConversion(
+ ArgValue, StorageType->getPrimitiveSizeInBits(), "insertvec");
+ ArgValue = CGF.Builder.CreateBitCast(ArgValue, StorageType);
+ ArgType = ArgValue->getType();
+ }
+
+ return ArgValue;
+}
+
/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
/// bits and a bit position and read and optionally modify the bit at that
/// position. The position index can be arbitrarily large, i.e. it can be larger
@@ -2020,7 +2036,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
"Unsupported builtin check kind");
- Value *ArgValue = EmitScalarExpr(E);
+ Value *ArgValue = EmitIntegerExpr(*this, E);
if (!SanOpts.has(SanitizerKind::Builtin))
return ArgValue;
@@ -3331,7 +3347,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
E->getNumArgs() > 1;
Value *ArgValue =
- HasFallback ? EmitScalarExpr(E->getArg(0))
+ HasFallback ? EmitIntegerExpr(*this, E->getArg(0))
: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
llvm::Type *ArgType = ArgValue->getType();
@@ -3363,7 +3379,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
E->getNumArgs() > 1;
Value *ArgValue =
- HasFallback ? EmitScalarExpr(E->getArg(0))
+ HasFallback ? EmitIntegerExpr(*this, E->getArg(0))
: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
llvm::Type *ArgType = ArgValue->getType();
@@ -3446,7 +3462,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_popcountl:
case Builtin::BI__builtin_popcountll:
case Builtin::BI__builtin_popcountg: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
+ Value *ArgValue = EmitIntegerExpr(*this, E->getArg(0));
llvm::Type *ArgType = ArgValue->getType();
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2dc4ee74dc9df..b79c0c0345171 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2214,7 +2214,7 @@ static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) {
QualType ArgTy = Arg->getType();
- if (!ArgTy->isUnsignedIntegerType()) {
+ if (!ArgTy->isUnsignedIntegerType() && !ArgTy->isExtVectorBoolType()) {
S.Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
<< ArgTy;
@@ -2239,7 +2239,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
QualType Arg0Ty = Arg0->getType();
- if (!Arg0Ty->isUnsignedIntegerType()) {
+ if (!Arg0Ty->isUnsignedIntegerType() && !Arg0Ty->isExtVectorBoolType()) {
S.Diag(Arg0->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
<< Arg0Ty;
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index aa9965b815983..7ad143ed165c8 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -991,247 +991,288 @@ void test_builtin_os_log_long_double(void *buf, long double ld) {
void test_builtin_popcountg(unsigned char uc, unsigned short us,
unsigned int ui, unsigned long ul,
unsigned long long ull, unsigned __int128 ui128,
- unsigned _BitInt(128) ubi128) {
+ unsigned _BitInt(128) ubi128,
+ _Bool __attribute__((ext_vector_type(8))) vb8) {
volatile int pop;
- pop = __builtin_popcountg(uc);
- // CHECK: %1 = load i8, ptr %uc.addr, align 1
- // CHECK-NEXT: %2 = call i8 @llvm.ctpop.i8(i8 %1)
- // CHECK-NEXT: %cast = zext i8 %2 to i32
+ // CHECK: %2 = load i8, ptr %uc.addr, align 1
+ // CHECK-NEXT: %3 = call i8 @llvm.ctpop.i8(i8 %2)
+ // CHECK-NEXT: %cast = zext i8 %3 to i32
// CHECK-NEXT: store volatile i32 %cast, ptr %pop, align 4
+ pop = __builtin_popcountg(uc);
+ // CHECK: %4 = load i16, ptr %us.addr, align 2
+ // CHECK-NEXT: %5 = call i16 @llvm.ctpop.i16(i16 %4)
+ // CHECK-NEXT: %cast2 = zext i16 %5 to i32
+ // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4
pop = __builtin_popcountg(us);
- // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
- // CHECK-NEXT: %4 = call i16 @llvm.ctpop.i16(i16 %3)
- // CHECK-NEXT: %cast1 = zext i16 %4 to i32
- // CHECK-NEXT: store volatile i32 %cast1, ptr %pop, align 4
+ // CHECK: %6 = load i32, ptr %ui.addr, align 4
+ // CHECK-NEXT: %7 = call i32 @llvm.ctpop.i32(i32 %6)
+ // CHECK-NEXT: store volatile i32 %7, ptr %pop, align 4
pop = __builtin_popcountg(ui);
- // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
- // CHECK-NEXT: %6 = call i32 @llvm.ctpop.i32(i32 %5)
- // CHECK-NEXT: store volatile i32 %6, ptr %pop, align 4
+ // CHECK: %8 = load i64, ptr %ul.addr, align 8
+ // CHECK-NEXT: %9 = call i64 @llvm.ctpop.i64(i64 %8)
+ // CHECK-NEXT: %cast3 = trunc i64 %9 to i32
+ // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4
pop = __builtin_popcountg(ul);
- // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8
- // CHECK-NEXT: %8 = call i64 @llvm.ctpop.i64(i64 %7)
- // CHECK-NEXT: %cast2 = trunc i64 %8 to i32
- // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4
+ // CHECK: %10 = load i64, ptr %ull.addr, align 8
+ // CHECK-NEXT: %11 = call i64 @llvm.ctpop.i64(i64 %10)
+ // CHECK-NEXT: %cast4 = trunc i64 %11 to i32
+ // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4
pop = __builtin_popcountg(ull);
- // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8
- // CHECK-NEXT: %10 = call i64 @llvm.ctpop.i64(i64 %9)
- // CHECK-NEXT: %cast3 = trunc i64 %10 to i32
- // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4
+ // CHECK: %12 = load i128, ptr %ui128.addr, align 16
+ // CHECK-NEXT: %13 = call i128 @llvm.ctpop.i128(i128 %12)
+ // CHECK-NEXT: %cast5 = trunc i128 %13 to i32
+ // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4
pop = __builtin_popcountg(ui128);
- // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16
- // CHECK-NEXT: %12 = call i128 @llvm.ctpop.i128(i128 %11)
- // CHECK-NEXT: %cast4 = trunc i128 %12 to i32
- // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4
+ // CHECK: %14 = load i128, ptr %ubi128.addr, align 8
+ // CHECK-NEXT: %15 = call i128 @llvm.ctpop.i128(i128 %14)
+ // CHECK-NEXT: %cast6 = trunc i128 %15 to i32
+ // CHECK-NEXT: store volatile i32 %cast6, ptr %pop, align 4
pop = __builtin_popcountg(ubi128);
- // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8
- // CHECK-NEXT: %14 = call i128 @llvm.ctpop.i128(i128 %13)
- // CHECK-NEXT: %cast5 = trunc i128 %14 to i32
- // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4
- // CHECK-NEXT: ret void
+ // CHECK: %load_bits7 = load i8, ptr %vb8.addr, align 1
+ // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1>
+ // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8
+ // CHECK-NEXT: %18 = call i8 @llvm.ctpop.i8(i8 %17)
+ // CHECK-NEXT: %cast8 = zext i8 %18 to i32
+ // CHECK-NEXT: store volatile i32 %cast8, ptr %pop, align 4
+ pop = __builtin_popcountg(vb8);
}
// CHECK-LABEL: define{{.*}} void @test_builtin_clzg
void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
unsigned long ul, unsigned long long ull,
unsigned __int128 ui128, unsigned _BitInt(128) ubi128,
- signed char sc, short s, int i) {
+ signed char sc, short s, int i,
+ _Bool __attribute__((ext_vector_type(8))) vb8) {
volatile int lz;
+ // CHECK: %2 = load i8, ptr %uc.addr, align 1
+ // CHECK-NEXT: %3 = call i8 @llvm.ctlz.i8(i8 %2, i1 true)
+ // CHECK-NEXT: %cast = zext i8 %3 to i32
+ // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4
lz = __builtin_clzg(uc);
- // CHECK: %1 = load i8, ptr %uc.addr, align 1
- // CHECK-NEXT: %2 = call i8 @llvm.ctlz.i8(i8 %1, i1 true)
- // CHECK-NEXT: %cast = zext i8 %2 to i32
- // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4
+ // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2
+ // CHECK-NEXT: %5 = call i16 @llvm.ctlz.i16(i16 %4, i1 true)
+ // CHECK-NEXT: %cast2 = zext i16 %5 to i32
+ // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4
lz = __builtin_clzg(us);
- // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
- // CHECK-NEXT: %4 = call i16 @llvm.ctlz.i16(i16 %3, i1 true)
- // CHECK-NEXT: %cast1 = zext i16 %4 to i32
- // CHECK-NEXT: store volatile i32 %cast1, ptr %lz, align 4
+ // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4
+ // CHECK-NEXT: %7 = call i32 @llvm.ctlz.i32(i32 %6, i1 true)
+ // CHECK-NEXT: store volatile i32 %7, ptr %lz, align 4
lz = __builtin_clzg(ui);
- // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
- // CHECK-NEXT: %6 = call i32 @llvm.ctlz.i32(i32 %5, i1 true)
- // CHECK-NEXT: store volatile i32 %6, ptr %lz, align 4
+ // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8
+ // CHECK-NEXT: %9 = call i64 @llvm.ctlz.i64(i64 %8, i1 true)
+ // CHECK-NEXT: %cast3 = trunc i64 %9 to i32
+ // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4
lz = __builtin_clzg(ul);
- // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8
- // CHECK-NEXT: %8 = call i64 @llvm.ctlz.i64(i64 %7, i1 true)
- // CHECK-NEXT: %cast2 = trunc i64 %8 to i32
- // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4
+ // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8
+ // CHECK-NEXT: %11 = call i64 @llvm.ctlz.i64(i64 %10, i1 true)
+ // CHECK-NEXT: %cast4 = trunc i64 %11 to i32
+ // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4
lz = __builtin_clzg(ull);
- // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8
- // CHECK-NEXT: %10 = call i64 @llvm.ctlz.i64(i64 %9, i1 true)
- // CHECK-NEXT: %cast3 = trunc i64 %10 to i32
- // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4
+ // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16
+ // CHECK-NEXT: %13 = call i128 @llvm.ctlz.i128(i128 %12, i1 true)
+ // CHECK-NEXT: %cast5 = trunc i128 %13 to i32
+ // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4
lz = __builtin_clzg(ui128);
- // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16
- // CHECK-NEXT: %12 = call i128 @llvm.ctlz.i128(i128 %11, i1 true)
- // CHECK-NEXT: %cast4 = trunc i128 %12 to i32
- // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4
+ // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8
+ // CHECK-NEXT: %15 = call i128 @llvm.ctlz.i128(i128 %14, i1 true)
+ // CHECK-NEXT: %cast6 = trunc i128 %15 to i32
+ // CHECK-NEXT: store volatile i32 %cast6, ptr %lz, align 4
lz = __builtin_clzg(ubi128);
- // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8
- // CHECK-NEXT: %14 = call i128 @llvm.ctlz.i128(i128 %13, i1 true)
- // CHECK-NEXT: %cast5 = trunc i128 %14 to i32
- // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4
+ // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1
+ // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1>
+ // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8
+ // CHECK-NEXT: %18 = call i8 @llvm.ctlz.i8(i8 %17, i1 true)
+ // CHECK-NEXT: %cast8 = zext i8 %18 to i32
+ // CHECK-NEXT: store volatile i32 %cast8, ptr %lz, align 4
+ lz = __builtin_clzg(vb8);
+ // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1
+ // CHECK-NEXT: %20 = call i8 @llvm.ctlz.i8(i8 %19, i1 true)
+ // CHECK-NEXT: %cast9 = zext i8 %20 to i32
+ // CHECK-NEXT: %iszero = icmp eq i8 %19, 0
+ // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1
+ // CHECK-NEXT: %conv = sext i8 %21 to i32
+ // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast9
+ // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4
lz = __builtin_clzg(uc, sc);
- // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
- // CHECK-NEXT: %16 = call i8 @llvm.ctlz.i8(i8 %15, i1 true)
- // CHECK-NEXT: %cast6 = zext i8 %16 to i32
- // CHECK-NEXT: %iszero = icmp eq i8 %15, 0
- // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
- // CHECK-NEXT: %conv = sext i8 %17 to i32
- // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast6
- // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4
+ // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2
+ // CHECK-NEXT: %23 = call i16 @llvm.ctlz.i16(i16 %22, i1 true)
+ // CHECK-NEXT: %cast10 = zext i16 %23 to i32
+ // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0
+ // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1
+ // CHECK-NEXT: %conv12 = zext i8 %24 to i32
+ // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %cast10
+ // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4
lz = __builtin_clzg(us, uc);
- // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
- // CHECK-NEXT: %19 = call i16 @llvm.ctlz.i16(i16 %18, i1 true)
- // CHECK-NEXT: %cast7 = zext i16 %19 to i32
- // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
- // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
- // CHECK-NEXT: %conv9 = zext i8 %20 to i32
- // CHECK-NEXT: %clzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7
- // CHECK-NEXT: store volatile i32 %clzg10, ptr %lz, align 4
+ // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4
+ // CHECK-NEXT: %26 = call i32 @llvm.ctlz.i32(i32 %25, i1 true)
+ // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0
+ // CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2
+ // CHECK-NEXT: %conv15 = sext i16 %27 to i32
+ // CHECK-NEXT: %clzg16 = select i1 %iszero14, i32 %conv15, i32 %26
+ // CHECK-NEXT: store volatile i32 %clzg16, ptr %lz, align 4
lz = __builtin_clzg(ui, s);
- // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4
- // CHECK-NEXT: %22 = call i32 @llvm.ctlz.i32(i32 %21, i1 true)
- // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0
- // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2
- // CHECK-NEXT: %conv12 = sext i16 %23 to i32
- // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %22
- // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4
+ // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8
+ // CHECK-NEXT: %29 = call i64 @llvm.ctlz.i64(i64 %28, i1 true)
+ // CHECK-NEXT: %cast17 = trunc i64 %29 to i32
+ // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0
+ // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2
+ // CHECK-NEXT: %conv19 = zext i16 %30 to i32
+ // CHECK-NEXT: %clzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17
+ // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4
lz = __builtin_clzg(ul, us);
- // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8
- // CHECK-NEXT: %25 = call i64 @llvm.ctlz.i64(i64 %24, i1 true)
- // CHECK-NEXT: %cast14 = trunc i64 %25 to i32
- // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0
- // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2
- // CHECK-NEXT: %conv16 = zext i16 %26 to i32
- // CHECK-NEXT: %clzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14
- // CHECK-NEXT: store volatile i32 %clzg17, ptr %lz, align 4
+ // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8
+ // CHECK-NEXT: %32 = call i64 @llvm.ctlz.i64(i64 %31, i1 true)
+ // CHECK-NEXT: %cast21 = trunc i64 %32 to i32
+ // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0
+ // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4
+ // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %33, i32 %cast21
+ // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4
lz = __builtin_clzg(ull, i);
- // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8
- // CHECK-NEXT: %28 = call i64 @llvm.ctlz.i64(i64 %27, i1 true)
- // CHECK-NEXT: %cast18 = trunc i64 %28 to i32
- // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0
- // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4
- // CHECK-NEXT: %clzg20 = select i1 %iszero19, i32 %29, i32 %cast18
- // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4
+ // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16
+ // CHECK-NEXT: %35 = call i128 @llvm.ctlz.i128(i128 %34, i1 true)
+ // CHECK-NEXT: %cast24 = trunc i128 %35 to i32
+ // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0
+ // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4
+ // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %36, i32 %cast24
+ // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4
lz = __builtin_clzg(ui128, i);
- // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16
- // CHECK-NEXT: %31 = call i128 @llvm.ctlz.i128(i128 %30, i1 true)
- // CHECK-NEXT: %cast21 = trunc i128 %31 to i32
- // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0
- // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4
- // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %32, i32 %cast21
- // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4
+ // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8
+ // CHECK-NEXT: %38 = call i128 @llvm.ctlz.i128(i128 %37, i1 true)
+ // CHECK-NEXT: %cast27 = trunc i128 %38 to i32
+ // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0
+ // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 4
+ // CHECK-NEXT: %clzg29 = select i1 %iszero28, i32 %39, i32 %cast27
+ // CHECK-NEXT: store volatile i32 %clzg29, ptr %lz, align 4
lz = __builtin_clzg(ubi128, i);
- // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8
- // CHECK-NEXT: %34 = call i128 @llvm.ctlz.i128(i128 %33, i1 true)
- // CHECK-NEXT: %cast24 = trunc i128 %34 to i32
- // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0
- // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4
- // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %35, i32 %cast24
- // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4
- // CHECK-NEXT: ret void
+ // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1
+ // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1>
+ // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8
+ // CHECK-NEXT: %42 = call i8 @llvm.ctlz....
[truncated]
I'm looking into how to make this …
c2f86a3 to 6aee3cf
Is the idea behind only supporting boolean vectors that the operation on arbitrary integer vectors probably ought to be a vector-to-vector operation? That seems reasonable, but are we going to regret having inconsistent behavior between different vector types, then?
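A minimal sketch of the distinction under discussion, assuming only the semantics exercised by the tests in this patch (the typedef and function names are illustrative, not from the patch): a fixed boolean vector is treated as a single bitmask and the generic builtin returns one scalar count, whereas an ordinary integer vector is still rejected by Sema, since an integer-vector form would more naturally be elementwise.

typedef _Bool bool8 __attribute__((ext_vector_type(8)));
typedef unsigned int uint4 __attribute__((ext_vector_type(4)));

int count_set_lanes(bool8 mask) {
  // Accepted with this patch: the whole mask is counted as one integer.
  return __builtin_popcountg(mask);
}

// Still rejected after this patch; a hypothetical integer-vector overload
// would presumably return a per-element count (uint4) rather than a scalar.
// uint4 count_per_lane(uint4 v) { return __builtin_popcountg(v); }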
Thanks for the review. The motivation is mostly that the …
I've updated the implementation to be constexpr, and it now uses LLVM's arbitrary-precision integers so that it calculates …
ping
The bytecode parts LGTM.
I think we've given a fair window for other feedback. Approved.
Summary:
Boolean vectors as implemented in Clang can be bitcast to an integer
that is rounded up to the next primitive-sized integer. Users can do
this themselves, but since the bit-counting builtins are very likely to
be used with bitmasks like this, and the generic forms are expected to
work generically, it seems reasonable to handle this case directly.
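For reference, a small usage sketch of the behavior described above, mirroring the added tests in clang/test/CodeGen/builtins.c (the typedef and function names are illustrative):

typedef _Bool bool8 __attribute__((ext_vector_type(8)));

int set_lanes(bool8 mask) {
  // Per the CHECK lines in the test diff above, the <8 x i1> mask is
  // bitcast to its i8 storage integer and counted with llvm.ctpop.i8.
  return __builtin_popcountg(mask);
}

int leading_zero_bits(bool8 mask) {
  // Same conversion, lowered to llvm.ctlz.i8; as with the scalar builtin,
  // the single-argument form is undefined for an all-zero argument.
  return __builtin_clzg(mask);
}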