Skip to content

Commit

Permalink
[BitInt] Expose a _BitInt literal suffix in C++ (#86586)
Browse files Browse the repository at this point in the history
This exposes _BitInt literal suffixes __wb and u__wb as an extension
in C++. There is a new Extension warning, and the tests are
essentially the same as the existing _BitInt literal tests for C but
with a few additional cases.

Fixes #85223
  • Loading branch information
js324 authored Apr 22, 2024
1 parent 9a35951 commit ca1f1c9
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 16 deletions.
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ sections with improvements to Clang's support for those languages.

C++ Language Changes
--------------------
- Implemented ``_BitInt`` literal suffixes ``__wb`` and ``__WB`` as a Clang extension; the unsigned modifier (``u``/``U``) is also allowed (#GH85223).

C++20 Feature Support
^^^^^^^^^^^^^^^^^^^^^
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticCommonKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ def err_cxx23_size_t_suffix: Error<
def err_size_t_literal_too_large: Error<
"%select{signed |}0'size_t' literal is out of range of possible "
"%select{signed |}0'size_t' values">;
def ext_cxx_bitint_suffix : Extension<
"'_BitInt' suffix for literals is a Clang extension">,
InGroup<BitIntExtension>;
def ext_c23_bitint_suffix : ExtWarn<
"'_BitInt' suffix for literals is a C23 extension">,
InGroup<C23>;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticGroups.td
Original file line number Diff line number Diff line change
Expand Up @@ -1520,5 +1520,8 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon
// Warnings and notes related to InstallAPI verification.
def InstallAPIViolation : DiagGroup<"installapi-violation">;

// Warnings related to _BitInt extension
def BitIntExtension : DiagGroup<"bit-int-extension">;

// Warnings about misuse of ExtractAPI options.
def ExtractAPIMisuse : DiagGroup<"extractapi-misuse">;
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/DiagnosticParseKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -1654,7 +1654,7 @@ def warn_ext_int_deprecated : Warning<
"'_ExtInt' is deprecated; use '_BitInt' instead">, InGroup<DeprecatedType>;
def ext_bit_int : Extension<
"'_BitInt' in %select{C17 and earlier|C++}0 is a Clang extension">,
InGroup<DiagGroup<"bit-int-extension">>;
InGroup<BitIntExtension>;
} // end of Parse Issue category.

let CategoryName = "Modules Issue" in {
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Lex/LiteralSupport.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class NumericLiteralParser {
bool isFloat128 : 1; // 1.0q
bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
bool isBitInt : 1; // 1wb, 1uwb (C23)
bool isBitInt : 1; // 1wb, 1uwb (C23) or 1__wb, 1__uwb (Clang extension in C++
// mode)
uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.


Expand Down
36 changes: 32 additions & 4 deletions clang/lib/Lex/LiteralSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
bool isFixedPointConstant = isFixedPointLiteral();
bool isFPConstant = isFloatingLiteral();
bool HasSize = false;
bool DoubleUnderscore = false;

// Loop over all of the characters of the suffix. If we see something bad,
// we break out of the loop.
Expand Down Expand Up @@ -1117,6 +1118,31 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
if (isImaginary) break; // Cannot be repeated.
isImaginary = true;
continue; // Success.
case '_':
  // Clang extension (C++ only): a '__' introducer followed by an optional
  // 'u'/'U' and then exactly 'wb' or 'WB' marks a _BitInt literal.
  if (isFPConstant)
    break; // Invalid for floats
  if (HasSize)
    break;
  if (DoubleUnderscore)
    break; // Cannot be repeated.
  // s + 2 < ThisTokEnd ensures some character exists after the '__'.
  if (LangOpts.CPlusPlus && s + 2 < ThisTokEnd && s[1] == '_') {
    DoubleUnderscore = true;
    s += 2; // Skip both '_'
    // s + 1 < ThisTokEnd ensures some character exists after 'u'/'U'.
    // NOTE(review): a 'u' seen earlier in the suffix is not checked here, so
    // a spelling such as 1u__uwb is not diagnosed as a repeated modifier --
    // confirm whether that is intended.
    if (s + 1 < ThisTokEnd && (*s == 'u' || *s == 'U')) {
      isUnsigned = true;
      ++s;
    }
    // Compare both characters without moving 's'. Advancing 's' inside the
    // condition would leave it pointing past the first letter when the 'wb'
    // alternative fails, letting mixed spellings such as '__wWB' match the
    // 'WB' alternative and letting '__wW' run 's' up to (and read at)
    // ThisTokEnd, where it would look like a fully consumed suffix.
    if (s + 1 < ThisTokEnd &&
        ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
      isBitInt = true;
      HasSize = true;
      ++s; // Skip both characters (2nd char skipped on continue).
      continue;
    }
  }
  // Anything else leaves 's' on the first offending character so the suffix
  // is reported as invalid.
  break;
case 'w':
case 'W':
if (isFPConstant)
Expand All @@ -1127,9 +1153,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
// wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
// explicitly do not support the suffix in C++ as an extension because a
// library-based UDL that resolves to a library type may be more
// appropriate there.
if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
(s[0] == 'W' && s[1] == 'B'))) {
// appropriate there. The same rules apply for __wb/__WB.
if ((!LangOpts.CPlusPlus || DoubleUnderscore) && s + 1 < ThisTokEnd &&
((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
isBitInt = true;
HasSize = true;
++s; // Skip both characters (2nd char skipped on continue).
Expand Down Expand Up @@ -1241,7 +1267,9 @@ bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
return false;

// By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
if (Suffix[0] == '_')
// Suffixes starting with '__' (double underscore) are for use by
// the implementation.
if (Suffix.starts_with("_") && !Suffix.starts_with("__"))
return true;

// In C++11, there are no library suffixes.
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Lex/PPExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,11 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
: diag::ext_cxx23_size_t_suffix
: diag::err_cxx23_size_t_suffix);

// 'wb/uwb' literals are a C23 feature. We explicitly do not support the
// suffix in C++ as an extension because a library-based UDL that resolves
// to a library type may be more appropriate there.
// 'wb/uwb' literals are a C23 feature.
// '__wb/__uwb' are a C++ extension.
if (Literal.isBitInt)
PP.Diag(PeekTok, PP.getLangOpts().C23
PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
: PP.getLangOpts().C23
? diag::warn_c23_compat_bitint_suffix
: diag::ext_c23_bitint_suffix);

Expand Down
12 changes: 7 additions & 5 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4137,11 +4137,13 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
// 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++,
// but we do not currently support the suffix in C++ mode because it's not
// entirely clear whether WG21 will prefer this suffix to return a library
// type such as std::bit_int instead of returning a _BitInt.
if (Literal.isBitInt && !getLangOpts().CPlusPlus)
PP.Diag(Tok.getLocation(), getLangOpts().C23
? diag::warn_c23_compat_bitint_suffix
: diag::ext_c23_bitint_suffix);
// type such as std::bit_int instead of returning a _BitInt. '__wb/__uwb'
// literals are a C++ extension.
if (Literal.isBitInt)
PP.Diag(Tok.getLocation(),
getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
: getLangOpts().C23 ? diag::warn_c23_compat_bitint_suffix
: diag::ext_c23_bitint_suffix);

// Get the value in the widest-possible width. What is "widest" depends on
// whether the literal is a bit-precise integer or not. For a bit-precise
Expand Down
32 changes: 32 additions & 0 deletions clang/test/AST/bitint-suffix.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// RUN: %clang_cc1 -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s

// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void ()'
void func() {
// Ensure that we calculate the correct type from the literal suffix.
// Each typedef below is paired with the FileCheck directive directly above
// it, which pins the typedef's underlying type in the -ast-dump output.
// NOTE(review): the expected dump spells the literals with 'wb'/'uwb' even
// though the source uses '__wb'/'__uwb'; presumably the AST printer derives
// the suffix from the literal's type rather than from its spelling -- confirm.

// Note: 0__wb should create an _BitInt(2) because a signed bit-precise
// integer requires one bit for the sign and one bit for the value,
// at a minimum.
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 zero_wb 'typeof (0wb)':'_BitInt(2)'
typedef __typeof__(0__wb) zero_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
typedef __typeof__(-0__wb) neg_zero_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 one_wb 'typeof (1wb)':'_BitInt(2)'
typedef __typeof__(1__wb) one_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
typedef __typeof__(-1__wb) neg_one_wb;

// Unsigned literals carry no sign bit, so 0 and 1 are both expected to have
// type unsigned _BitInt(1).
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
typedef __typeof__(0__uwb) zero_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:31> col:31 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
typedef __typeof__(-0__uwb) neg_zero_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
typedef __typeof__(1__uwb) one_uwb;

// Try a value that is too large to fit in [u]intmax_t.
// 18446744073709551616 is 2^64, which needs 65 value bits: 65 bits when
// unsigned, 66 once a sign bit is added.

// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:49> col:49 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
typedef __typeof__(18446744073709551616__uwb) huge_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:48> col:48 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
typedef __typeof__(18446744073709551616__wb) huge_wb;
}
11 changes: 10 additions & 1 deletion clang/test/Lexer/bitint-constants-compat.c
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wbit-int-extension -Wno-unused -x c++ %s

#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
cpp-error {{invalid suffix 'uwb' on integer constant}}
#endif

#if 18446744073709551615__uwb // ext-error {{invalid suffix '__uwb' on integer constant}} \
compat-error {{invalid suffix '__uwb' on integer constant}} \
cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
#endif

// Same literals as the #if checks above, but used as expression statements.
// The diagnostic prefixes map to the RUN lines: 'ext' is -std=c17, 'compat' is
// -std=c2x with -Wpre-c2x-compat, and 'cpp' is the two C++ runs.
void func(void) {
// Plain 'wb': accepted in C with an extension/compat warning, rejected in C++.
18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
cpp-error {{invalid suffix 'wb' on integer constant}}

// '__wb': rejected in both C modes, accepted in C++ with the Clang extension
// warning.
18446744073709551615__wb; // ext-error {{invalid suffix '__wb' on integer constant}} \
compat-error {{invalid suffix '__wb' on integer constant}} \
cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
}
178 changes: 178 additions & 0 deletions clang/test/Lexer/bitint-constants.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// RUN: %clang_cc1 -triple aarch64-unknown-unknown -fsyntax-only -verify -Wno-unused %s

// Test that the preprocessor behavior makes sense.
#if 1__wb != 1
#error "wb suffix must be recognized by preprocessor"
#endif
#if 1__uwb != 1
#error "uwb suffix must be recognized by preprocessor"
#endif
#if !(-1__wb < 0)
#error "wb suffix must be interpreted as signed"
#endif
#if !(-1__uwb > 0)
#error "uwb suffix must be interpreted as unsigned"
#endif

#if 18446744073709551615__uwb != 18446744073709551615ULL
#error "expected the max value for uintmax_t to compare equal"
#endif

// Test that the preprocessor gives appropriate diagnostics when the
// literal value is larger than what can be stored in a [u]intmax_t.
#if 18446744073709551616__wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
#error "never expected to get here due to error"
#endif
#if 18446744073709551616__uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
#error "never expected to get here due to error"
#endif

// Despite using a bit-precise integer, this is expected to overflow
// because all preprocessor arithmetic is done in [u]intmax_t, so this
// should result in the value 0.
#if 18446744073709551615__uwb + 1 != 0ULL
#error "expected modulo arithmetic with uintmax_t width"
#endif

// Because this bit-precise integer is signed, it will also overflow,
// but Clang handles that by converting to uintmax_t instead of
// intmax_t.
#if 18446744073709551615__wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
#error "expected modulo arithmetic with uintmax_t width"
#endif

// Test that just because the preprocessor can't figure out the bit
// width doesn't mean we can't form the constant, it just means we
// can't use the value in a preprocessor conditional.
unsigned _BitInt(65) Val = 18446744073709551616__uwb;
// UDL test to make sure underscore parsing is correct
unsigned operator ""_(const char *);

// Every literal in this function is expected to be accepted: none of the
// lines carries an expected-* directive, so under -verify any diagnostic
// emitted here fails the test.
void ValidSuffix(void) {
// Decimal literals.
1__wb;
1__WB;
-1__wb;
_Static_assert((int)1__wb == 1, "not 1?");
_Static_assert((int)-1__wb == -1, "not -1?");

// Unsigned forms. The 'u'/'U' may sit between '__' and 'wb'/'WB' in either
// case, or be written separately before or after the '__wb'/'__WB' part.
1__uwb;
1__uWB;
1__Uwb;
1__UWB;
1u__wb;
1__WBu;
1U__WB;
_Static_assert((unsigned int)1__uwb == 1u, "not 1?");

// Digit separators inside the digit sequence.
1'2__wb;
1'2__uwb;
_Static_assert((int)1'2__wb == 12, "not 12?");
_Static_assert((unsigned int)1'2__uwb == 12u, "not 12?");

// Hexadecimal literals.
0x1__wb;
0x1__uwb;
0x0'1'2'3__wb;
0xA'B'c'd__uwb;
_Static_assert((int)0x0'1'2'3__wb == 0x0123, "not 0x0123");
_Static_assert((unsigned int)0xA'B'c'd__uwb == 0xABCDu, "not 0xABCD");

// Binary literals.
0b1__wb;
0b1__uwb;
0b1'0'1'0'0'1__wb;
0b0'1'0'1'1'0__uwb;
_Static_assert((int)0b1__wb == 1, "not 1?");
_Static_assert((unsigned int)0b1__uwb == 1u, "not 1?");

// Octal literals.
01__wb;
01__uwb;
0'6'0__wb;
0'0'1__uwb;
0__wbu;
0__WBu;
0U__wb;
0U__WB;
0__wb;
_Static_assert((int)0__wb == 0, "not 0?");
_Static_assert((unsigned int)0__wbu == 0u, "not 0?");

// Imaginary or Complex. These are allowed because _Complex can work with any
// integer type, and that includes _BitInt.
1__wbi;
1i__wb;
1__wbj;

// UDL test: a suffix consisting of a single underscore must still be accepted
// as a user-defined literal (it uses the operator ""_ declared before this
// function).
unsigned i = 1.0_;
}

// Every literal in this function is expected to be rejected; the diagnostic
// that -verify must see is given by the expected-error directive on the same
// line.
void InvalidSuffix(void) {
// Can't mix the case of wb or WB, and can't rearrange the letters.
0__wB; // expected-error {{invalid suffix '__wB' on integer constant}}
0__Wb; // expected-error {{invalid suffix '__Wb' on integer constant}}
0__bw; // expected-error {{invalid suffix '__bw' on integer constant}}
0__BW; // expected-error {{invalid suffix '__BW' on integer constant}}

// Trailing digit separators should still diagnose.
1'2'__wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
1'2'__uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}

// Long.
1l__wb; // expected-error {{invalid suffix}}
1__wbl; // expected-error {{invalid suffix}}
1l__uwb; // expected-error {{invalid suffix}}
1__l; // expected-error {{invalid suffix}}
1ul__wb; // expected-error {{invalid suffix}}

// Long long.
1ll__wb; // expected-error {{invalid suffix}}
1__uwbll; // expected-error {{invalid suffix}}

// Floating point.
0.1__wb; // expected-error {{invalid suffix}}
0.1f__wb; // expected-error {{invalid suffix}}

// Repetitive suffix.
1__wb__wb; // expected-error {{invalid suffix}}
1__uwbuwb; // expected-error {{invalid suffix}}
1__wbuwb; // expected-error {{invalid suffix}}
1__uwbwb; // expected-error {{invalid suffix}}

// Missing or extra characters in suffix.
1__; // expected-error {{invalid suffix}}
1__u; // expected-error {{invalid suffix}}
1___; // expected-error {{invalid suffix}}
1___WB; // expected-error {{invalid suffix}}
1__wb__; // expected-error {{invalid suffix}}
1__w; // expected-error {{invalid suffix}}
1__b; // expected-error {{invalid suffix}}
}

// The suffix itself is well-formed here; what must be diagnosed is the value,
// which does not fit in any _BitInt the implementation supports.
void ValidSuffixInvalidValue(void) {
// This is a valid suffix, but the value is larger than one that fits within
// the width of BITINT_MAXWIDTH. When this value changes in the future, the
// test cases should pick a new value that can't be represented by a _BitInt,
// but also add a test case that a 129-bit literal still behaves as-expected.
_Static_assert(__BITINT_MAXWIDTH__ <= 128,
"Need to pick a bigger constant for the test case below.");
// 33 hexadecimal digits, i.e. a 132-bit value.
0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
}

// Checks the exact type given to a literal: the asserted widths are the
// number of value bits in the literal plus one sign bit for the signed forms.
// The negated cases assert the same type as their positive counterparts.
void TestTypes(void) {
// 2 value bits, one sign bit
_Static_assert(__is_same(decltype(3__wb), _BitInt(3)));
// 2 value bits, one sign bit
_Static_assert(__is_same(decltype(-3__wb), _BitInt(3)));
// 2 value bits, no sign bit
_Static_assert(__is_same(decltype(3__uwb), unsigned _BitInt(2)));
// 4 value bits, one sign bit
_Static_assert(__is_same(decltype(0xF__wb), _BitInt(5)));
// 4 value bits, one sign bit
_Static_assert(__is_same(decltype(-0xF__wb), _BitInt(5)));
// 4 value bits, no sign bit
_Static_assert(__is_same(decltype(0xF__uwb), unsigned _BitInt(4)));
}

0 comments on commit ca1f1c9

Please sign in to comment.