[BitInt] Expose a _BitInt literal suffix in C++ (#86586)

This exposes the _BitInt literal suffixes __wb and __uwb as an extension in C++. There is a new Extension warning, and the tests are essentially the same as the existing _BitInt literal tests for C, with a few additional cases. Fixes #85223.
llvm · Apr 22, 2024 · ca1f1c9 · ca1f1c9
1 parent 9a35951
commit ca1f1c9
Show file tree

Hide file tree

Showing 11 changed files with 273 additions and 16 deletions.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -88,6 +88,7 @@ sections with improvements to Clang's support for those languages.
 
 C++ Language Changes
 --------------------
+- Implemented ``_BitInt`` literal suffixes ``__wb`` or ``__WB`` as a Clang extension, with ``unsigned`` modifiers also allowed (#GH85223).
 
 C++20 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -234,6 +234,9 @@ def err_cxx23_size_t_suffix: Error<
 def err_size_t_literal_too_large: Error<
   "%select{signed |}0'size_t' literal is out of range of possible "
   "%select{signed |}0'size_t' values">;
+def ext_cxx_bitint_suffix : Extension<
+  "'_BitInt' suffix for literals is a Clang extension">,
+  InGroup<BitIntExtension>;
 def ext_c23_bitint_suffix : ExtWarn<
   "'_BitInt' suffix for literals is a C23 extension">,
   InGroup<C23>;

diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1520,5 +1520,8 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon
 // Warnings and notes InstallAPI verification.
 def InstallAPIViolation : DiagGroup<"installapi-violation">;
 
+// Warnings related to _BitInt extension
+def BitIntExtension : DiagGroup<"bit-int-extension">;
+
 // Warnings about misuse of ExtractAPI options.
 def ExtractAPIMisuse : DiagGroup<"extractapi-misuse">;
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1654,7 +1654,7 @@ def warn_ext_int_deprecated : Warning<
   "'_ExtInt' is deprecated; use '_BitInt' instead">, InGroup<DeprecatedType>;
 def ext_bit_int : Extension<
   "'_BitInt' in %select{C17 and earlier|C++}0 is a Clang extension">,
-  InGroup<DiagGroup<"bit-int-extension">>;
+  InGroup<BitIntExtension>;
 } // end of Parse Issue category.
 
 let CategoryName = "Modules Issue" in {

diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
@@ -80,7 +80,8 @@ class NumericLiteralParser {
   bool isFloat128 : 1;      // 1.0q
   bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
   bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
-  bool isBitInt : 1;        // 1wb, 1uwb (C23)
+  bool isBitInt : 1;        // 1wb, 1uwb (C23) or 1__wb, 1__uwb (Clang extension in C++
+                            // mode)
   uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
 
 

diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
@@ -974,6 +974,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   bool isFixedPointConstant = isFixedPointLiteral();
   bool isFPConstant = isFloatingLiteral();
   bool HasSize = false;
+  bool DoubleUnderscore = false;
 
   // Loop over all of the characters of the suffix.  If we see something bad,
   // we break out of the loop.
@@ -1117,6 +1118,31 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       if (isImaginary) break;   // Cannot be repeated.
       isImaginary = true;
       continue;  // Success.
+    case '_':
+      if (isFPConstant)
+        break; // Invalid for floats
+      if (HasSize)
+        break;
+      if (DoubleUnderscore)
+        break; // Cannot be repeated.
+      if (LangOpts.CPlusPlus && s + 2 < ThisTokEnd &&
+          s[1] == '_') { // s + 2 < ThisTokEnd to ensure some character exists
+                         // after __
+        DoubleUnderscore = true;
+        s += 2; // Skip both '_'
+        if (s + 1 < ThisTokEnd &&
+            (*s == 'u' || *s == 'U')) { // Ensure some character after 'u'/'U'
+          isUnsigned = true;
+          ++s;
+        }
+        if (s + 1 < ThisTokEnd &&
+            ((*s == 'w' && *(++s) == 'b') || (*s == 'W' && *(++s) == 'B'))) {
+          isBitInt = true;
+          HasSize = true;
+          continue;
+        }
+      }
+      break;
     case 'w':
     case 'W':
       if (isFPConstant)
@@ -1127,9 +1153,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
       // explicitly do not support the suffix in C++ as an extension because a
       // library-based UDL that resolves to a library type may be more
-      // appropriate there.
-      if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
-          (s[0] == 'W' && s[1] == 'B'))) {
+      // appropriate there. The same rules apply for __wb/__WB.
+      if ((!LangOpts.CPlusPlus || DoubleUnderscore) && s + 1 < ThisTokEnd &&
+          ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
         isBitInt = true;
         HasSize = true;
         ++s; // Skip both characters (2nd char skipped on continue).
@@ -1241,7 +1267,9 @@ bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
     return false;
 
   // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
-  if (Suffix[0] == '_')
+  // Suffixes starting with '__' (double underscore) are for use by
+  // the implementation.
+  if (Suffix.starts_with("_") && !Suffix.starts_with("__"))
     return true;
 
   // In C++11, there are no library suffixes.

diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
@@ -333,11 +333,11 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                                  : diag::ext_cxx23_size_t_suffix
                            : diag::err_cxx23_size_t_suffix);
 
-    // 'wb/uwb' literals are a C23 feature. We explicitly do not support the
-    // suffix in C++ as an extension because a library-based UDL that resolves
-    // to a library type may be more appropriate there.
+    // 'wb/uwb' literals are a C23 feature.
+    // '__wb/__uwb' are a Clang extension in C++.
     if (Literal.isBitInt)
-      PP.Diag(PeekTok, PP.getLangOpts().C23
+      PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
+                       : PP.getLangOpts().C23
                            ? diag::warn_c23_compat_bitint_suffix
                            : diag::ext_c23_bitint_suffix);
 

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
@@ -4137,11 +4137,13 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
     // 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++,
     // but we do not currently support the suffix in C++ mode because it's not
     // entirely clear whether WG21 will prefer this suffix to return a library
-    // type such as std::bit_int instead of returning a _BitInt.
-    if (Literal.isBitInt && !getLangOpts().CPlusPlus)
-      PP.Diag(Tok.getLocation(), getLangOpts().C23
-                                     ? diag::warn_c23_compat_bitint_suffix
-                                     : diag::ext_c23_bitint_suffix);
+    // type such as std::bit_int instead of returning a _BitInt. '__wb/__uwb'
+    // literals are a Clang extension in C++.
+    if (Literal.isBitInt)
+      PP.Diag(Tok.getLocation(),
+              getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
+              : getLangOpts().C23     ? diag::warn_c23_compat_bitint_suffix
+                                      : diag::ext_c23_bitint_suffix);
 
     // Get the value in the widest-possible width. What is "widest" depends on
     // whether the literal is a bit-precise integer or not. For a bit-precise

diff --git a/clang/test/AST/bitint-suffix.cpp b/clang/test/AST/bitint-suffix.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void ()'
+void func() {
+  // Ensure that we calculate the correct type from the literal suffix.
+
+  // Note: 0__wb should create an _BitInt(2) because a signed bit-precise
+  // integer requires one bit for the sign and one bit for the value,
+  // at a minimum.
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 zero_wb 'typeof (0wb)':'_BitInt(2)'
+  typedef __typeof__(0__wb) zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+  typedef __typeof__(-0__wb) neg_zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 one_wb 'typeof (1wb)':'_BitInt(2)'
+  typedef __typeof__(1__wb) one_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+  typedef __typeof__(-1__wb) neg_one_wb;
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(0__uwb) zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:31> col:31 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(-0__uwb) neg_zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(1__uwb) one_uwb;
+
+  // Try a value that is too large to fit in [u]intmax_t.
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:49> col:49 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+  typedef __typeof__(18446744073709551616__uwb) huge_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:48> col:48 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+  typedef __typeof__(18446744073709551616__wb) huge_wb;
+}
diff --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c
@@ -1,14 +1,23 @@
 // RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
 // RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
-// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wbit-int-extension -Wno-unused -x c++ %s
 
 #if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
                                compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
                                cpp-error {{invalid suffix 'uwb' on integer constant}}
 #endif
 
+#if 18446744073709551615__uwb // ext-error {{invalid suffix '__uwb' on integer constant}} \
+                               compat-error {{invalid suffix '__uwb' on integer constant}} \
+                               cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
+#endif
+
 void func(void) {
   18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
                              compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
                              cpp-error {{invalid suffix 'wb' on integer constant}}
+
+  18446744073709551615__wb; // ext-error {{invalid suffix '__wb' on integer constant}} \
+                             compat-error {{invalid suffix '__wb' on integer constant}} \
+                             cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
 }
diff --git a/clang/test/Lexer/bitint-constants.cpp b/clang/test/Lexer/bitint-constants.cpp
@@ -0,0 +1,178 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-unknown -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1__wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1__uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1__wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1__uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615__uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616__wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616__uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615__uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615__wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616__uwb;
+// UDL test to make sure underscore parsing is correct
+unsigned operator ""_(const char *);
+
+void ValidSuffix(void) {
+  // Decimal literals.
+  1__wb;
+  1__WB;
+  -1__wb;
+  _Static_assert((int)1__wb == 1, "not 1?");
+  _Static_assert((int)-1__wb == -1, "not -1?");
+
+  1__uwb;
+  1__uWB;
+  1__Uwb;
+  1__UWB;
+  1u__wb;
+  1__WBu;
+  1U__WB;
+  _Static_assert((unsigned int)1__uwb == 1u, "not 1?");
+
+  1'2__wb;
+  1'2__uwb;
+  _Static_assert((int)1'2__wb == 12, "not 12?");
+  _Static_assert((unsigned int)1'2__uwb == 12u, "not 12?");
+
+  // Hexadecimal literals.
+  0x1__wb;
+  0x1__uwb;
+  0x0'1'2'3__wb;
+  0xA'B'c'd__uwb;
+  _Static_assert((int)0x0'1'2'3__wb == 0x0123, "not 0x0123");
+  _Static_assert((unsigned int)0xA'B'c'd__uwb == 0xABCDu, "not 0xABCD");
+
+  // Binary literals.
+  0b1__wb;
+  0b1__uwb;
+  0b1'0'1'0'0'1__wb;
+  0b0'1'0'1'1'0__uwb;
+  _Static_assert((int)0b1__wb == 1, "not 1?");
+  _Static_assert((unsigned int)0b1__uwb == 1u, "not 1?");
+
+  // Octal literals.
+  01__wb;
+  01__uwb;
+  0'6'0__wb;
+  0'0'1__uwb;
+  0__wbu;
+  0__WBu;
+  0U__wb;
+  0U__WB;
+  0__wb;
+  _Static_assert((int)0__wb == 0, "not 0?");
+  _Static_assert((unsigned int)0__wbu == 0u, "not 0?");
+
+  // Imaginary or Complex. These are allowed because _Complex can work with any
+  // integer type, and that includes _BitInt.
+  1__wbi;
+  1i__wb;
+  1__wbj;
+
+  //UDL test as single underscore
+  unsigned i = 1.0_;
+}
+
+void InvalidSuffix(void) {
+  // Can't mix the case of wb or WB, and can't rearrange the letters.
+  0__wB; // expected-error {{invalid suffix '__wB' on integer constant}}
+  0__Wb; // expected-error {{invalid suffix '__Wb' on integer constant}}
+  0__bw; // expected-error {{invalid suffix '__bw' on integer constant}}
+  0__BW; // expected-error {{invalid suffix '__BW' on integer constant}}
+
+  // Trailing digit separators should still diagnose.
+  1'2'__wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+  1'2'__uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+  // Long.
+  1l__wb; // expected-error {{invalid suffix}}
+  1__wbl; // expected-error {{invalid suffix}}
+  1l__uwb; // expected-error {{invalid suffix}}
+  1__l; // expected-error {{invalid suffix}}
+  1ul__wb;  // expected-error {{invalid suffix}}
+
+  // Long long.
+  1ll__wb; // expected-error {{invalid suffix}}
+  1__uwbll; // expected-error {{invalid suffix}}
+
+  // Floating point.
+  0.1__wb;   // expected-error {{invalid suffix}}
+  0.1f__wb;   // expected-error {{invalid suffix}}
+
+  // Repetitive suffix.
+  1__wb__wb; // expected-error {{invalid suffix}}
+  1__uwbuwb; // expected-error {{invalid suffix}}
+  1__wbuwb; // expected-error {{invalid suffix}}
+  1__uwbwb; // expected-error {{invalid suffix}}
+
+  // Missing or extra characters in suffix.
+  1__; // expected-error {{invalid suffix}}
+  1__u; // expected-error {{invalid suffix}}
+  1___; // expected-error {{invalid suffix}}
+  1___WB; // expected-error {{invalid suffix}}
+  1__wb__; // expected-error {{invalid suffix}}
+  1__w; // expected-error {{invalid suffix}}
+  1__b; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+  // This is a valid suffix, but the value is larger than one that fits within
+  // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+  // test cases should pick a new value that can't be represented by a _BitInt,
+  // but also add a test case that a 129-bit literal still behaves as-expected.
+  _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+	             "Need to pick a bigger constant for the test case below.");
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}
+
+void TestTypes(void) {
+  // 2 value bits, one sign bit
+  _Static_assert(__is_same(decltype(3__wb), _BitInt(3)));
+  // 2 value bits, one sign bit
+  _Static_assert(__is_same(decltype(-3__wb), _BitInt(3)));
+  // 2 value bits, no sign bit
+  _Static_assert(__is_same(decltype(3__uwb), unsigned _BitInt(2)));
+  // 4 value bits, one sign bit
+  _Static_assert(__is_same(decltype(0xF__wb), _BitInt(5)));
+  // 4 value bits, one sign bit
+  _Static_assert(__is_same(decltype(-0xF__wb), _BitInt(5)));
+  // 4 value bits, no sign bit
+  _Static_assert(__is_same(decltype(0xF__uwb), unsigned _BitInt(4)));
+}