diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9bedbf7a1e7621..7fb21dbadea005 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -469,6 +469,7 @@ C2x Feature Support support for functions without prototypes, which no longer exist in C2x. - Implemented `WG14 N2841 No function declarators without prototypes `_ and `WG14 N2432 Remove support for function definitions with identifier lists `_. +- Implemented `WG14 N2836 Identifier Syntax using Unicode Standard Annex 31 `_. C++ Language Changes in Clang ----------------------------- diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b3aac9df65465c..a4cff403e739c6 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1462,11 +1462,11 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { return false; } else if (LangOpts.DollarIdents && '$' == C) { return true; - } else if (LangOpts.CPlusPlus) { + } else if (LangOpts.CPlusPlus || LangOpts.C2x) { // A non-leading codepoint must have the XID_Continue property. // XIDContinueRanges doesn't contains characters also in XIDStartRanges, // so we need to check both tables. - // '_' doesn't have the XID_Continue property but is allowed in C++. + // '_' doesn't have the XID_Continue property but is allowed in C and C++. static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges); return C == '_' || XIDStartChars.contains(C) || @@ -1486,7 +1486,7 @@ static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { if (LangOpts.AsmPreprocessor) { return false; } - if (LangOpts.CPlusPlus) { + if (LangOpts.CPlusPlus || LangOpts.C2x) { static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); - // '_' doesn't have the XID_Start property but is allowed in C++. + // '_' doesn't have the XID_Start property but is allowed in C and C++.
return C == '_' || XIDStartChars.contains(C); diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c index efbd63fb00630d..8d5295ef338b4d 100644 --- a/clang/test/Lexer/unicode.c +++ b/clang/test/Lexer/unicode.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -verify -x c -std=c11 %s -// RUN: %clang_cc1 -fsyntax-only -verify -x c++ -std=c++11 %s +// RUN: %clang_cc1 -fsyntax-only -verify=expected,c2x -x c -std=c2x %s +// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ -std=c++11 %s // RUN: %clang_cc1 -std=c99 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace // RUN: %clang_cc1 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace @@ -31,7 +32,7 @@ CHECK : The preprocessor should not complain about Unicode characters like ©. extern int X\UAAAAAAAA; // expected-error {{not allowed in an identifier}} int Y = '\UAAAAAAAA'; // expected-error {{invalid universal character}} -#ifdef __cplusplus +#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L) extern int ༀ; extern int 𑩐; @@ -46,7 +47,8 @@ extern int _\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a va // This character doesn't have the XID_Start property -extern int \U00016AC0; // TANGSA DIGIT ZERO // expected-error {{expected unqualified-id}} +extern int \U00016AC0; // TANGSA DIGIT ZERO // cxx-error {{expected unqualified-id}} \ + // c2x-error {{expected identifier or '('}} extern int 🌹; // expected-error {{unexpected character }} \ expected-warning {{declaration does not declare anything}} diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 17d18d8724f8d6..f3076d345237a1 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -1024,7 +1024,7 @@

C2x implementation status

Identifier Syntax using Unicode Standard Annex 31 N2836 - No + Clang 15 No function declarators without prototypes