Skip to content

Commit

Permalink
[Clang] Update Unicode version to 15.1 (#77147)
Browse files Browse the repository at this point in the history
This updates all of our Unicode tables to Unicode 15.1. This is a minor
version, so only a relatively small number of characters are added,
mainly ideographs.

https://www.unicode.org/versions/Unicode15.1.0/#Appendices_nb
  • Loading branch information
cor3ntin committed Jan 17, 2024
1 parent b26bfcc commit 03e43cf
Show file tree
Hide file tree
Showing 9 changed files with 20,038 additions and 20,015 deletions.
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ Non-comprehensive list of changes in this release

* Added ``#pragma clang fp reciprocal``.

* The version of Unicode used by Clang (primarily to parse identifiers) has been updated to 15.1.

New Compiler Flags
------------------

Expand Down
47 changes: 25 additions & 22 deletions clang/lib/Lex/UnicodeCharSets.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include "llvm/Support/UnicodeCharRanges.h"

// Unicode 15.0 XID_Start
// Unicode 15.1 XID_Start
static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
{0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA},
{0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6},
Expand Down Expand Up @@ -233,9 +233,10 @@ static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
{0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
{0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DF}, {0x2A700, 0x2B739},
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0},
{0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, {0x31350, 0x323AF}};
{0x2EBF0, 0x2EE5D}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
{0x31350, 0x323AF}};

// Unicode 15.0 XID_Continue, excluding XID_Start
// Unicode 15.1 XID_Continue, excluding XID_Start
// The Unicode Property XID_Continue is a super set of XID_Start.
// To save Space, the table below only contains the codepoints
// that are not also in XID_Start.
Expand Down Expand Up @@ -299,24 +300,25 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
{0x1C24, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59},
{0x1CD0, 0x1CD2}, {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED},
{0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF},
{0x203F, 0x2040}, {0x2054, 0x2054}, {0x20D0, 0x20DC},
{0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1},
{0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
{0x3099, 0x309A}, {0xA620, 0xA629}, {0xA66F, 0xA66F},
{0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1},
{0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B},
{0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
{0xA8B4, 0xA8C5}, {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1},
{0xA8FF, 0xA909}, {0xA926, 0xA92D}, {0xA947, 0xA953},
{0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9},
{0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36},
{0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59},
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4},
{0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},
{0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA},
{0xABEC, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0xFE33, 0xFE34},
{0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F},
{0x200C, 0x200D}, {0x203F, 0x2040}, {0x2054, 0x2054},
{0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x20E5, 0x20F0},
{0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF},
{0x302A, 0x302F}, {0x3099, 0x309A}, {0x30FB, 0x30FB},
{0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D},
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802},
{0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827},
{0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C5},
{0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909},
{0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983},
{0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5},
{0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43},
{0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D},
{0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
{0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF},
{0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED},
{0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
{0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F},
{0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, {0xFF65, 0xFF65},
{0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0},
{0x10376, 0x1037A}, {0x104A0, 0x104A9}, {0x10A01, 0x10A03},
{0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
Expand Down Expand Up @@ -364,7 +366,8 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
{0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F}, {0x1E130, 0x1E136},
{0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2F9},
{0x1E4EC, 0x1E4F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}};
{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF},
};

// Clang supports the "Mathematical notation profile" as an extension,
// as described in https://www.unicode.org/L2/L2022/22230-math-profile.pdf
Expand Down
9 changes: 5 additions & 4 deletions clang/test/Lexer/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ extern int ༀ;
extern int 𑩐;
extern int 𐠈;
extern int ;
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
extern int \u{2EBF0}; // CJK UNIFIED IDEOGRAPH-2EBF0 - Added in Unicode 15.1
extern int a\N{TANGSA LETTER GA};
extern int a\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
// expected-error {{expected ';' after top level declarator}} \
Expand Down Expand Up @@ -74,7 +75,7 @@ extern int 👷; // expected-error {{unexpected character <U+1F477>}} \

extern int 👷‍♀; // expected-warning {{declaration does not declare anything}} \
expected-error {{unexpected character <U+1F477>}} \
expected-error {{unexpected character <U+200D>}} \
expected-error {{character <U+200D> not allowed at the start of an identifier}} \
expected-error {{unexpected character <U+2640>}}
#else

Expand Down

0 comments on commit 03e43cf

Please sign in to comment.