diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7604df7482c76..f728f5b4fcfc4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -100,7 +100,9 @@ Windows Support C Language Changes in Clang --------------------------- -- ... +- Wide multi-character literals such as ``L'ab'`` that would previously be interpreted as ``L'b'`` + are now ill-formed in all language modes. The motivation for this change is outlined in + `P2362 <https://wg21.link/P2362>`_. C++ Language Changes in Clang ----------------------------- diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 45a5b62af461a..c19adf104db1f 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -183,12 +183,10 @@ def warn_c2x_compat_digit_separator : Warning< InGroup, DefaultIgnore; def err_digit_separator_not_between_digits : Error< "digit separator cannot appear at %select{start|end}0 of digit sequence">; -def warn_extraneous_char_constant : Warning< - "extraneous characters in character constant ignored">; def warn_char_constant_too_large : Warning< "character constant too long for its type">; -def err_multichar_utf_character_literal : Error< - "Unicode character literals may not contain multiple characters">; +def err_multichar_character_literal : Error< + "%select{wide|Unicode}0 character literals may not contain multiple characters">; def err_exponent_has_no_digits : Error<"exponent has no digits">; def err_hex_constant_requires : Error< "hexadecimal floating %select{constant|literal}0 requires " diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 85d826ce9c6f7..f012fb72580ed 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -1390,14 +1390,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front(); if 
(NumCharsSoFar > 1) { - if (isWide()) - PP.Diag(Loc, diag::warn_extraneous_char_constant); - else if (isAscii() && NumCharsSoFar == 4) + if (isAscii() && NumCharsSoFar == 4) PP.Diag(Loc, diag::warn_four_char_character_literal); else if (isAscii()) PP.Diag(Loc, diag::warn_multichar_character_literal); - else - PP.Diag(Loc, diag::err_multichar_utf_character_literal); + else { + PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1); + HadError = true; + } IsMultiChar = true; } else { IsMultiChar = false; diff --git a/clang/test/CodeGen/char-literal.c b/clang/test/CodeGen/char-literal.c index 6fdf8b7c02b1c..c7a2a7bee471f 100644 --- a/clang/test/CodeGen/char-literal.c +++ b/clang/test/CodeGen/char-literal.c @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s -// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s // RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s #include @@ -33,11 +32,6 @@ int main() { // CHECK-CPP0X: store i32 97 wchar_t wa = L'a'; - // Should pick second character. - // CHECK-C: store i32 98 - // CHECK-CPP0X: store i32 98 - wchar_t wb = L'ab'; - #if __cplusplus >= 201103L // CHECK-CPP0X: store i16 97 char16_t ua = u'a'; @@ -83,8 +77,4 @@ int main() { char32_t Ud = U'\U0010F00B'; #endif - // Should pick second character. 
- // CHECK-C: store i32 1110027 - // CHECK-CPP0X: store i32 1110027 - wchar_t we = L'\u1234\U0010F00B'; } diff --git a/clang/test/CodeGen/string-literal-short-wstring.c b/clang/test/CodeGen/string-literal-short-wstring.c index 8894b8823e91a..899a820218467 100644 --- a/clang/test/CodeGen/string-literal-short-wstring.c +++ b/clang/test/CodeGen/string-literal-short-wstring.c @@ -1,11 +1,14 @@ -// RUN: %clang_cc1 -x c++ -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM -// RUN: %clang_cc1 -x c++ -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI -// Runs in c++ mode so that wchar_t is available. +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM +// RUN: %clang_cc1 -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI + +// Run in C mode as wide multichar literals are not valid in C++ // XFAIL: hexagon // Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which fails // the first check line with "align 1". +typedef __WCHAR_TYPE__ wchar_t; + int main() { // This should convert to utf8. // CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1 @@ -20,8 +23,6 @@ int main() { // MSABI: linkonce_odr dso_local unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0] const wchar_t *bar = L"\u1120\u0220\U00102030"; - - // Should pick second character. // CHECK: store i8 98 char c = 'ab'; @@ -29,10 +30,6 @@ int main() { // CHECK: store i16 97 wchar_t wa = L'a'; - // Should pick second character. 
- // CHECK: store i16 98 - wchar_t wb = L'ab'; - // -4085 == 0xf00b // CHECK: store i16 -4085 wchar_t wc = L'\uF00B'; diff --git a/clang/test/Lexer/char-literal.cpp b/clang/test/Lexer/char-literal.cpp index 1cd14a9b01167..a71500f8f0107 100644 --- a/clang/test/Lexer/char-literal.cpp +++ b/clang/test/Lexer/char-literal.cpp @@ -21,7 +21,8 @@ auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constan char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}} char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}} -wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}} +wchar_t i = L'ab'; // expected-error {{wide character literals may not contain multiple characters}} + wchar_t j = L'\U0010FFFD'; char32_t k = U'\U0010FFFD'; diff --git a/clang/test/Lexer/wchar.c b/clang/test/Lexer/wchar.c index 47417382c9549..4ecaee429f739 100644 --- a/clang/test/Lexer/wchar.c +++ b/clang/test/Lexer/wchar.c @@ -3,10 +3,8 @@ void f() { (void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning - (void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}} + (void)L'ab'; // expected-error {{wide character literals may not contain multiple characters}} - (void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}} - - (void)L'a\u1000'; // expected-warning {{extraneous characters in character constant ignored}} + (void)L'a\u1000'; // expected-error {{wide character literals may not contain multiple characters}} } diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c index e4f9069b88c86..a9e0a784c5c81 100644 --- a/clang/test/Misc/warning-flags.c +++ b/clang/test/Misc/warning-flags.c @@ -18,7 +18,7 @@ This test serves two purposes: The list of warnings below should NEVER grow. It should gradually shrink to 0. 
-CHECK: Warnings without flags (68): +CHECK: Warnings without flags (67): CHECK-NEXT: ext_expected_semi_decl_list CHECK-NEXT: ext_explicit_specialization_storage_class @@ -50,7 +50,6 @@ CHECK-NEXT: warn_drv_pch_not_first_include CHECK-NEXT: warn_dup_category_def CHECK-NEXT: warn_enum_value_overflow CHECK-NEXT: warn_expected_qualified_after_typename -CHECK-NEXT: warn_extraneous_char_constant CHECK-NEXT: warn_fe_backend_unsupported CHECK-NEXT: warn_fe_cc_log_diagnostics_failure CHECK-NEXT: warn_fe_cc_print_header_failure diff --git a/clang/test/Preprocessor/Weverything_pragma.c b/clang/test/Preprocessor/Weverything_pragma.c index f2cf97ed4a1ca..5300e7aebf464 100644 --- a/clang/test/Preprocessor/Weverything_pragma.c +++ b/clang/test/Preprocessor/Weverything_pragma.c @@ -10,21 +10,21 @@ void foo(void) // expected-warning {{no previous prototype for function}} // expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}} { // A diagnostic without DefaultIgnore, and not part of a group. - (void) L'ab'; // expected-warning {{extraneous characters in character constant ignored}} + (void) 'ab'; // expected-warning {{multi-character character constant}} #pragma clang diagnostic warning "-Weverything" // Should not change anyhting. #define UNUSED_MACRO2 1 // expected-warning{{macro is not used}} - (void) L'cd'; // expected-warning {{extraneous characters in character constant ignored}} + (void) 'cd'; // expected-warning {{multi-character character constant}} #pragma clang diagnostic ignored "-Weverything" // Ignore warnings now. #define UNUSED_MACRO2 1 // no warning - (void) L'ef'; // no warning here + (void) 'ef'; // no warning here #pragma clang diagnostic warning "-Weverything" // Revert back to warnings. 
#define UNUSED_MACRO3 1 // expected-warning{{macro is not used}} - (void) L'gh'; // expected-warning {{extraneous characters in character constant ignored}} + (void) 'gh'; // expected-warning {{multi-character character constant}} #pragma clang diagnostic error "-Weverything" // Give errors now. #define UNUSED_MACRO4 1 // expected-error{{macro is not used}} - (void) L'ij'; // expected-error {{extraneous characters in character constant ignored}} + (void) 'ij'; // expected-error {{multi-character character constant}} }