diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3dce7f6a8f9d5..4c8b85fc29755 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -222,6 +222,41 @@ Improvements to Clang's diagnostics - ``-Wfixed-enum-extension`` and ``-Wmicrosoft-fixed-enum`` diagnostics are no longer emitted when building as C23, since C23 standardizes support for enums with a fixed underlying type. +- When describing the failure of static assertion of `==` expression, clang prints the integer + representation of the value as well as its character representation when + the user-provided expression is of character type. If the character is + non-printable, clang now shows the escpaed character. + Clang also prints multi-byte characters if the user-provided expression + is of multi-byte character type. + + *Example Code*: + + .. code-block:: c++ + + static_assert("A\n"[1] == U'🌍'); + + *BEFORE*: + + .. code-block:: text + + source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d'' + 1 | static_assert("A\n"[1] == U'🌍'); + | ^~~~~~~~~~~~~~~~~ + source:1:24: note: expression evaluates to '' + ' == 127757' + 1 | static_assert("A\n"[1] == U'🌍'); + | ~~~~~~~~~^~~~~~~~ + + *AFTER*: + + .. code-block:: text + + source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d'' + 1 | static_assert("A\n"[1] == U'🌍'); + | ^~~~~~~~~~~~~~~~~ + source:1:24: note: expression evaluates to ''\n' (0x0A, 10) == U'🌍' (0x1F30D, 127757)' + 1 | static_assert("A\n"[1] == U'🌍'); + | ~~~~~~~~~^~~~~~~~ Bug Fixes in This Version ------------------------- diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index 5606a22fe9d68..3df037b793b39 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -1840,7 +1840,7 @@ const char ToggleHighlight = 127; void ProcessWarningOptions(DiagnosticsEngine &Diags, const DiagnosticOptions &Opts, bool ReportDiags = true); - +void EscapeStringForDiagnostic(StringRef Str, SmallVectorImpl &OutStr); } // namespace clang #endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 7a54d27ef9d8e..0208ccc31bd7f 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -800,9 +800,10 @@ FormatDiagnostic(SmallVectorImpl &OutStr) const { FormatDiagnostic(Diag.begin(), Diag.end(), OutStr); } -/// pushEscapedString - Append Str to the diagnostic buffer, +/// EscapeStringForDiagnostic - Append Str to the diagnostic buffer, /// escaping non-printable characters and ill-formed code unit sequences. -static void pushEscapedString(StringRef Str, SmallVectorImpl &OutStr) { +void clang::EscapeStringForDiagnostic(StringRef Str, + SmallVectorImpl &OutStr) { OutStr.reserve(OutStr.size() + Str.size()); auto *Begin = reinterpret_cast(Str.data()); llvm::raw_svector_ostream OutStream(OutStr); @@ -854,7 +855,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, StringRef(DiagStr, DiagEnd - DiagStr).equals("%0") && getArgKind(0) == DiagnosticsEngine::ak_std_string) { const std::string &S = getArgStdStr(0); - pushEscapedString(S, OutStr); + EscapeStringForDiagnostic(S, OutStr); return; } @@ -961,7 +962,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, case DiagnosticsEngine::ak_std_string: { const std::string &S = getArgStdStr(ArgNo); assert(ModifierLen == 0 && "No modifiers for strings yet"); - pushEscapedString(S, OutStr); + EscapeStringForDiagnostic(S, OutStr); break; } case DiagnosticsEngine::ak_c_string: { @@ -971,7 +972,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, // Don't crash if get passed a null pointer by accident. if (!S) S = "(null)"; - pushEscapedString(S, OutStr); + EscapeStringForDiagnostic(S, OutStr); break; } // ---- INTEGERS ---- diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 7ebb38ec2ec0c..f9c010b1a0024 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -49,6 +49,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/SaveAndRestore.h" #include #include @@ -17026,10 +17027,74 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc, AssertMessageExpr, RParenLoc, false); } +static void WriteCharTypePrefix(BuiltinType::Kind BTK, llvm::raw_ostream &OS) { + switch (BTK) { + case BuiltinType::Char_S: + case BuiltinType::Char_U: + break; + case BuiltinType::Char8: + OS << "u8"; + break; + case BuiltinType::Char16: + OS << 'u'; + break; + case BuiltinType::Char32: + OS << 'U'; + break; + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: + OS << 'L'; + break; + default: + llvm_unreachable("Non-character type"); + } +} + +/// Convert character's value, interpreted as a code unit, to a string. +/// The value needs to be zero-extended to 32-bits. +/// FIXME: This assumes Unicode literal encodings +static void WriteCharValueForDiagnostic(uint32_t Value, const BuiltinType *BTy, + unsigned TyWidth, + SmallVectorImpl &Str) { + char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; + char *Ptr = Arr; + BuiltinType::Kind K = BTy->getKind(); + llvm::raw_svector_ostream OS(Str); + + // This should catch Char_S, Char_U, Char8, and use of escaped characters in + // other types. + if (K == BuiltinType::Char_S || K == BuiltinType::Char_U || + K == BuiltinType::Char8 || Value <= 0x7F) { + StringRef Escaped = escapeCStyle(Value); + if (!Escaped.empty()) + EscapeStringForDiagnostic(Escaped, Str); + else + OS << static_cast(Value); + return; + } + + switch (K) { + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: { + if (llvm::ConvertCodePointToUTF8(Value, Ptr)) + EscapeStringForDiagnostic(StringRef(Arr, Ptr - Arr), Str); + else + OS << "\\x" + << llvm::format_hex_no_prefix(Value, TyWidth / 4, /*Upper=*/true); + break; + } + default: + llvm_unreachable("Non-character type is passed"); + } +} + /// Convert \V to a string we can present to the user in a diagnostic /// \T is the type of the expression that has been evaluated into \V static bool ConvertAPValueToString(const APValue &V, QualType T, - SmallVectorImpl &Str) { + SmallVectorImpl &Str, + ASTContext &Context) { if (!V.hasValue()) return false; @@ -17044,13 +17109,38 @@ static bool ConvertAPValueToString(const APValue &V, QualType T, "Bool type, but value is not 0 or 1"); llvm::raw_svector_ostream OS(Str); OS << (BoolValue ? "true" : "false"); - } else if (T->isCharType()) { + } else { + llvm::raw_svector_ostream OS(Str); // Same is true for chars. - Str.push_back('\''); - Str.push_back(V.getInt().getExtValue()); - Str.push_back('\''); - } else + // We want to print the character representation for textual types + const auto *BTy = T->getAs(); + if (BTy) { + switch (BTy->getKind()) { + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::Char8: + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: { + unsigned TyWidth = Context.getIntWidth(T); + assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width"); + uint32_t CodeUnit = static_cast(V.getInt().getZExtValue()); + WriteCharTypePrefix(BTy->getKind(), OS); + OS << '\''; + WriteCharValueForDiagnostic(CodeUnit, BTy, TyWidth, Str); + OS << "' (0x" + << llvm::format_hex_no_prefix(CodeUnit, /*Width=*/2, + /*Upper=*/true) + << ", " << V.getInt() << ')'; + return true; + } + default: + break; + } + } V.getInt().toString(Str); + } break; @@ -17147,8 +17237,9 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) { Side->EvaluateAsRValue(DiagSide[I].Result, Context, true); - DiagSide[I].Print = ConvertAPValueToString( - DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString); + DiagSide[I].Print = + ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(), + DiagSide[I].ValueString, Context); } if (DiagSide[0].Print && DiagSide[1].Print) { Diag(Op->getExprLoc(), diag::note_expr_evaluates_to) diff --git a/clang/test/Lexer/cxx1z-trigraphs.cpp b/clang/test/Lexer/cxx1z-trigraphs.cpp index 2e47c05e9bd33..6b39ff6716605 100644 --- a/clang/test/Lexer/cxx1z-trigraphs.cpp +++ b/clang/test/Lexer/cxx1z-trigraphs.cpp @@ -21,7 +21,7 @@ error here; #if !ENABLED_TRIGRAPHS // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}} -// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}} +// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}} // expected-error@16 {{}} // expected-error@20 {{}} #else diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp index c2bb2d2ebc6ff..b19aac6cabd13 100644 --- a/clang/test/SemaCXX/static-assert-cxx26.cpp +++ b/clang/test/SemaCXX/static-assert-cxx26.cpp @@ -298,3 +298,12 @@ Good a; // expected-note {{in instantiation}} Bad b; // expected-note {{in instantiation}} } + +namespace EscapeInDiagnostic { +static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''\t' (0x09, 9) == '' (0x01, 1)'}} +static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}} +static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}} +} diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp index 5d64ff682a20e..4200d821339ed 100644 --- a/clang/test/SemaCXX/static-assert.cpp +++ b/clang/test/SemaCXX/static-assert.cpp @@ -268,7 +268,31 @@ namespace Diagnostics { return 'c'; } static_assert(getChar() == 'a', ""); // expected-error {{failed}} \ - // expected-note {{evaluates to ''c' == 'a''}} + // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}} + static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}} + static_assert((char)10 == '\0', ""); // expected-error {{failed}} \ + // expected-note {{n' (0x0A, 10) == '' (0x00, 0)'}} + // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '' (0x00, 0)'", but if we write it as it is, + // the "\n" cannot be consumed by the diagnostic consumer. + static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}} + static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''' (0xFC, -4) == 248'}} + static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}} + static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''' (0xA0, -96) == ' ' (0x20, 32)'}} + static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}} + static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}} + static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}} + static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}} + static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}} /// Bools are printed as bools. constexpr bool invert(bool b) {