Skip to content

Commit

Permalink
[Clang][Sema] Fix display of characters on static assertion failure
Browse files Browse the repository at this point in the history
This patch fixes the display of characters appearing in LHS or RHS of == expression in notes to static assertion failure.
This applies C-style escape if the printed character is a special character. This also adds a numerical value displayed next to the character representation.
This also tries to print multi-byte characters if the user-provided expression is multi-byte char type.

Reviewed By: cor3ntin
Differential Revision: https://reviews.llvm.org/D155610
  • Loading branch information
hazohelet committed Oct 4, 2023
1 parent 8641cdf commit 2176c5e
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 16 deletions.
35 changes: 35 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,41 @@ Improvements to Clang's diagnostics
- ``-Wfixed-enum-extension`` and ``-Wmicrosoft-fixed-enum`` diagnostics are no longer
emitted when building as C23, since C23 standardizes support for enums with a
fixed underlying type.
- When describing the failure of static assertion of `==` expression, clang prints the integer
representation of the value as well as its character representation when
the user-provided expression is of character type. If the character is
non-printable, clang now shows the escpaed character.
Clang also prints multi-byte characters if the user-provided expression
is of multi-byte character type.

*Example Code*:

.. code-block:: c++

static_assert("A\n"[1] == U'🌍');

*BEFORE*:

.. code-block:: text
source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d''
1 | static_assert("A\n"[1] == U'🌍');
| ^~~~~~~~~~~~~~~~~
source:1:24: note: expression evaluates to ''
' == 127757'
1 | static_assert("A\n"[1] == U'🌍');
| ~~~~~~~~~^~~~~~~~
*AFTER*:

.. code-block:: text
source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d''
1 | static_assert("A\n"[1] == U'🌍');
| ^~~~~~~~~~~~~~~~~
source:1:24: note: expression evaluates to ''\n' (0x0A, 10) == U'🌍' (0x1F30D, 127757)'
1 | static_assert("A\n"[1] == U'🌍');
| ~~~~~~~~~^~~~~~~~
Bug Fixes in This Version
-------------------------
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/Diagnostic.h
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ const char ToggleHighlight = 127;
void ProcessWarningOptions(DiagnosticsEngine &Diags,
const DiagnosticOptions &Opts,
bool ReportDiags = true);

void EscapeStringForDiagnostic(StringRef Str, SmallVectorImpl<char> &OutStr);
} // namespace clang

#endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H
11 changes: 6 additions & 5 deletions clang/lib/Basic/Diagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -800,9 +800,10 @@ FormatDiagnostic(SmallVectorImpl<char> &OutStr) const {
FormatDiagnostic(Diag.begin(), Diag.end(), OutStr);
}

/// pushEscapedString - Append Str to the diagnostic buffer,
/// EscapeStringForDiagnostic - Append Str to the diagnostic buffer,
/// escaping non-printable characters and ill-formed code unit sequences.
static void pushEscapedString(StringRef Str, SmallVectorImpl<char> &OutStr) {
void clang::EscapeStringForDiagnostic(StringRef Str,
SmallVectorImpl<char> &OutStr) {
OutStr.reserve(OutStr.size() + Str.size());
auto *Begin = reinterpret_cast<const unsigned char *>(Str.data());
llvm::raw_svector_ostream OutStream(OutStr);
Expand Down Expand Up @@ -854,7 +855,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
StringRef(DiagStr, DiagEnd - DiagStr).equals("%0") &&
getArgKind(0) == DiagnosticsEngine::ak_std_string) {
const std::string &S = getArgStdStr(0);
pushEscapedString(S, OutStr);
EscapeStringForDiagnostic(S, OutStr);
return;
}

Expand Down Expand Up @@ -961,7 +962,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
case DiagnosticsEngine::ak_std_string: {
const std::string &S = getArgStdStr(ArgNo);
assert(ModifierLen == 0 && "No modifiers for strings yet");
pushEscapedString(S, OutStr);
EscapeStringForDiagnostic(S, OutStr);
break;
}
case DiagnosticsEngine::ak_c_string: {
Expand All @@ -971,7 +972,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
// Don't crash if get passed a null pointer by accident.
if (!S)
S = "(null)";
pushEscapedString(S, OutStr);
EscapeStringForDiagnostic(S, OutStr);
break;
}
// ---- INTEGERS ----
Expand Down
107 changes: 99 additions & 8 deletions clang/lib/Sema/SemaDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/SaveAndRestore.h"
#include <map>
#include <optional>
Expand Down Expand Up @@ -17026,10 +17027,74 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
AssertMessageExpr, RParenLoc, false);
}

static void WriteCharTypePrefix(BuiltinType::Kind BTK, llvm::raw_ostream &OS) {
switch (BTK) {
case BuiltinType::Char_S:
case BuiltinType::Char_U:
break;
case BuiltinType::Char8:
OS << "u8";
break;
case BuiltinType::Char16:
OS << 'u';
break;
case BuiltinType::Char32:
OS << 'U';
break;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
OS << 'L';
break;
default:
llvm_unreachable("Non-character type");
}
}

/// Convert character's value, interpreted as a code unit, to a string.
/// The value needs to be zero-extended to 32-bits.
/// FIXME: This assumes Unicode literal encodings
static void WriteCharValueForDiagnostic(uint32_t Value, const BuiltinType *BTy,
unsigned TyWidth,
SmallVectorImpl<char> &Str) {
char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
char *Ptr = Arr;
BuiltinType::Kind K = BTy->getKind();
llvm::raw_svector_ostream OS(Str);

// This should catch Char_S, Char_U, Char8, and use of escaped characters in
// other types.
if (K == BuiltinType::Char_S || K == BuiltinType::Char_U ||
K == BuiltinType::Char8 || Value <= 0x7F) {
StringRef Escaped = escapeCStyle<EscapeChar::Single>(Value);
if (!Escaped.empty())
EscapeStringForDiagnostic(Escaped, Str);
else
OS << static_cast<char>(Value);
return;
}

switch (K) {
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U: {
if (llvm::ConvertCodePointToUTF8(Value, Ptr))
EscapeStringForDiagnostic(StringRef(Arr, Ptr - Arr), Str);
else
OS << "\\x"
<< llvm::format_hex_no_prefix(Value, TyWidth / 4, /*Upper=*/true);
break;
}
default:
llvm_unreachable("Non-character type is passed");
}
}

/// Convert \V to a string we can present to the user in a diagnostic
/// \T is the type of the expression that has been evaluated into \V
static bool ConvertAPValueToString(const APValue &V, QualType T,
SmallVectorImpl<char> &Str) {
SmallVectorImpl<char> &Str,
ASTContext &Context) {
if (!V.hasValue())
return false;

Expand All @@ -17044,13 +17109,38 @@ static bool ConvertAPValueToString(const APValue &V, QualType T,
"Bool type, but value is not 0 or 1");
llvm::raw_svector_ostream OS(Str);
OS << (BoolValue ? "true" : "false");
} else if (T->isCharType()) {
} else {
llvm::raw_svector_ostream OS(Str);
// Same is true for chars.
Str.push_back('\'');
Str.push_back(V.getInt().getExtValue());
Str.push_back('\'');
} else
// We want to print the character representation for textual types
const auto *BTy = T->getAs<BuiltinType>();
if (BTy) {
switch (BTy->getKind()) {
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U: {
unsigned TyWidth = Context.getIntWidth(T);
assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width");
uint32_t CodeUnit = static_cast<uint32_t>(V.getInt().getZExtValue());
WriteCharTypePrefix(BTy->getKind(), OS);
OS << '\'';
WriteCharValueForDiagnostic(CodeUnit, BTy, TyWidth, Str);
OS << "' (0x"
<< llvm::format_hex_no_prefix(CodeUnit, /*Width=*/2,
/*Upper=*/true)
<< ", " << V.getInt() << ')';
return true;
}
default:
break;
}
}
V.getInt().toString(Str);
}

break;

Expand Down Expand Up @@ -17147,8 +17237,9 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) {

Side->EvaluateAsRValue(DiagSide[I].Result, Context, true);

DiagSide[I].Print = ConvertAPValueToString(
DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString);
DiagSide[I].Print =
ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(),
DiagSide[I].ValueString, Context);
}
if (DiagSide[0].Print && DiagSide[1].Print) {
Diag(Op->getExprLoc(), diag::note_expr_evaluates_to)
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Lexer/cxx1z-trigraphs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ error here;

#if !ENABLED_TRIGRAPHS
// expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}}
// expected-error@16 {{}}
// expected-error@20 {{}}
#else
Expand Down
9 changes: 9 additions & 0 deletions clang/test/SemaCXX/static-assert-cxx26.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,12 @@ Good<Frobble> a; // expected-note {{in instantiation}}
Bad<int> b; // expected-note {{in instantiation}}

}

namespace EscapeInDiagnostic {
static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
// expected-note {{evaluates to ''\t' (0x09, 9) == '<U+0001>' (0x01, 1)'}}
static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
}
26 changes: 25 additions & 1 deletion clang/test/SemaCXX/static-assert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,31 @@ namespace Diagnostics {
return 'c';
}
static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
// expected-note {{evaluates to ''c' == 'a''}}
// expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
// expected-note {{n' (0x0A, 10) == '<U+0000>' (0x00, 0)'}}
// The note above is intended to match "evaluates to '\n' (0x0A, 10) == '<U+0000>' (0x00, 0)'", but if we write it as it is,
// the "\n" cannot be consumed by the diagnostic consumer.
static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
// expected-note {{evaluates to ''<FC>' (0xFC, -4) == 248'}}
static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
// expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
// expected-note {{evaluates to ''<A0>' (0xA0, -96) == ' ' (0x20, 32)'}}
static_assert((char16_t)L'' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
static_assert(L""[0] == U'🌍', ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
// expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}

/// Bools are printed as bools.
constexpr bool invert(bool b) {
Expand Down

0 comments on commit 2176c5e

Please sign in to comment.