Skip to content

Commit

Permalink
[AST] Print NTTP args as string-literals when possible
Browse files Browse the repository at this point in the history
C++20 non-type template parameter prints `MyType<{{116, 104, 105, 115}}>` when the code is as simple as `MyType<"this">`. This patch prints `MyType<{"this"}>`, with one layer of braces preserved for the intermediate structural type to trigger CTAD.

`StringLiteral` handles this case, but `StringLiteral` inside `APValue` code looks like a circular dependency. The proposed patch implements a cheap strategy to emit string literals in diagnostic messages only when they are readable and fall back to integer sequences.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D115031
  • Loading branch information
zhihaoy authored and lichray committed Mar 2, 2022
1 parent 9c6250e commit 44eee65
Show file tree
Hide file tree
Showing 13 changed files with 347 additions and 67 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/AST/DeclTemplate.h
Expand Up @@ -3313,10 +3313,12 @@ class TemplateParamObjectDecl : public ValueDecl,

/// Print this object as an equivalent expression.
void printAsExpr(llvm::raw_ostream &OS) const;
void printAsExpr(llvm::raw_ostream &OS, const PrintingPolicy &Policy) const;

/// Print this object as an initializer suitable for a variable of the
/// object's type.
void printAsInit(llvm::raw_ostream &OS) const;
void printAsInit(llvm::raw_ostream &OS, const PrintingPolicy &Policy) const;

const APValue &getValue() const { return Value; }

Expand Down
6 changes: 5 additions & 1 deletion clang/include/clang/AST/PrettyPrinter.h
Expand Up @@ -74,7 +74,7 @@ struct PrintingPolicy {
SuppressImplicitBase(false), FullyQualifiedName(false),
PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true),
UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false),
CleanUglifiedParameters(false) {}
CleanUglifiedParameters(false), EntireContentsOfLargeArray(true) {}

/// Adjust this printing policy for cases where it's known that we're
/// printing C++ code (for instance, if AST dumping reaches a C++-only
Expand Down Expand Up @@ -286,6 +286,10 @@ struct PrintingPolicy {
/// This only affects parameter names, and so describes a compatible API.
unsigned CleanUglifiedParameters : 1;

/// Whether to print the entire array initializers, especially on non-type
/// template parameters, no matter how many elements there are.
unsigned EntireContentsOfLargeArray : 1;

/// Callbacks to use to allow the behavior of printing to be customized.
const PrintingCallbacks *Callbacks = nullptr;
};
Expand Down
43 changes: 41 additions & 2 deletions clang/include/clang/Basic/CharInfo.h
Expand Up @@ -38,15 +38,16 @@ namespace charinfo {
};
} // end namespace charinfo

/// Returns true if this is an ASCII character.
/// Returns true if a byte is an ASCII character.
LLVM_READNONE inline bool isASCII(char c) {
return static_cast<unsigned char>(c) <= 127;
}

LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; }

/// Returns true if this is an ASCII character.
/// Returns true if a codepoint is an ASCII character.
LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; }
LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; }

/// Returns true if this is a valid first character of a C identifier,
/// which is [a-zA-Z_].
Expand Down Expand Up @@ -162,6 +163,44 @@ LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
}

enum class EscapeChar {
Single = 1,
Double = 2,
SingleAndDouble = static_cast<int>(Single) | static_cast<int>(Double),
};

/// Return C-style escaped string for special characters, or an empty string if
/// there is no such mapping.
template <EscapeChar Opt, class CharT>
LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef {
switch (Ch) {
case '\\':
return "\\\\";
case '\'':
if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0)
break;
return "\\'";
case '"':
if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0)
break;
return "\\\"";
case '\a':
return "\\a";
case '\b':
return "\\b";
case '\f':
return "\\f";
case '\n':
return "\\n";
case '\r':
return "\\r";
case '\t':
return "\\t";
case '\v':
return "\\v";
}
return {};
}

/// Converts the given ASCII character to its lowercase equivalent.
///
Expand Down
81 changes: 74 additions & 7 deletions clang/lib/AST/APValue.cpp
Expand Up @@ -625,6 +625,67 @@ static double GetApproxValue(const llvm::APFloat &F) {
return V.convertToDouble();
}

static bool TryPrintAsStringLiteral(raw_ostream &Out,
const PrintingPolicy &Policy,
const ArrayType *ATy,
ArrayRef<APValue> Inits) {
if (Inits.empty())
return false;

QualType Ty = ATy->getElementType();
if (!Ty->isAnyCharacterType())
return false;

// Nothing we can do about a sequence that is not null-terminated
if (!Inits.back().getInt().isZero())
return false;
else
Inits = Inits.drop_back();

llvm::SmallString<40> Buf;
Buf.push_back('"');

// Better than printing a two-digit sequence of 10 integers.
constexpr size_t MaxN = 36;
StringRef Ellipsis;
if (Inits.size() > MaxN && !Policy.EntireContentsOfLargeArray) {
Ellipsis = "[...]";
Inits =
Inits.take_front(std::min(MaxN - Ellipsis.size() / 2, Inits.size()));
}

for (auto &Val : Inits) {
int64_t Char64 = Val.getInt().getExtValue();
if (!isASCII(Char64))
return false; // Bye bye, see you in integers.
auto Ch = static_cast<unsigned char>(Char64);
// The diagnostic message is 'quoted'
StringRef Escaped = escapeCStyle<EscapeChar::SingleAndDouble>(Ch);
if (Escaped.empty()) {
if (!isPrintable(Ch))
return false;
Buf.emplace_back(Ch);
} else {
Buf.append(Escaped);
}
}

Buf.append(Ellipsis);
Buf.push_back('"');

if (Ty->isWideCharType())
Out << 'L';
else if (Ty->isChar8Type())
Out << "u8";
else if (Ty->isChar16Type())
Out << 'u';
else if (Ty->isChar32Type())
Out << 'U';

Out << Buf;
return true;
}

void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx,
QualType Ty) const {
printPretty(Out, Ctx.getPrintingPolicy(), Ty, &Ctx);
Expand Down Expand Up @@ -795,17 +856,23 @@ void APValue::printPretty(raw_ostream &Out, const PrintingPolicy &Policy,
}
case APValue::Array: {
const ArrayType *AT = Ty->castAsArrayTypeUnsafe();
unsigned N = getArrayInitializedElts();
if (N != 0 && TryPrintAsStringLiteral(Out, Policy, AT,
{&getArrayInitializedElt(0), N}))
return;
QualType ElemTy = AT->getElementType();
Out << '{';
if (unsigned N = getArrayInitializedElts()) {
getArrayInitializedElt(0).printPretty(Out, Policy, ElemTy, Ctx);
for (unsigned I = 1; I != N; ++I) {
unsigned I = 0;
switch (N) {
case 0:
for (; I != N; ++I) {
Out << ", ";
if (I == 10) {
// Avoid printing out the entire contents of large arrays.
Out << "...";
break;
if (I == 10 && !Policy.EntireContentsOfLargeArray) {
Out << "...}";
return;
}
LLVM_FALLTHROUGH;
default:
getArrayInitializedElt(I).printPretty(Out, Policy, ElemTy, Ctx);
}
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/ASTDiagnostic.cpp
Expand Up @@ -1874,7 +1874,7 @@ class TemplateDiff {
// FIXME: Diffing the APValue would be neat.
// FIXME: Suppress this and use the full name of the declaration if the
// parameter is a pointer or reference.
TPO->printAsInit(OS);
TPO->printAsInit(OS, Policy);
return;
}
VD->printName(OS);
Expand Down
18 changes: 13 additions & 5 deletions clang/lib/AST/DeclTemplate.cpp
Expand Up @@ -1514,12 +1514,20 @@ void TemplateParamObjectDecl::printName(llvm::raw_ostream &OS) const {
}

void TemplateParamObjectDecl::printAsExpr(llvm::raw_ostream &OS) const {
const ASTContext &Ctx = getASTContext();
getType().getUnqualifiedType().print(OS, Ctx.getPrintingPolicy());
printAsInit(OS);
printAsExpr(OS, getASTContext().getPrintingPolicy());
}

void TemplateParamObjectDecl::printAsExpr(llvm::raw_ostream &OS,
const PrintingPolicy &Policy) const {
getType().getUnqualifiedType().print(OS, Policy);
printAsInit(OS, Policy);
}

void TemplateParamObjectDecl::printAsInit(llvm::raw_ostream &OS) const {
const ASTContext &Ctx = getASTContext();
getValue().printPretty(OS, Ctx, getType());
printAsInit(OS, getASTContext().getPrintingPolicy());
}

void TemplateParamObjectDecl::printAsInit(llvm::raw_ostream &OS,
const PrintingPolicy &Policy) const {
getValue().printPretty(OS, Policy, getType(), &getASTContext());
}
61 changes: 12 additions & 49 deletions clang/lib/AST/Expr.cpp
Expand Up @@ -960,40 +960,10 @@ void CharacterLiteral::print(unsigned Val, CharacterKind Kind,
break;
}

switch (Val) {
case '\\':
OS << "'\\\\'";
break;
case '\'':
OS << "'\\''";
break;
case '\a':
// TODO: K&R: the meaning of '\\a' is different in traditional C
OS << "'\\a'";
break;
case '\b':
OS << "'\\b'";
break;
// Nonstandard escape sequence.
/*case '\e':
OS << "'\\e'";
break;*/
case '\f':
OS << "'\\f'";
break;
case '\n':
OS << "'\\n'";
break;
case '\r':
OS << "'\\r'";
break;
case '\t':
OS << "'\\t'";
break;
case '\v':
OS << "'\\v'";
break;
default:
StringRef Escaped = escapeCStyle<EscapeChar::Single>(Val);
if (!Escaped.empty()) {
OS << "'" << Escaped << "'";
} else {
// A character literal might be sign-extended, which
// would result in an invalid \U escape sequence.
// FIXME: multicharacter literals such as '\xFF\xFF\xFF\xFF'
Expand Down Expand Up @@ -1163,8 +1133,9 @@ void StringLiteral::outputString(raw_ostream &OS) const {

unsigned LastSlashX = getLength();
for (unsigned I = 0, N = getLength(); I != N; ++I) {
switch (uint32_t Char = getCodeUnit(I)) {
default:
uint32_t Char = getCodeUnit(I);
StringRef Escaped = escapeCStyle<EscapeChar::Double>(Char);
if (Escaped.empty()) {
// FIXME: Convert UTF-8 back to codepoints before rendering.

// Convert UTF-16 surrogate pairs back to codepoints before rendering.
Expand Down Expand Up @@ -1192,7 +1163,7 @@ void StringLiteral::outputString(raw_ostream &OS) const {
for (/**/; Shift >= 0; Shift -= 4)
OS << Hex[(Char >> Shift) & 15];
LastSlashX = I;
break;
continue;
}

if (Char > 0xffff)
Expand All @@ -1205,7 +1176,7 @@ void StringLiteral::outputString(raw_ostream &OS) const {
<< Hex[(Char >> 8) & 15]
<< Hex[(Char >> 4) & 15]
<< Hex[(Char >> 0) & 15];
break;
continue;
}

// If we used \x... for the previous character, and this character is a
Expand All @@ -1230,17 +1201,9 @@ void StringLiteral::outputString(raw_ostream &OS) const {
<< (char)('0' + ((Char >> 6) & 7))
<< (char)('0' + ((Char >> 3) & 7))
<< (char)('0' + ((Char >> 0) & 7));
break;
// Handle some common non-printable cases to make dumps prettier.
case '\\': OS << "\\\\"; break;
case '"': OS << "\\\""; break;
case '\a': OS << "\\a"; break;
case '\b': OS << "\\b"; break;
case '\f': OS << "\\f"; break;
case '\n': OS << "\\n"; break;
case '\r': OS << "\\r"; break;
case '\t': OS << "\\t"; break;
case '\v': OS << "\\v"; break;
} else {
// Handle some common non-printable cases to make dumps prettier.
OS << Escaped;
}
}
OS << '"';
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/StmtPrinter.cpp
Expand Up @@ -1023,7 +1023,7 @@ void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) {
return;
}
if (const auto *TPOD = dyn_cast<TemplateParamObjectDecl>(Node->getDecl())) {
TPOD->printAsExpr(OS);
TPOD->printAsExpr(OS, Policy);
return;
}
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/TemplateBase.cpp
Expand Up @@ -434,7 +434,7 @@ void TemplateArgument::print(const PrintingPolicy &Policy, raw_ostream &Out,
NamedDecl *ND = getAsDecl();
if (getParamTypeForDecl()->isRecordType()) {
if (auto *TPO = dyn_cast<TemplateParamObjectDecl>(ND)) {
TPO->printAsInit(Out);
TPO->printAsInit(Out, Policy);
break;
}
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/Sema.cpp
Expand Up @@ -114,6 +114,9 @@ PrintingPolicy Sema::getPrintingPolicy(const ASTContext &Context,
}
}

// Shorten the data output if needed
Policy.EntireContentsOfLargeArray = false;

return Policy;
}

Expand Down
34 changes: 34 additions & 0 deletions clang/test/SemaCXX/cxx2a-nttp-printing.cpp
@@ -0,0 +1,34 @@
// RUN: %clang_cc1 %s -std=c++20 -fsyntax-only -verify

template <int N> struct Str {
constexpr Str(char const (&s)[N]) { __builtin_memcpy(value, s, N); }
char value[N];
};

template <Str V> class ASCII {};

void Foo(ASCII<"this nontype template argument is too long to print">); // expected-note {{no known conversion from 'ASCII<{"this nontype template argument is too long"}>' to 'ASCII<{"this nontype template argument is too long to print"}>'}}
void Bar(ASCII<"this nttp argument is too short">); // expected-note {{no known conversion from 'ASCII<{{119, 97, 105, 116, 32, 97, 32, 115, 27, 99, 111, 110, 100, 0}}>' to 'ASCII<{"this nttp argument is too short"}>'}}
void Meow(ASCII<"what|">); // expected-note {{no known conversion from 'ASCII<{"what??!"}>' to 'ASCII<{"what|"}>' for 1st argument}}

void test_ascii() {
ASCII<"this nontype template argument"
" is too long">
a;
Foo(a); // expected-error {{no matching function}}
decltype(a)::display(); // expected-error {{no member named 'display' in 'ASCII<{"this nontype template argument is [...]"}>'}}
}

void test_non_ascii() {
ASCII<"wait a s\033cond"> a;
Bar(a); // expected-error {{no matching function}}
decltype(a)::display(); // expected-error {{no member named 'display' in 'ASCII<{{119, 97, 105, 116, 32, 97, 32, 115, 27, 99, ...}}>'}}
}

// The dialects (C++20 and above) that accept string literals as non-type
// template arguments do not support trigraphs.
void test_trigraph() {
ASCII<"what??!"> a; // expected-warning {{trigraph ignored}}
Meow(a); // expected-error {{no matching function}}
decltype(a)::display(); // expected-error {{no member named 'display' in 'ASCII<{"what??!"}>'}}
}

0 comments on commit 44eee65

Please sign in to comment.