Skip to content

Commit

Permalink
Implement P2361 Unevaluated string literals
Browse files Browse the repository at this point in the history
This patch proposes to handle in a uniform fashion
the parsing of strings that are never evaluated,
in asm statements, static_assert, attributes, extern,
etc.

Unevaluated strings are UTF-8 internally and so currently
behave as narrow strings, but these things will diverge with
D93031.

The big question both for this patch and the P2361 paper
is whether we risk breaking code by disallowing
encoding prefixes in this context.
I hope this patch will allow us to gather some data on that.

Future work:
Improve the rendering of Unicode characters, line breaks,
and so forth in static_assert messages.

Reviewed By: aaron.ballman, shafik

Differential Revision: https://reviews.llvm.org/D105759
  • Loading branch information
cor3ntin committed Jul 7, 2023
1 parent 7cd1f3a commit 95f5096
Show file tree
Hide file tree
Showing 23 changed files with 233 additions and 101 deletions.
Expand Up @@ -7,9 +7,6 @@ void f_textless(int a) {
static_assert(sizeof(a) <= 10, "");
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when the string literal is an empty string [modernize-unary-static-assert]
// CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 10 );{{$}}
static_assert(sizeof(a) <= 12, L"");
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when
// CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 12 );{{$}}
FOO
// CHECK-FIXES: {{^}} FOO{{$}}
static_assert(sizeof(a) <= 17, MSG);
Expand Down
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Expand Up @@ -135,6 +135,8 @@ C++2c Feature Support
^^^^^^^^^^^^^^^^^^^^^
- Compiler flags ``-std=c++2c`` and ``-std=gnu++2c`` have been added for experimental C++2c implementation work.
- Implemented `P2738R1: constexpr cast from void* <https://wg21.link/P2738R1>`_.
- Partially implemented `P2361R6: Unevaluated strings <https://wg21.link/P2361R6>`_.
The changes to attribute declarations are not part of this release.

Resolutions to C++ Defect Reports
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
5 changes: 3 additions & 2 deletions clang/include/clang/AST/Expr.h
Expand Up @@ -1804,7 +1804,7 @@ class StringLiteral final
/// * An array of getByteLength() char used to store the string data.

public:
enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32 };
enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32, Unevaluated };

private:
unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; }
Expand Down Expand Up @@ -1866,7 +1866,7 @@ class StringLiteral final
unsigned CharByteWidth);

StringRef getString() const {
assert(getCharByteWidth() == 1 &&
assert((isUnevaluated() || getCharByteWidth() == 1) &&
"This function is used in places that assume strings use char");
return StringRef(getStrDataAsChar(), getByteLength());
}
Expand Down Expand Up @@ -1906,6 +1906,7 @@ class StringLiteral final
bool isUTF8() const { return getKind() == UTF8; }
bool isUTF16() const { return getKind() == UTF16; }
bool isUTF32() const { return getKind() == UTF32; }
bool isUnevaluated() const { return getKind() == Unevaluated; }
bool isPascal() const { return StringLiteralBits.IsPascal; }

bool containsNonAscii() const {
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/DiagnosticLexKinds.td
Expand Up @@ -276,6 +276,13 @@ def ext_ms_reserved_user_defined_literal : ExtWarn<
"identifier">, InGroup<ReservedUserDefinedLiteral>;
def err_unsupported_string_concat : Error<
"unsupported non-standard concatenation of string literals">;

def err_unevaluated_string_prefix : Error<
"an unevaluated string literal cannot have an encoding prefix">;
def err_unevaluated_string_udl : Error<
"an unevaluated string literal cannot be a user-defined literal">;
def err_unevaluated_string_invalid_escape_sequence : Error<
"invalid escape sequence '%0' in an unevaluated string literal">;
def err_string_concat_mixed_suffix : Error<
"differing user-defined suffixes ('%0' and '%1') in string literal "
"concatenation">;
Expand Down
3 changes: 0 additions & 3 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Expand Up @@ -433,9 +433,6 @@ def err_ellipsis_first_param : Error<
"ISO C requires a named parameter before '...'">;
def err_declarator_need_ident : Error<"declarator requires an identifier">;
def err_language_linkage_spec_unknown : Error<"unknown linkage language">;
def err_language_linkage_spec_not_ascii : Error<
"string literal in language linkage specifier cannot have an "
"encoding-prefix">;
def ext_use_out_of_scope_declaration : ExtWarn<
"use of out-of-scope declaration of %0%select{| whose type is not "
"compatible with that of an implicit declaration}1">,
Expand Down
29 changes: 20 additions & 9 deletions clang/include/clang/Lex/LiteralSupport.h
Expand Up @@ -212,6 +212,11 @@ class CharLiteralParser {
}
};

/// Selects how StringLiteralParser interprets the string tokens it is given.
enum class StringLiteralEvalMethod {
/// Parse as an ordinary, evaluated string literal. This is the mode
/// StringLiteralParser uses when no method is specified.
Evaluated,
/// Parse as an unevaluated string literal.
Unevaluated,
};

/// StringLiteralParser - This decodes string escape characters and performs
/// wide string analysis and Translation Phase #6 (concatenation of string
/// literals) (C99 5.1.1.2p1).
Expand All @@ -230,20 +235,23 @@ class StringLiteralParser {
SmallString<32> UDSuffixBuf;
unsigned UDSuffixToken;
unsigned UDSuffixOffset;
StringLiteralEvalMethod EvalMethod;

public:
StringLiteralParser(ArrayRef<Token> StringToks,
Preprocessor &PP);
StringLiteralParser(ArrayRef<Token> StringToks,
const SourceManager &sm, const LangOptions &features,
const TargetInfo &target,
StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
StringLiteralEvalMethod StringMethod =
StringLiteralEvalMethod::Evaluated);
StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
const LangOptions &features, const TargetInfo &target,
DiagnosticsEngine *diags = nullptr)
: SM(sm), Features(features), Target(target), Diags(diags),
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
: SM(sm), Features(features), Target(target), Diags(diags),
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
ResultPtr(ResultBuf.data()),
EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
Pascal(false) {
init(StringToks);
}


bool hadError;
bool Pascal;

Expand All @@ -269,6 +277,9 @@ class StringLiteralParser {
bool isUTF16() const { return Kind == tok::utf16_string_literal; }
bool isUTF32() const { return Kind == tok::utf32_string_literal; }
bool isPascal() const { return Pascal; }
/// Returns true if this parser's EvalMethod is
/// StringLiteralEvalMethod::Unevaluated.
bool isUnevaluated() const {
return EvalMethod == StringLiteralEvalMethod::Unevaluated;
}

StringRef getUDSuffix() const { return UDSuffixBuf; }

Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Parse/Parser.h
Expand Up @@ -1788,8 +1788,12 @@ class Parser : public CodeCompletionHandler {
bool IsUnevaluated);

ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false);
ExprResult ParseUnevaluatedStringLiteralExpression();

private:
ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
bool Unevaluated);

ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc);

ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc);
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Sema/Sema.h
Expand Up @@ -5703,6 +5703,8 @@ class Sema final {
ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
Scope *UDLScope = nullptr);

ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);

/// ControllingExprOrType is either an opaque pointer coming out of a
/// ParsedType or an Expr *. FIXME: it'd be better to split this interface
/// into two so we don't take a void *, but that's awkward because one of
Expand Down
70 changes: 42 additions & 28 deletions clang/lib/AST/Expr.cpp
Expand Up @@ -1136,6 +1136,8 @@ unsigned StringLiteral::mapCharByteWidth(TargetInfo const &Target,
case UTF32:
CharByteWidth = Target.getChar32Width();
break;
case Unevaluated:
return sizeof(char); // Host;
}
assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
CharByteWidth /= 8;
Expand All @@ -1149,35 +1151,45 @@ StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
const SourceLocation *Loc,
unsigned NumConcatenated)
: Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) {
assert(Ctx.getAsConstantArrayType(Ty) &&
"StringLiteral must be of constant array type!");
unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
unsigned ByteLength = Str.size();
assert((ByteLength % CharByteWidth == 0) &&
"The size of the data must be a multiple of CharByteWidth!");

// Avoid the expensive division. The compiler should be able to figure it
// out by itself. However as of clang 7, even with the appropriate
// llvm_unreachable added just here, it is not able to do so.
unsigned Length;
switch (CharByteWidth) {
case 1:
Length = ByteLength;
break;
case 2:
Length = ByteLength / 2;
break;
case 4:
Length = ByteLength / 4;
break;
default:
llvm_unreachable("Unsupported character width!");
}

unsigned Length = Str.size();

StringLiteralBits.Kind = Kind;
StringLiteralBits.CharByteWidth = CharByteWidth;
StringLiteralBits.IsPascal = Pascal;
StringLiteralBits.NumConcatenated = NumConcatenated;

if (Kind != StringKind::Unevaluated) {
assert(Ctx.getAsConstantArrayType(Ty) &&
"StringLiteral must be of constant array type!");
unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
unsigned ByteLength = Str.size();
assert((ByteLength % CharByteWidth == 0) &&
"The size of the data must be a multiple of CharByteWidth!");

// Avoid the expensive division. The compiler should be able to figure it
// out by itself. However as of clang 7, even with the appropriate
// llvm_unreachable added just here, it is not able to do so.
switch (CharByteWidth) {
case 1:
Length = ByteLength;
break;
case 2:
Length = ByteLength / 2;
break;
case 4:
Length = ByteLength / 4;
break;
default:
llvm_unreachable("Unsupported character width!");
}

StringLiteralBits.CharByteWidth = CharByteWidth;
StringLiteralBits.IsPascal = Pascal;
} else {
assert(!Pascal && "Can't make an unevaluated Pascal string");
StringLiteralBits.CharByteWidth = 1;
StringLiteralBits.IsPascal = false;
}

*getTrailingObjects<unsigned>() = Length;

// Initialize the trailing array of SourceLocation.
Expand All @@ -1186,7 +1198,7 @@ StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
NumConcatenated * sizeof(SourceLocation));

// Initialize the trailing array of char holding the string data.
std::memcpy(getTrailingObjects<char>(), Str.data(), ByteLength);
std::memcpy(getTrailingObjects<char>(), Str.data(), Str.size());

setDependence(ExprDependence::None);
}
Expand Down Expand Up @@ -1223,6 +1235,7 @@ StringLiteral *StringLiteral::CreateEmpty(const ASTContext &Ctx,

void StringLiteral::outputString(raw_ostream &OS) const {
switch (getKind()) {
case Unevaluated:
case Ordinary:
break; // no prefix.
case Wide: OS << 'L'; break;
Expand Down Expand Up @@ -1333,7 +1346,8 @@ StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
const TargetInfo &Target, unsigned *StartToken,
unsigned *StartTokenByteOffset) const {
assert((getKind() == StringLiteral::Ordinary ||
getKind() == StringLiteral::UTF8) &&
getKind() == StringLiteral::UTF8 ||
getKind() == StringLiteral::Unevaluated) &&
"Only narrow string literals are currently supported");

// Loop over all of the tokens in this string until we find the one that
Expand Down

0 comments on commit 95f5096

Please sign in to comment.