Skip to content

Commit 95f5096

Browse files
committed
Implement P2361 Unevaluated string literals
This patch proposes to handle in a uniform fashion the parsing of strings that are never evaluated, in asm statements, static asserts, attributes, extern, etc. Unevaluated strings are UTF-8 internally and so currently behave as narrow strings, but these things will diverge with D93031. The big question both for this patch and the P2361 paper is whether we risk breaking code by disallowing encoding prefixes in this context. I hope this patch may allow us to gather some data on that. Future work: Improve the rendering of Unicode characters, line breaks, and so forth in static-assert messages. Reviewed By: aaron.ballman, shafik Differential Revision: https://reviews.llvm.org/D105759
1 parent 7cd1f3a commit 95f5096

File tree

23 files changed

+233
-101
lines changed

23 files changed

+233
-101
lines changed

clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ void f_textless(int a) {
77
static_assert(sizeof(a) <= 10, "");
88
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when the string literal is an empty string [modernize-unary-static-assert]
99
// CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 10 );{{$}}
10-
static_assert(sizeof(a) <= 12, L"");
11-
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when
12-
// CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 12 );{{$}}
1310
FOO
1411
// CHECK-FIXES: {{^}} FOO{{$}}
1512
static_assert(sizeof(a) <= 17, MSG);

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ C++2c Feature Support
135135
^^^^^^^^^^^^^^^^^^^^^
136136
- Compiler flags ``-std=c++2c`` and ``-std=gnu++2c`` have been added for experimental C++2c implementation work.
137137
- Implemented `P2738R1: constexpr cast from void* <https://wg21.link/P2738R1>`_.
138+
- Partially implemented `P2361R6: Unevaluated strings <https://wg21.link/P2361R6>`_.
139+
The changes to attribute declarations are not part of this release.
138140

139141
Resolutions to C++ Defect Reports
140142
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/AST/Expr.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,7 +1804,7 @@ class StringLiteral final
18041804
/// * An array of getByteLength() char used to store the string data.
18051805

18061806
public:
1807-
enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32 };
1807+
enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32, Unevaluated };
18081808

18091809
private:
18101810
unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; }
@@ -1866,7 +1866,7 @@ class StringLiteral final
18661866
unsigned CharByteWidth);
18671867

18681868
StringRef getString() const {
1869-
assert(getCharByteWidth() == 1 &&
1869+
assert((isUnevaluated() || getCharByteWidth() == 1) &&
18701870
"This function is used in places that assume strings use char");
18711871
return StringRef(getStrDataAsChar(), getByteLength());
18721872
}
@@ -1906,6 +1906,7 @@ class StringLiteral final
19061906
bool isUTF8() const { return getKind() == UTF8; }
19071907
bool isUTF16() const { return getKind() == UTF16; }
19081908
bool isUTF32() const { return getKind() == UTF32; }
1909+
bool isUnevaluated() const { return getKind() == Unevaluated; }
19091910
bool isPascal() const { return StringLiteralBits.IsPascal; }
19101911

19111912
bool containsNonAscii() const {

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,13 @@ def ext_ms_reserved_user_defined_literal : ExtWarn<
276276
"identifier">, InGroup<ReservedUserDefinedLiteral>;
277277
def err_unsupported_string_concat : Error<
278278
"unsupported non-standard concatenation of string literals">;
279+
280+
def err_unevaluated_string_prefix : Error<
281+
"an unevaluated string literal cannot have an encoding prefix">;
282+
def err_unevaluated_string_udl : Error<
283+
"an unevaluated string literal cannot be a user-defined literal">;
284+
def err_unevaluated_string_invalid_escape_sequence : Error<
285+
"invalid escape sequence '%0' in an unevaluated string literal">;
279286
def err_string_concat_mixed_suffix : Error<
280287
"differing user-defined suffixes ('%0' and '%1') in string literal "
281288
"concatenation">;

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -433,9 +433,6 @@ def err_ellipsis_first_param : Error<
433433
"ISO C requires a named parameter before '...'">;
434434
def err_declarator_need_ident : Error<"declarator requires an identifier">;
435435
def err_language_linkage_spec_unknown : Error<"unknown linkage language">;
436-
def err_language_linkage_spec_not_ascii : Error<
437-
"string literal in language linkage specifier cannot have an "
438-
"encoding-prefix">;
439436
def ext_use_out_of_scope_declaration : ExtWarn<
440437
"use of out-of-scope declaration of %0%select{| whose type is not "
441438
"compatible with that of an implicit declaration}1">,

clang/include/clang/Lex/LiteralSupport.h

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ class CharLiteralParser {
212212
}
213213
};
214214

215+
enum class StringLiteralEvalMethod {
216+
Evaluated,
217+
Unevaluated,
218+
};
219+
215220
/// StringLiteralParser - This decodes string escape characters and performs
216221
/// wide string analysis and Translation Phase #6 (concatenation of string
217222
/// literals) (C99 5.1.1.2p1).
@@ -230,20 +235,23 @@ class StringLiteralParser {
230235
SmallString<32> UDSuffixBuf;
231236
unsigned UDSuffixToken;
232237
unsigned UDSuffixOffset;
238+
StringLiteralEvalMethod EvalMethod;
239+
233240
public:
234-
StringLiteralParser(ArrayRef<Token> StringToks,
235-
Preprocessor &PP);
236-
StringLiteralParser(ArrayRef<Token> StringToks,
237-
const SourceManager &sm, const LangOptions &features,
238-
const TargetInfo &target,
241+
StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
242+
StringLiteralEvalMethod StringMethod =
243+
StringLiteralEvalMethod::Evaluated);
244+
StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
245+
const LangOptions &features, const TargetInfo &target,
239246
DiagnosticsEngine *diags = nullptr)
240-
: SM(sm), Features(features), Target(target), Diags(diags),
241-
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
242-
ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
247+
: SM(sm), Features(features), Target(target), Diags(diags),
248+
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
249+
ResultPtr(ResultBuf.data()),
250+
EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
251+
Pascal(false) {
243252
init(StringToks);
244253
}
245254

246-
247255
bool hadError;
248256
bool Pascal;
249257

@@ -269,6 +277,9 @@ class StringLiteralParser {
269277
bool isUTF16() const { return Kind == tok::utf16_string_literal; }
270278
bool isUTF32() const { return Kind == tok::utf32_string_literal; }
271279
bool isPascal() const { return Pascal; }
280+
bool isUnevaluated() const {
281+
return EvalMethod == StringLiteralEvalMethod::Unevaluated;
282+
}
272283

273284
StringRef getUDSuffix() const { return UDSuffixBuf; }
274285

clang/include/clang/Parse/Parser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1788,8 +1788,12 @@ class Parser : public CodeCompletionHandler {
17881788
bool IsUnevaluated);
17891789

17901790
ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false);
1791+
ExprResult ParseUnevaluatedStringLiteralExpression();
17911792

17921793
private:
1794+
ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
1795+
bool Unevaluated);
1796+
17931797
ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc);
17941798

17951799
ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc);

clang/include/clang/Sema/Sema.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5703,6 +5703,8 @@ class Sema final {
57035703
ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
57045704
Scope *UDLScope = nullptr);
57055705

5706+
ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);
5707+
57065708
/// ControllingExprOrType is either an opaque pointer coming out of a
57075709
/// ParsedType or an Expr *. FIXME: it'd be better to split this interface
57085710
/// into two so we don't take a void *, but that's awkward because one of

clang/lib/AST/Expr.cpp

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,8 @@ unsigned StringLiteral::mapCharByteWidth(TargetInfo const &Target,
11361136
case UTF32:
11371137
CharByteWidth = Target.getChar32Width();
11381138
break;
1139+
case Unevaluated:
1140+
return sizeof(char); // Host;
11391141
}
11401142
assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
11411143
CharByteWidth /= 8;
@@ -1149,35 +1151,45 @@ StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
11491151
const SourceLocation *Loc,
11501152
unsigned NumConcatenated)
11511153
: Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) {
1152-
assert(Ctx.getAsConstantArrayType(Ty) &&
1153-
"StringLiteral must be of constant array type!");
1154-
unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
1155-
unsigned ByteLength = Str.size();
1156-
assert((ByteLength % CharByteWidth == 0) &&
1157-
"The size of the data must be a multiple of CharByteWidth!");
1158-
1159-
// Avoid the expensive division. The compiler should be able to figure it
1160-
// out by itself. However as of clang 7, even with the appropriate
1161-
// llvm_unreachable added just here, it is not able to do so.
1162-
unsigned Length;
1163-
switch (CharByteWidth) {
1164-
case 1:
1165-
Length = ByteLength;
1166-
break;
1167-
case 2:
1168-
Length = ByteLength / 2;
1169-
break;
1170-
case 4:
1171-
Length = ByteLength / 4;
1172-
break;
1173-
default:
1174-
llvm_unreachable("Unsupported character width!");
1175-
}
1154+
1155+
unsigned Length = Str.size();
11761156

11771157
StringLiteralBits.Kind = Kind;
1178-
StringLiteralBits.CharByteWidth = CharByteWidth;
1179-
StringLiteralBits.IsPascal = Pascal;
11801158
StringLiteralBits.NumConcatenated = NumConcatenated;
1159+
1160+
if (Kind != StringKind::Unevaluated) {
1161+
assert(Ctx.getAsConstantArrayType(Ty) &&
1162+
"StringLiteral must be of constant array type!");
1163+
unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
1164+
unsigned ByteLength = Str.size();
1165+
assert((ByteLength % CharByteWidth == 0) &&
1166+
"The size of the data must be a multiple of CharByteWidth!");
1167+
1168+
// Avoid the expensive division. The compiler should be able to figure it
1169+
// out by itself. However as of clang 7, even with the appropriate
1170+
// llvm_unreachable added just here, it is not able to do so.
1171+
switch (CharByteWidth) {
1172+
case 1:
1173+
Length = ByteLength;
1174+
break;
1175+
case 2:
1176+
Length = ByteLength / 2;
1177+
break;
1178+
case 4:
1179+
Length = ByteLength / 4;
1180+
break;
1181+
default:
1182+
llvm_unreachable("Unsupported character width!");
1183+
}
1184+
1185+
StringLiteralBits.CharByteWidth = CharByteWidth;
1186+
StringLiteralBits.IsPascal = Pascal;
1187+
} else {
1188+
assert(!Pascal && "Can't make an unevaluated Pascal string");
1189+
StringLiteralBits.CharByteWidth = 1;
1190+
StringLiteralBits.IsPascal = false;
1191+
}
1192+
11811193
*getTrailingObjects<unsigned>() = Length;
11821194

11831195
// Initialize the trailing array of SourceLocation.
@@ -1186,7 +1198,7 @@ StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
11861198
NumConcatenated * sizeof(SourceLocation));
11871199

11881200
// Initialize the trailing array of char holding the string data.
1189-
std::memcpy(getTrailingObjects<char>(), Str.data(), ByteLength);
1201+
std::memcpy(getTrailingObjects<char>(), Str.data(), Str.size());
11901202

11911203
setDependence(ExprDependence::None);
11921204
}
@@ -1223,6 +1235,7 @@ StringLiteral *StringLiteral::CreateEmpty(const ASTContext &Ctx,
12231235

12241236
void StringLiteral::outputString(raw_ostream &OS) const {
12251237
switch (getKind()) {
1238+
case Unevaluated:
12261239
case Ordinary:
12271240
break; // no prefix.
12281241
case Wide: OS << 'L'; break;
@@ -1333,7 +1346,8 @@ StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
13331346
const TargetInfo &Target, unsigned *StartToken,
13341347
unsigned *StartTokenByteOffset) const {
13351348
assert((getKind() == StringLiteral::Ordinary ||
1336-
getKind() == StringLiteral::UTF8) &&
1349+
getKind() == StringLiteral::UTF8 ||
1350+
getKind() == StringLiteral::Unevaluated) &&
13371351
"Only narrow string literals are currently supported");
13381352

13391353
// Loop over all of the tokens in this string until we find the one that

0 commit comments

Comments
 (0)