Skip to content

Commit

Permalink
[clang-format] TableGen multi line string support. (#78032)
Browse files Browse the repository at this point in the history
Support handling of TableGen's multi-line string (code) literal,
which has the form:
[{ this is the string possibly with multi line... }]
  • Loading branch information
hnakamura5 committed Jan 17, 2024
1 parent 2db9244 commit e3702f6
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 0 deletions.
3 changes: 3 additions & 0 deletions clang/lib/Format/ContinuationIndenter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1591,6 +1591,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.StartOfStringLiteral = State.Column + 1;
if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column + 1;
} else if (Current.is(TT_TableGenMultiLineString) &&
State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column + 1;
} else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column;
} else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Format/FormatToken.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ namespace format {
TYPE(StructLBrace) \
TYPE(StructRBrace) \
TYPE(StructuredBindingLSquare) \
TYPE(TableGenMultiLineString) \
TYPE(TemplateCloser) \
TYPE(TemplateOpener) \
TYPE(TemplateString) \
Expand Down
41 changes: 41 additions & 0 deletions clang/lib/Format/FormatTokenLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
// string literals are correctly identified.
handleCSharpVerbatimAndInterpolatedStrings();
}
if (Style.isTableGen())
handleTableGenMultilineString();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->isNot(tok::eof));
Expand Down Expand Up @@ -272,6 +274,14 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
}
}
// TableGen's Multi line string starts with [{
if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
TT_TableGenMultiLineString)) {
// Set again with finalizing. This must never be annotated as other types.
Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
Tokens.back()->Tok.setKind(tok::string_literal);
return;
}
}

bool FormatTokenLexer::tryMergeNSStringLiteral() {
Expand Down Expand Up @@ -763,6 +773,37 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
}

/// Extends a token typed TT_TableGenMultiLineString (the opening "[{") so that
/// it covers the whole TableGen multi-line string literal, up to and including
/// the first following "}]".
///
/// Returns without changes when the last token is not a
/// TT_TableGenMultiLineString, or when no closing "}]" is found in the buffer
/// after the opener. Otherwise it:
///  - replaces the token's TokenText with the full literal text,
///  - restarts the lexer right after the closing "}]",
///  - sets ColumnWidth (first line) and, for literals containing a newline,
///    IsMultiline and LastLineColumnWidth (last line).
void FormatTokenLexer::handleTableGenMultilineString() {
  FormatToken *MultiLineString = Tokens.back();
  if (MultiLineString->isNot(TT_TableGenMultiLineString))
    return;

  // The lexer has just consumed the opener, so step back over it.
  auto OpenOffset = Lex->getCurrentBufferOffset() - 2 /* "[{" */;
  // "}]" is the end of multi line string.
  auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);
  if (CloseOffset == StringRef::npos)
    return;
  // StringRef::substr takes (Start, Length), not (Start, End). The length must
  // be measured from OpenOffset; using the absolute CloseOffset here would
  // swallow text following the literal whenever it does not start at offset 0.
  auto Text = Lex->getBuffer().substr(OpenOffset,
                                      CloseOffset - OpenOffset + 2 /* "}]" */);
  MultiLineString->TokenText = Text;
  // Continue lexing immediately after the closing "}]".
  resetLexer(SourceMgr.getFileOffset(
      Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));
  auto FirstLineText = Text;
  auto FirstBreak = Text.find('\n');
  // Set ColumnWidth and LastLineColumnWidth when it has multiple lines.
  if (FirstBreak != StringRef::npos) {
    MultiLineString->IsMultiline = true;
    // Note: the first-line slice keeps its terminating '\n'.
    FirstLineText = Text.substr(0, FirstBreak + 1);
    // LastLineColumnWidth holds the width of the last line.
    auto LastBreak = Text.rfind('\n');
    MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
        Style.TabWidth, Encoding);
  }
  // ColumnWidth holds only the width of the first line.
  MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
}

void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Format/FormatTokenLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ class FormatTokenLexer {

void handleCSharpVerbatimAndInterpolatedStrings();

// Handles TableGen multiline strings. It has the form [{ ... }].
void handleTableGenMultilineString();

void tryParsePythonComment();

bool tryMerge_TMacro();
Expand Down
16 changes: 16 additions & 0 deletions clang/unittests/Format/TokenAnnotatorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2193,6 +2193,22 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0]));
ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def));
ASSERT_TRUE(Tokens[1]->is(TT_StartOfName));

// Code, the multiline string token.
Tokens = Annotate("[{ code is multiline string }]");
ASSERT_EQ(Tokens.size(), 2u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString);
EXPECT_FALSE(Tokens[0]->IsMultiline);
// Case with multiple lines.
Tokens = Annotate("[{ It can break\n"
" across lines and the line breaks\n"
" are retained in \n"
" the string. }]");
ASSERT_EQ(Tokens.size(), 2u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString);
EXPECT_EQ(Tokens[0]->ColumnWidth, sizeof("[{ It can break\n") - 1);
EXPECT_TRUE(Tokens[0]->IsMultiline);
EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1);
}

TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Expand Down

0 comments on commit e3702f6

Please sign in to comment.