491 changes: 491 additions & 0 deletions clang/lib/Format/FormatToken.h

Large diffs are not rendered by default.

210 changes: 150 additions & 60 deletions clang/lib/Format/FormatTokenLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,56 @@ FormatToken *FormatTokenLexer::getStashedToken() {
return FormatTok;
}

/// Truncate the current token to the new length and make the lexer continue
/// from the end of the truncated token. Used for other languages that have
/// different token boundaries, like JavaScript in which a comment ends at a
/// line break regardless of whether the line break follows a backslash. Also
/// used to set the lexer to the end of whitespace if the lexer regards
/// whitespace and an unrecognized symbol as one token.
void FormatTokenLexer::truncateToken(size_t NewLen) {
assert(NewLen <= FormatTok->TokenText.size());
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(
Lex->getBufferLocation() - FormatTok->TokenText.size() + NewLen)));
FormatTok->TokenText = FormatTok->TokenText.substr(0, NewLen);
FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
Encoding);
FormatTok->Tok.setLength(NewLen);
}

/// Count the length of leading whitespace in a token.
///
/// Besides ordinary whitespace characters, an escaped newline counts as
/// whitespace: a backslash, or the '?' '?' '/' trigraph, directly followed by
/// '\n' or '\r'.
///
/// \param Text the token text to inspect. The code relies on the underlying
/// source buffer having a null byte at its end, so that looking a few bytes
/// past the current position is safe.
/// \returns the number of leading bytes of \p Text that are whitespace.
static size_t countLeadingWhitespace(StringRef Text) {
  // Basically counting the length matched by this regex.
  // "^([\n\r\f\v \t]|(\\\\|\\?\\?/)[\n\r])+"
  // Directly using the regex turned out to be slow. With the regex
  // version formatting all files in this directory took about 1.25
  // seconds. This version took about 0.5 seconds.
  const char *Cur = Text.begin();
  while (Cur < Text.end()) {
    // isspace has undefined behavior for negative arguments other than EOF,
    // which a plain char holding a non-ASCII byte can be. Convert to
    // unsigned char before classifying.
    if (isspace(static_cast<unsigned char>(Cur[0]))) {
      ++Cur;
    } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {
      // A '\' followed by a newline always escapes the newline, regardless
      // of whether there is another '\' before it.
      // The source has a null byte at the end. So the end of the entire input
      // isn't reached yet. Also the lexer doesn't break apart an escaped
      // newline.
      assert(Text.end() - Cur >= 2);
      Cur += 2;
    } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
               (Cur[3] == '\n' || Cur[3] == '\r')) {
      // Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the
      // characters are quoted individually in this comment because if we write
      // them together some compilers warn that we have a trigraph in the code.
      assert(Text.end() - Cur >= 4);
      Cur += 4;
    } else {
      // First byte that is neither whitespace nor part of an escaped newline.
      break;
    }
  }
  return Cur - Text.begin();
}

FormatToken *FormatTokenLexer::getNextToken() {
if (StateStack.top() == LexerState::TOKEN_STASHED) {
StateStack.pop();
Expand All @@ -854,34 +904,33 @@ FormatToken *FormatTokenLexer::getNextToken() {
IsFirstToken = false;

// Consume and record whitespace until we find a significant token.
// Some tok::unknown tokens are not just whitespace, e.g. whitespace
// followed by a symbol such as backtick. Those symbols may be
// significant in other languages.
unsigned WhitespaceLength = TrailingWhitespace;
while (FormatTok->is(tok::unknown)) {
while (FormatTok->isNot(tok::eof)) {
auto LeadingWhitespace = countLeadingWhitespace(FormatTok->TokenText);
if (LeadingWhitespace == 0)
break;
if (LeadingWhitespace < FormatTok->TokenText.size())
truncateToken(LeadingWhitespace);
StringRef Text = FormatTok->TokenText;
auto EscapesNewline = [&](int pos) {
// A '\r' here is just part of '\r\n'. Skip it.
if (pos >= 0 && Text[pos] == '\r')
--pos;
// See whether there is an odd number of '\' before this.
// FIXME: This is wrong. A '\' followed by a newline is always removed,
// regardless of whether there is another '\' before it.
// FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph.
unsigned count = 0;
for (; pos >= 0; --pos, ++count)
if (Text[pos] != '\\')
break;
return count & 1;
};
// FIXME: This miscounts tok:unknown tokens that are not just
// whitespace, e.g. a '`' character.
bool InEscape = false;
for (int i = 0, e = Text.size(); i != e; ++i) {
switch (Text[i]) {
case '\r':
// If this is a CRLF sequence, break here and the LF will be handled on
// the next loop iteration. Otherwise, this is a single Mac CR, treat it
// the same as a single LF.
if (i + 1 < e && Text[i + 1] == '\n')
break;
LLVM_FALLTHROUGH;
case '\n':
++FormatTok->NewlinesBefore;
FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
Column = 0;
break;
case '\r':
if (!InEscape)
FormatTok->HasUnescapedNewline = true;
else
InEscape = false;
FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
Column = 0;
break;
Expand All @@ -897,24 +946,32 @@ FormatToken *FormatTokenLexer::getNextToken() {
Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
break;
case '\\':
if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
FormatTok->setType(TT_ImplicitStringLiteral);
case '?':
case '/':
// The text was entirely whitespace when this loop was entered. Thus
// this has to be an escape sequence.
assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||
Text.substr(i, 4) == "\?\?/\r" ||
Text.substr(i, 4) == "\?\?/\n" ||
(i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
Text.substr(i - 1, 4) == "\?\?/\n")) ||
(i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
Text.substr(i - 2, 4) == "\?\?/\n")));
InEscape = true;
break;
default:
FormatTok->setType(TT_ImplicitStringLiteral);
// This shouldn't happen.
assert(false);
break;
}
if (FormatTok->getType() == TT_ImplicitStringLiteral)
break;
}

if (FormatTok->is(TT_ImplicitStringLiteral))
break;
WhitespaceLength += FormatTok->Tok.getLength();

WhitespaceLength += Text.size();
readRawToken(*FormatTok);
}

if (FormatTok->is(tok::unknown))
FormatTok->setType(TT_ImplicitStringLiteral);

// JavaScript and Java do not allow to escape the end of the line with a
// backslash. Backslashes are syntax errors in plain source, but can occur in
// comments. When a single line comment ends with a \, it'll cause the next
Expand All @@ -928,40 +985,30 @@ FormatToken *FormatTokenLexer::getNextToken() {
while (BackslashPos != StringRef::npos) {
if (BackslashPos + 1 < FormatTok->TokenText.size() &&
FormatTok->TokenText[BackslashPos + 1] == '\n') {
const char *Offset = Lex->getBufferLocation();
Offset -= FormatTok->TokenText.size();
Offset += BackslashPos + 1;
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);
FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
Encoding);
truncateToken(BackslashPos + 1);
break;
}
BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
}
}

// In case the token starts with escaped newlines, we want to
// take them into account as whitespace - this pattern is quite frequent
// in macro definitions.
// FIXME: Add a more explicit test.
while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {
unsigned SkippedWhitespace = 0;
if (FormatTok->TokenText.size() > 2 &&
(FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) {
SkippedWhitespace = 3;
} else if (FormatTok->TokenText[1] == '\n') {
SkippedWhitespace = 2;
} else {
break;
if (Style.isVerilog()) {
// Verilog uses the backtick instead of the hash for preprocessor stuff.
// And it uses the hash for delays and parameter lists. In order to continue
// using `tok::hash` in other places, the backtick gets marked as the hash
// here. And in order to tell the backtick and hash apart for
// Verilog-specific stuff, the hash becomes an identifier.
if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
FormatTok->Tok.setKind(tok::raw_identifier);
} else if (FormatTok->is(tok::raw_identifier)) {
if (FormatTok->TokenText == "`") {
FormatTok->Tok.setIdentifierInfo(nullptr);
FormatTok->Tok.setKind(tok::hash);
} else if (FormatTok->TokenText == "``") {
FormatTok->Tok.setIdentifierInfo(nullptr);
FormatTok->Tok.setKind(tok::hashhash);
}
}

++FormatTok->NewlinesBefore;
WhitespaceLength += SkippedWhitespace;
FormatTok->LastNewlineOffset = SkippedWhitespace;
Column = 0;
FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);
}

FormatTok->WhitespaceRange = SourceRange(
Expand Down Expand Up @@ -1051,8 +1098,51 @@ FormatToken *FormatTokenLexer::getNextToken() {
return FormatTok;
}

bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
  // Tokens recognized here:
  // - The quote, which is not a character literal in Verilog.
  // - The backtick and the double backtick. They are made identifiers so that
  //   they can be matched against more easily.
  // - An escaped identifier. It starts with a backslash and ends with
  //   whitespace, unless that whitespace is an escaped newline.
  //
  // A backslash can also begin an escaped newline outside of an escaped
  // identifier. That case is rejected separately from the regex since we
  // can't use negative lookahead assertions. Simply changing the '*' to '+'
  // breaks stuff as the escaped identifier may have a length of 0 according
  // to Section A.9.3.
  // FIXME: If there is an escaped newline in the middle of an escaped
  // identifier, allow for pasting the two lines together. But escaped
  // identifiers usually occur only in generated code anyway.
  static const llvm::Regex VerilogToken(R"re(^('|``?|\\(\\)re"
                                        "(\r?\n|\r)|[^[:space:]])*)");

  const char *const TokenBegin = Lex->getBufferLocation();

  // A backslash directly followed by a line break is an escaped newline, not
  // a Verilog token. There is a null byte at the end of the buffer, so we
  // don't have to check that TokenBegin[1] is within the buffer.
  if (TokenBegin[0] == '\\' &&
      (TokenBegin[1] == '\r' || TokenBegin[1] == '\n')) {
    return false;
  }

  // Try the pattern against everything from the current position to the end
  // of the buffer.
  const StringRef Remaining(TokenBegin, Lex->getBuffer().end() - TokenBegin);
  SmallVector<StringRef, 4> Groups;
  if (!VerilogToken.match(Remaining, &Groups))
    return false;
  const size_t TokenLen = Groups[0].size();

  Tok.setLength(TokenLen);
  Tok.setLocation(Lex->getSourceLocation(TokenBegin, TokenLen));
  // The kind has to be an identifier so we can match it against those defined
  // in Keywords.
  Tok.setKind(tok::raw_identifier);
  Tok.setRawIdentifierData(TokenBegin);
  // Move the lexer past the token that was just produced by hand.
  Lex->seek(Lex->getCurrentBufferOffset() + TokenLen,
            /*IsAtStartofline=*/false);
  return true;
}

void FormatTokenLexer::readRawToken(FormatToken &Tok) {
Lex->LexFromRawLexer(Tok.Tok);
// For Verilog, first see if there is a special token, and fall back to the
// normal lexer if there isn't one.
if (!Style.isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
Lex->LexFromRawLexer(Tok.Tok);
Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
Tok.Tok.getLength());
// For formatting, treat unterminated string literals like normal string
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Format/FormatTokenLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ class FormatTokenLexer {

bool tryMergeConflictMarkers();

void truncateToken(size_t NewLen);

FormatToken *getStashedToken();

FormatToken *getNextToken();
Expand Down Expand Up @@ -124,6 +126,9 @@ class FormatTokenLexer {
// Targets that may appear inside a C# attribute.
static const llvm::StringSet<> CSharpAttributeTargets;

/// Handle Verilog-specific tokens.
bool readRawTokenVerilogSpecific(Token &Tok);

void readRawToken(FormatToken &Tok);

void resetLexer(unsigned Offset);
Expand Down
34 changes: 30 additions & 4 deletions clang/lib/Format/TokenAnnotator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1343,6 +1343,10 @@ class AnnotatingParser {
// sequence.
if (!CurrentToken->Tok.getIdentifierInfo())
return Type;
// In Verilog macro expansions start with a backtick just like preprocessor
// directives. Thus we stop if the word is not a preprocessor directive.
if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
return LT_Invalid;
switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
case tok::pp_include:
case tok::pp_include_next:
Expand Down Expand Up @@ -1385,8 +1389,14 @@ class AnnotatingParser {
if (!CurrentToken)
return LT_Invalid;
NonTemplateLess.clear();
if (CurrentToken->is(tok::hash))
return parsePreprocessorDirective();
if (CurrentToken->is(tok::hash)) {
// We were not yet allowed to use C++17 optional when this was being
// written. So we used LT_Invalid to mark that the line is not a
// preprocessor directive.
auto Type = parsePreprocessorDirective();
if (Type != LT_Invalid)
return Type;
}

// Directly allow to 'import <string-literal>' to support protocol buffer
// definitions (github.com/google/protobuf) or missing "#" (either way we
Expand Down Expand Up @@ -3663,8 +3673,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Left.Finalized)
return Right.hasWhitespaceBefore();

if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
return true; // Never ever merge two identifiers.
// Never ever merge two words.
if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
return true;

// Leave a space between * and /* to avoid C4138 `comment end` found outside
// of comment.
Expand Down Expand Up @@ -3931,6 +3942,21 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Right.is(TT_TemplateOpener)) {
return true;
}
} else if (Style.isVerilog()) {
// Don't add space within a delay like `#0`.
if (!Left.is(TT_BinaryOperator) &&
Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
return false;
}
// Add space after a delay.
if (!Right.is(tok::semi) &&
(Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
Left.endsSequence(tok::numeric_constant,
Keywords.kw_verilogHashHash) ||
(Left.is(tok::r_paren) && Left.MatchingParen &&
Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
return true;
}
}
if (Left.is(TT_ImplicitStringLiteral))
return Right.hasWhitespaceBefore();
Expand Down
47 changes: 40 additions & 7 deletions clang/lib/Format/UnwrappedLineParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,17 @@ FormatToken *UnwrappedLineParser::parseBlock(
bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
bool CanContainBracedList, TokenType NextLBracesType) {
assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
auto HandleVerilogBlockLabel = [this]() {
// ":" name
if (Style.isVerilog() && FormatTok->is(tok::colon)) {
nextToken();
if (Keywords.isVerilogIdentifier(*FormatTok))
nextToken();
}
};

assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
(Style.isVerilog() && Keywords.isVerilogBegin(*FormatTok))) &&
"'{' or macro block token expected");
FormatToken *Tok = FormatTok;
const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
Expand All @@ -846,6 +856,7 @@ FormatToken *UnwrappedLineParser::parseBlock(

const unsigned InitialLevel = Line->Level;
nextToken(/*LevelDifference=*/AddLevels);
HandleVerilogBlockLabel();

// Bail out if there are too many levels. Otherwise, the stack might overflow.
if (Line->Level > 300)
Expand Down Expand Up @@ -926,6 +937,7 @@ FormatToken *UnwrappedLineParser::parseBlock(

// Munch the closing brace.
nextToken(/*LevelDifference=*/-AddLevels);
HandleVerilogBlockLabel();

if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
Expand Down Expand Up @@ -1897,9 +1909,18 @@ void UnwrappedLineParser::parseStructuralElement(
if (Style.isJavaScript())
break;

TokenCount = Line->Tokens.size();
if (TokenCount == 1 ||
(TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
auto OneTokenSoFar = [&]() {
const UnwrappedLineNode *Tok = &Line->Tokens.front(),
*End = Tok + Line->Tokens.size();
while (Tok != End && Tok->Tok->is(tok::comment))
++Tok;
// In Verilog, macro invocations start with a backtick which the code
// treats as a hash. Skip it.
if (Style.isVerilog() && Tok != End && Tok->Tok->is(tok::hash))
++Tok;
return End - Tok == 1;
};
if (OneTokenSoFar()) {
if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
Line->Tokens.begin()->Tok->MustBreakBefore = true;
parseLabel(!Style.IndentGotoLabels);
Expand Down Expand Up @@ -2577,7 +2598,7 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
FormatToken *IfLeftBrace = nullptr;
IfStmtKind IfBlockKind = IfStmtKind::NotIf;

if (FormatTok->is(tok::l_brace)) {
if (Keywords.isBlockBegin(*FormatTok, Style)) {
FormatTok->setFinalizedType(TT_ControlStatementLBrace);
IfLeftBrace = FormatTok;
CompoundStatementIndenter Indenter(this, Style, Line->Level);
Expand Down Expand Up @@ -2610,7 +2631,7 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
}
nextToken();
handleAttributes();
if (FormatTok->is(tok::l_brace)) {
if (Keywords.isBlockBegin(*FormatTok, Style)) {
const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
FormatTok->setFinalizedType(TT_ElseLBrace);
ElseLeftBrace = FormatTok;
Expand Down Expand Up @@ -2877,7 +2898,7 @@ void UnwrappedLineParser::parseNew() {
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
keepAncestorBraces();

if (FormatTok->is(tok::l_brace)) {
if (Keywords.isBlockBegin(*FormatTok, Style)) {
if (!KeepBraces)
FormatTok->setFinalizedType(TT_ControlStatementLBrace);
FormatToken *LeftBrace = FormatTok;
Expand Down Expand Up @@ -4166,6 +4187,16 @@ void UnwrappedLineParser::nextToken(int LevelDifference) {
else
readTokenWithJavaScriptASI();
FormatTok->Previous = Previous;
if (Style.isVerilog()) {
// Blocks in Verilog can have `begin` and `end` instead of braces. For
// keywords like `begin`, we can't treat them the same as left braces
// because some contexts require one of them. For example structs use
// braces and if blocks use keywords, and a left brace can occur in an if
// statement, but it is not a block. For keywords like `end`, we simply
// treat them the same as right braces.
if (Keywords.isVerilogEnd(*FormatTok))
FormatTok->Tok.setKind(tok::r_brace);
}
}

void UnwrappedLineParser::distributeComments(
Expand Down Expand Up @@ -4261,6 +4292,8 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
PreviousWasComment = FormatTok->is(tok::comment);

while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
(!Style.isVerilog() ||
Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
FirstNonCommentOnLine) {
distributeComments(Comments, FormatTok);
Comments.clear();
Expand Down
13 changes: 12 additions & 1 deletion clang/tools/clang-format/ClangFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,18 @@ static cl::opt<std::string> AssumeFileName(
"assume-filename",
cl::desc("Override filename used to determine the language.\n"
"When reading from stdin, clang-format assumes this\n"
"filename to determine the language."),
"filename to determine the language.\n"
"Unrecognized filenames are treated as C++.\n"
"supported:\n"
" CSharp: .cs\n"
" Java: .java\n"
" JavaScript: .mjs .js .ts\n"
" Json: .json\n"
" Objective-C: .m .mm\n"
" Proto: .proto .protodevel\n"
" TableGen: .td\n"
" TextProto: .textpb .pb.txt .textproto .asciipb\n"
" Verilog: .sv .svh .v .vh"),
cl::init("<stdin>"), cl::cat(ClangFormatCategory));

static cl::opt<bool> Inplace("i",
Expand Down
1 change: 1 addition & 0 deletions clang/unittests/Format/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ add_clang_unittest(FormatTests
FormatTestSelective.cpp
FormatTestTableGen.cpp
FormatTestTextProto.cpp
FormatTestVerilog.cpp
MacroExpanderTest.cpp
NamespaceEndCommentsFixerTest.cpp
QualifierFixerTest.cpp
Expand Down
7 changes: 5 additions & 2 deletions clang/unittests/Format/FormatTestUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ namespace clang {
namespace format {
namespace test {

inline std::string messUp(llvm::StringRef Code) {
// When HandleHash is false, preprocessor directives starting with hash will not
// be on separate lines. This is needed because Verilog uses hash for other
// purposes.
inline std::string messUp(llvm::StringRef Code, bool HandleHash = true) {
std::string MessedUp(Code.str());
bool InComment = false;
bool InPreprocessorDirective = false;
Expand All @@ -29,7 +32,7 @@ inline std::string messUp(llvm::StringRef Code) {
if (JustReplacedNewline)
MessedUp[i - 1] = '\n';
InComment = true;
} else if (MessedUp[i] == '#' &&
} else if (HandleHash && MessedUp[i] == '#' &&
(JustReplacedNewline || i == 0 || MessedUp[i - 1] == '\n')) {
if (i != 0)
MessedUp[i - 1] = '\n';
Expand Down
251 changes: 251 additions & 0 deletions clang/unittests/Format/FormatTestVerilog.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
//===- unittest/Format/FormatTestVerilog.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "FormatTestUtils.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Debug.h"
#include "gtest/gtest.h"

#define DEBUG_TYPE "format-test"

namespace clang {
namespace format {

/// Fixture providing helpers that run the formatter on Verilog snippets and
/// compare the output against the expected text.
class FormatTestVerilog : public ::testing::Test {
protected:
  /// Reformat \p Code with \p Style, restricted to the single range built
  /// from \p Offset and \p Length, and return the text with all replacements
  /// applied. Input and output are dumped when debug output is enabled.
  static std::string format(llvm::StringRef Code, unsigned Offset,
                            unsigned Length, const FormatStyle &Style) {
    LLVM_DEBUG(llvm::errs() << "---\n" << Code << "\n\n");
    const std::vector<tooling::Range> Ranges = {
        tooling::Range(Offset, Length)};
    auto Result = applyAllReplacements(Code, reformat(Style, Code, Ranges));
    EXPECT_TRUE(static_cast<bool>(Result));
    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
    return *Result;
  }

  /// Reformat all of \p Code, by default with the LLVM style for Verilog.
  static std::string
  format(llvm::StringRef Code,
         const FormatStyle &Style = getLLVMStyle(FormatStyle::LK_Verilog)) {
    const unsigned WholeLength = Code.size();
    return format(Code, /*Offset=*/0, WholeLength, Style);
  }

  /// Expect that \p Code is already formatted (formatting it changes nothing)
  /// and that a messed-up version of it formats back to \p Code. The hash is
  /// not given preprocessor treatment when messing up the code.
  static void verifyFormat(
      llvm::StringRef Code,
      const FormatStyle &Style = getLLVMStyle(FormatStyle::LK_Verilog)) {
    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
    EXPECT_EQ(Code.str(),
              format(test::messUp(Code, /*HandleHash=*/false), Style));
  }
};

// Checks that delay controls introduced by '#' are formatted as expected.
TEST_F(FormatTestVerilog, Delay) {
  // Statements that have to come out of the formatter unchanged, checked in
  // this order.
  static const char *const StableDelays[] = {
      // Delay by the default unit.
      "#0;",
      "#1;",
      "#10;",
      "#1.5;",
      // Explicit unit.
      "#1fs;",
      "#1.5fs;",
      "#1ns;",
      "#1.5ns;",
      "#1us;",
      "#1.5us;",
      "#1ms;",
      "#1.5ms;",
      "#1s;",
      "#1.5s;",
      // The following expression should be on the same line.
      "#1 x = x;",
  };
  for (const char *Delay : StableDelays)
    verifyFormat(Delay);

  // A line break between the delay and the statement gets removed.
  EXPECT_EQ("#1 x = x;", format("#1\n"
                                "x = x;"));
}

// Checks the formatting of `if` statements: bodies without block keywords,
// `else` chains, bodies delimited by block keywords (optionally labeled), and
// concatenation braces that must not be mistaken for blocks. Every case goes
// through verifyFormat, so each snippet is given as the expected output.
TEST_F(FormatTestVerilog, If) {
// A single statement as the body, alone and followed by another statement.
verifyFormat("if (x)\n"
" x = x;");
verifyFormat("if (x)\n"
" x = x;\n"
"x = x;");

// Test else, first with plain statements, then with `begin`/`end` blocks,
// then with blocks that carry a `: x` label.
verifyFormat("if (x)\n"
" x = x;\n"
"else if (x)\n"
" x = x;\n"
"else\n"
" x = x;");
verifyFormat("if (x) begin\n"
" x = x;\n"
"end else if (x) begin\n"
" x = x;\n"
"end else begin\n"
" x = x;\n"
"end");
verifyFormat("if (x) begin : x\n"
" x = x;\n"
"end : x else if (x) begin : x\n"
" x = x;\n"
"end : x else begin : x\n"
" x = x;\n"
"end : x");

// Test block keywords.
verifyFormat("if (x) begin\n"
" x = x;\n"
"end");
verifyFormat("if (x) begin : x\n"
" x = x;\n"
"end : x");
verifyFormat("if (x) begin\n"
" x = x;\n"
" x = x;\n"
"end");
// NOTE(review): these two presumably check that `fork` and `join` appearing
// inside another statement do not open or close a block - confirm.
verifyFormat("disable fork;\n"
"x = x;");
verifyFormat("rand join x x;\n"
"x = x;");
// `fork` paired with each of `join`, `join_any` and `join_none`.
verifyFormat("if (x) fork\n"
" x = x;\n"
"join");
verifyFormat("if (x) fork\n"
" x = x;\n"
"join_any");
verifyFormat("if (x) fork\n"
" x = x;\n"
"join_none");
// `generate` / `endgenerate`, without and with a label.
verifyFormat("if (x) generate\n"
" x = x;\n"
"endgenerate");
verifyFormat("if (x) generate : x\n"
" x = x;\n"
"endgenerate : x");

// Test that concatenation braces don't get regarded as blocks.
verifyFormat("if (x)\n"
" {x} = x;");
verifyFormat("if (x)\n"
" x = {x};");
verifyFormat("if (x)\n"
" x = {x};\n"
"else\n"
" {x} = {x};");
}

// Checks the handling of backtick-introduced constructs: macro definitions,
// macro invocations, and how lines starting with a preprocessor directive are
// indented compared to lines starting with a regular macro invocation.
TEST_F(FormatTestVerilog, Preprocessor) {
auto Style = getLLVMStyle(FormatStyle::LK_Verilog);
// A column limit of 20 is used for every check below except the verifyFormat
// call; the expected macro definitions are all split across continuation
// lines.
Style.ColumnLimit = 20;

// Macro definitions.
EXPECT_EQ("`define X \\\n"
" if (x) \\\n"
" x = x;",
format("`define X if(x)x=x;", Style));
EXPECT_EQ("`define X(x) \\\n"
" if (x) \\\n"
" x = x;",
format("`define X(x) if(x)x=x;", Style));
EXPECT_EQ("`define X \\\n"
" x = x; \\\n"
" x = x;",
format("`define X x=x;x=x;", Style));
// Macro definitions with invocations inside.
EXPECT_EQ("`define LIST \\\n"
" `ENTRY \\\n"
" `ENTRY",
format("`define LIST \\\n"
"`ENTRY \\\n"
"`ENTRY",
Style));
EXPECT_EQ("`define LIST \\\n"
" `x = `x; \\\n"
" `x = `x;",
format("`define LIST \\\n"
"`x = `x; \\\n"
"`x = `x;",
Style));
EXPECT_EQ("`define LIST \\\n"
" `x = `x; \\\n"
" `x = `x;",
format("`define LIST `x=`x;`x=`x;", Style));
// Macro invocations.
// This check uses verifyFormat's default style, not the local Style.
verifyFormat("`x = (`x1 + `x2 + x);");
// Lines starting with a preprocessor directive should not be indented.
std::string Directives[] = {
"begin_keywords",
"celldefine",
"default_nettype",
"define",
"else",
"elsif",
"end_keywords",
"endcelldefine",
"endif",
"ifdef",
"ifndef",
"include",
"line",
"nounconnected_drive",
"pragma",
"resetall",
"timescale",
"unconnected_drive",
"undef",
"undefineall",
};
// Each directive is placed on its own line between `if (x)` and the `;` that
// forms the body. The directive line is expected to stay as it is while the
// `;` gets indented.
for (auto &Name : Directives) {
EXPECT_EQ("if (x)\n"
"`" +
Name +
"\n"
" ;",
format("if (x)\n"
"`" +
Name +
"\n"
";",
Style));
}
// Lines starting with a regular macro invocation should be indented as a
// normal line.
EXPECT_EQ("if (x)\n"
" `x = `x;\n"
"`timescale 1ns / 1ps",
format("if (x)\n"
"`x = `x;\n"
"`timescale 1ns / 1ps",
Style));
EXPECT_EQ("if (x)\n"
"`timescale 1ns / 1ps\n"
" `x = `x;",
format("if (x)\n"
"`timescale 1ns / 1ps\n"
"`x = `x;",
Style));
std::string NonDirectives[] = {
// For `__FILE__` and `__LINE__`, although the standard classifies them as
// preprocessor directives, they are used like regular macros.
"__FILE__", "__LINE__", "elif", "foo", "x",
};
// For these names the backtick word is expected to be treated as the
// statement itself: it is indented under the `if` and the `;` from the next
// line is joined onto it.
for (auto &Name : NonDirectives) {
EXPECT_EQ("if (x)\n"
" `" +
Name + ";",
format("if (x)\n"
"`" +
Name +
"\n"
";",
Style));
}
}

} // namespace format
} // end namespace clang