Skip to content

Commit

Permalink
[clang-format] Fix inconsistent annotation of operator&
Browse files Browse the repository at this point in the history
Token annotator incorrectly annotates operator& as a reference type in
situations like Boost serialization archives:
https://www.boost.org/doc/libs/1_81_0/libs/serialization/doc/tutorial.html

Add annotation rules for standalone and chained operator& instances while
preserving behavior for reference declarations at class scope. Add tests to
validate annotation and formatting behavior.

Differential Revision: https://reviews.llvm.org/D141959
  • Loading branch information
dkt01 authored and owenca committed Feb 5, 2023
1 parent c56846a commit 35f2ac1
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 5 deletions.
54 changes: 50 additions & 4 deletions clang/lib/Format/TokenAnnotator.cpp
Expand Up @@ -111,14 +111,29 @@ static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
class AnnotatingParser {
public:
AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
const AdditionalKeywords &Keywords)
const AdditionalKeywords &Keywords,
SmallVector<ScopeType> &Scopes)
: Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
Keywords(Keywords) {
Keywords(Keywords), Scopes(Scopes) {
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
resetTokenMetadata();
}

private:
ScopeType getScopeType(const FormatToken &Token) const {
switch (Token.getType()) {
case TT_FunctionLBrace:
case TT_LambdaLBrace:
return ST_Function;
case TT_ClassLBrace:
case TT_StructLBrace:
case TT_UnionLBrace:
return ST_Class;
default:
return ST_Other;
}
}

bool parseAngle() {
if (!CurrentToken || !CurrentToken->Previous)
return false;
Expand Down Expand Up @@ -849,6 +864,9 @@ class AnnotatingParser {
unsigned CommaCount = 0;
while (CurrentToken) {
if (CurrentToken->is(tok::r_brace)) {
assert(!Scopes.empty());
assert(Scopes.back() == getScopeType(OpeningBrace));
Scopes.pop_back();
assert(OpeningBrace.Optional == CurrentToken->Optional);
OpeningBrace.MatchingParen = CurrentToken;
CurrentToken->MatchingParen = &OpeningBrace;
Expand Down Expand Up @@ -1148,6 +1166,7 @@ class AnnotatingParser {
if (Previous && Previous->getType() != TT_DictLiteral)
Previous->setType(TT_SelectorName);
}
Scopes.push_back(getScopeType(*Tok));
if (!parseBrace())
return false;
break;
Expand Down Expand Up @@ -1178,6 +1197,9 @@ class AnnotatingParser {
case tok::r_square:
return false;
case tok::r_brace:
// Don't pop scope when encountering unbalanced r_brace.
if (!Scopes.empty())
Scopes.pop_back();
// Lines can start with '}'.
if (Tok->Previous)
return false;
Expand Down Expand Up @@ -2448,6 +2470,28 @@ class AnnotatingParser {
if (IsExpression && !Contexts.back().CaretFound)
return TT_BinaryOperator;

// Opeartors at class scope are likely pointer or reference members.
if (!Scopes.empty() && Scopes.back() == ST_Class)
return TT_PointerOrReference;

// Tokens that indicate member access or chained operator& use.
auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
tok::arrowstar, tok::periodstar);
};

// It's more likely that & represents operator& than an uninitialized
// reference.
if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
NextToken && NextToken->Tok.isAnyIdentifier()) {
if (auto NextNext = NextToken->getNextNonComment();
NextNext &&
(IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
return TT_BinaryOperator;
}
}

return TT_PointerOrReference;
}

Expand Down Expand Up @@ -2485,6 +2529,8 @@ class AnnotatingParser {
bool AutoFound;
const AdditionalKeywords &Keywords;

SmallVector<ScopeType> &Scopes;

// Set of "<" tokens that do not open a template parameter list. If parseAngle
// determines that a specific token can't be a template opener, it will make
// same decision irrespective of the decisions for tokens leading up to it.
Expand Down Expand Up @@ -2765,11 +2811,11 @@ static unsigned maxNestingDepth(const AnnotatedLine &Line) {
return Result;
}

void TokenAnnotator::annotate(AnnotatedLine &Line) const {
void TokenAnnotator::annotate(AnnotatedLine &Line) {
for (auto &Child : Line.Children)
annotate(*Child);

AnnotatingParser Parser(Style, Line, Keywords);
AnnotatingParser Parser(Style, Line, Keywords, Scopes);
Line.Type = Parser.parseLine();

// With very deep nesting, ExpressionParser uses lots of stack and the
Expand Down
13 changes: 12 additions & 1 deletion clang/lib/Format/TokenAnnotator.h
Expand Up @@ -34,6 +34,15 @@ enum LineType {
LT_CommentAbovePPDirective,
};

enum ScopeType {
// Contained in class declaration/definition.
ST_Class,
// Contained within function definition.
ST_Function,
// Contained within other scope block (loop, if/else, etc).
ST_Other,
};

class AnnotatedLine {
public:
AnnotatedLine(const UnwrappedLine &Line)
Expand Down Expand Up @@ -178,7 +187,7 @@ class TokenAnnotator {
// FIXME: Can/should this be done in the UnwrappedLineParser?
void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;

void annotate(AnnotatedLine &Line) const;
void annotate(AnnotatedLine &Line);
void calculateFormattingInformation(AnnotatedLine &Line) const;

private:
Expand Down Expand Up @@ -220,6 +229,8 @@ class TokenAnnotator {
const FormatStyle &Style;

const AdditionalKeywords &Keywords;

SmallVector<ScopeType> Scopes;
};

} // end namespace format
Expand Down
7 changes: 7 additions & 0 deletions clang/unittests/Format/FormatTest.cpp
Expand Up @@ -11288,6 +11288,13 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) {

verifyFormat("int operator()(T (&&)[N]) { return 1; }");
verifyFormat("int operator()(T (&)[N]) { return 0; }");

verifyFormat("val1 & val2;");
verifyFormat("val1 & val2 & val3;");
verifyFormat("class c {\n"
" void func(type &a) { a & member; }\n"
" anotherType &member;\n"
"}");
}

TEST_F(FormatTest, UnderstandsAttributes) {
Expand Down
67 changes: 67 additions & 0 deletions clang/unittests/Format/TokenAnnotatorTest.cpp
Expand Up @@ -175,6 +175,73 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
ASSERT_EQ(Tokens.size(), 17u) << Tokens;
EXPECT_TOKEN(Tokens[9], tok::ampamp, TT_PointerOrReference);
EXPECT_TOKEN(Tokens[12], tok::ampamp, TT_PointerOrReference);

Tokens = annotate("Type1 &val1 = val2;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_PointerOrReference);

Tokens = annotate("Type1 *val1 = &val2;");
ASSERT_EQ(Tokens.size(), 8u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::star, TT_PointerOrReference);
EXPECT_TOKEN(Tokens[4], tok::amp, TT_UnaryOperator);

Tokens = annotate("val1 & val2;");
ASSERT_EQ(Tokens.size(), 5u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1 & val2.member;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1 & val2.*member;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1.*member & val2;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1 & val2->*member;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1->member & val2;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1 & val2 & val3;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);

Tokens = annotate("val1 & val2 // comment\n"
" & val3;");
ASSERT_EQ(Tokens.size(), 8u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[4], tok::amp, TT_BinaryOperator);

Tokens =
annotate("val1 & val2.member & val3.member() & val4 & val5->member;");
ASSERT_EQ(Tokens.size(), 19u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[5], tok::amp, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[11], tok::amp, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[13], tok::amp, TT_BinaryOperator);

Tokens = annotate("class c {\n"
" void func(type &a) { a & member; }\n"
" anotherType &member;\n"
"}");
ASSERT_EQ(Tokens.size(), 22u) << Tokens;
EXPECT_TOKEN(Tokens[7], tok::amp, TT_PointerOrReference);
EXPECT_TOKEN(Tokens[12], tok::amp, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[17], tok::amp, TT_PointerOrReference);

Tokens = annotate("struct S {\n"
" auto Mem = C & D;\n"
"}");
ASSERT_EQ(Tokens.size(), 12u) << Tokens;
EXPECT_TOKEN(Tokens[7], tok::amp, TT_BinaryOperator);
}

TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) {
Expand Down

0 comments on commit 35f2ac1

Please sign in to comment.