Skip to content

Commit

Permalink
fix readability-braces-around-statements Stmt type dependency
Browse files Browse the repository at this point in the history
Replaces Token based approach to identify EndLoc of Stmt with AST traversal.
This also improves handling of macros.

Fixes Bugs 22785, 25970 and 35754.
  • Loading branch information
AlexanderLanin authored and AaronBallman committed Mar 25, 2021
1 parent ea61708 commit 0becc4d
Show file tree
Hide file tree
Showing 4 changed files with 368 additions and 54 deletions.
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "BracesAroundStatementsCheck.h"
#include "../utils/LexerUtils.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Lex/Lexer.h"
Expand All @@ -16,10 +17,9 @@ using namespace clang::ast_matchers;
namespace clang {
namespace tidy {
namespace readability {
namespace {

tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM,
const ASTContext *Context) {
static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM,
const ASTContext *Context) {
Token Tok;
SourceLocation Beginning =
Lexer::GetBeginningOfToken(Loc, SM, Context->getLangOpts());
Expand All @@ -33,9 +33,9 @@ tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM,
return Tok.getKind();
}

SourceLocation forwardSkipWhitespaceAndComments(SourceLocation Loc,
const SourceManager &SM,
const ASTContext *Context) {
static SourceLocation
forwardSkipWhitespaceAndComments(SourceLocation Loc, const SourceManager &SM,
const ASTContext *Context) {
assert(Loc.isValid());
for (;;) {
while (isWhitespace(*SM.getCharacterData(Loc)))
Expand All @@ -50,31 +50,15 @@ SourceLocation forwardSkipWhitespaceAndComments(SourceLocation Loc,
}
}

SourceLocation findEndLocation(SourceLocation LastTokenLoc,
const SourceManager &SM,
const ASTContext *Context) {
static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM,
const ASTContext *Context) {
SourceLocation Loc =
Lexer::GetBeginningOfToken(LastTokenLoc, SM, Context->getLangOpts());
// Loc points to the beginning of the last (non-comment non-ws) token
// before end or ';'.
assert(Loc.isValid());
bool SkipEndWhitespaceAndComments = true;
tok::TokenKind TokKind = getTokenKind(Loc, SM, Context);
if (TokKind == tok::NUM_TOKENS || TokKind == tok::semi ||
TokKind == tok::r_brace) {
// If we are at ";" or "}", we found the last token. We could use as well
// `if (isa<NullStmt>(S))`, but it wouldn't work for nested statements.
SkipEndWhitespaceAndComments = false;
}
utils::lexer::getUnifiedEndLoc(S, SM, Context->getLangOpts());
if (!Loc.isValid())
return Loc;

Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, Context->getLangOpts());
// Loc points past the last token before end or after ';'.
if (SkipEndWhitespaceAndComments) {
Loc = forwardSkipWhitespaceAndComments(Loc, SM, Context);
tok::TokenKind TokKind = getTokenKind(Loc, SM, Context);
if (TokKind == tok::semi)
Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, Context->getLangOpts());
}
// Start searching right after S.
Loc = Loc.getLocWithOffset(1);

for (;;) {
assert(Loc.isValid());
Expand Down Expand Up @@ -109,8 +93,6 @@ SourceLocation findEndLocation(SourceLocation LastTokenLoc,
return Loc;
}

} // namespace

BracesAroundStatementsCheck::BracesAroundStatementsCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
Expand Down Expand Up @@ -224,13 +206,6 @@ bool BracesAroundStatementsCheck::checkStmt(
const SourceManager &SM = *Result.SourceManager;
const ASTContext *Context = Result.Context;

// Treat macros.
CharSourceRange FileRange = Lexer::makeFileCharRange(
CharSourceRange::getTokenRange(S->getSourceRange()), SM,
Context->getLangOpts());
if (FileRange.isInvalid())
return false;

// Convert InitialLoc to file location, if it's on the same macro expansion
// level as the start of the statement. We also need file locations for
// Lexer::getLocForEndOfToken working properly.
Expand All @@ -250,13 +225,12 @@ bool BracesAroundStatementsCheck::checkStmt(
EndLoc = EndLocHint;
ClosingInsertion = "} ";
} else {
const auto FREnd = FileRange.getEnd().getLocWithOffset(-1);
EndLoc = findEndLocation(FREnd, SM, Context);
EndLoc = findEndLocation(*S, SM, Context);
ClosingInsertion = "\n}";
}

assert(StartLoc.isValid());
assert(EndLoc.isValid());

// Don't require braces for statements spanning less than certain number of
// lines.
if (ShortStatementLines && !ForceBracesStmts.erase(S)) {
Expand All @@ -267,6 +241,20 @@ bool BracesAroundStatementsCheck::checkStmt(
}

auto Diag = diag(StartLoc, "statement should be inside braces");

// Change only if StartLoc and EndLoc are on the same macro expansion level.
// This will also catch invalid EndLoc.
// Example: LLVM_DEBUG( for(...) do_something() );
// In this case fix-it cannot be provided as the semicolon which is not
// visible here is part of the macro. Adding braces here would require adding
// another semicolon.
if (Lexer::makeFileCharRange(
CharSourceRange::getTokenRange(SourceRange(
SM.getSpellingLoc(StartLoc), SM.getSpellingLoc(EndLoc))),
SM, Context->getLangOpts())
.isInvalid())
return false;

Diag << FixItHint::CreateInsertion(StartLoc, " {")
<< FixItHint::CreateInsertion(EndLoc, ClosingInsertion);
return true;
Expand Down
65 changes: 65 additions & 0 deletions clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "LexerUtils.h"
#include "clang/AST/AST.h"
#include "clang/Basic/SourceManager.h"

namespace clang {
Expand Down Expand Up @@ -148,6 +149,70 @@ llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
return LastMatchAfterTemplate != None ? LastMatchAfterTemplate
: LastMatchBeforeTemplate;
}

static bool breakAndReturnEnd(const Stmt &S) {
return isa<CompoundStmt, DeclStmt, NullStmt>(S);
}

static bool breakAndReturnEndPlus1Token(const Stmt &S) {
return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, SEHLeaveStmt>(S);
}

// Given a Stmt which does not include it's semicolon this method returns the
// SourceLocation of the semicolon.
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
const SourceManager &SM,
const LangOptions &LangOpts) {

if (EndLoc.isMacroID()) {
// Assuming EndLoc points to a function call foo within macro F.
// This method is supposed to return location of the semicolon within
// those macro arguments:
// F ( foo() ; )
// ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
Optional<Token> NextTok =
findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);

// Was the next token found successfully?
// All macro issues are simply resolved by ensuring it's a semicolon.
if (NextTok && NextTok->is(tok::TokenKind::semi)) {
// Ideally this would return `F` with spelling location `;` (NextTok)
// following the examle above. For now simply return NextTok location.
return NextTok->getLocation();
}

// Fallthrough to 'normal handling'.
// F ( foo() ) ;
// ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
}

Optional<Token> NextTok = findNextTokenSkippingComments(EndLoc, SM, LangOpts);

// Testing for semicolon again avoids some issues with macros.
if (NextTok && NextTok->is(tok::TokenKind::semi))
return NextTok->getLocation();

return SourceLocation();
}

SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
const LangOptions &LangOpts) {

const Stmt *LastChild = &S;
while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
!breakAndReturnEndPlus1Token(*LastChild)) {
for (const Stmt *Child : LastChild->children())
LastChild = Child;
}

if (!breakAndReturnEnd(*LastChild) &&
breakAndReturnEndPlus1Token(*LastChild))
return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);

return S.getEndLoc();
}

} // namespace lexer
} // namespace utils
} // namespace tidy
Expand Down
8 changes: 8 additions & 0 deletions clang-tools-extra/clang-tidy/utils/LexerUtils.h
Expand Up @@ -14,6 +14,9 @@
#include "clang/Lex/Lexer.h"

namespace clang {

class Stmt;

namespace tidy {
namespace utils {
namespace lexer {
Expand Down Expand Up @@ -104,6 +107,11 @@ llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
const ASTContext &Context,
const SourceManager &SM);

/// Stmt->getEndLoc does not always behave the same way depending on Token type.
/// See implementation for exceptions.
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
const LangOptions &LangOpts);

} // namespace lexer
} // namespace utils
} // namespace tidy
Expand Down

0 comments on commit 0becc4d

Please sign in to comment.