Skip to content

Commit

Permalink
[clang-format] Add simple macro replacements in formatting.
Browse files Browse the repository at this point in the history
Add configuration to specify macros.
Macros will be expanded, and the code will be parsed and annotated
in the expanded state. In a second step, the formatting decisions
in the annotated expanded code will be reconstructed onto the
original unexpanded macro call.

Eventually, this will allow us to remove the special-case code for
various macro options that we have accumulated over the years, in favor
of one principled mechanism.

Differential Revision: https://reviews.llvm.org/D144170
  • Loading branch information
r4nt committed Feb 24, 2023
1 parent bf579a7 commit 0140283
Show file tree
Hide file tree
Showing 18 changed files with 737 additions and 102 deletions.
36 changes: 35 additions & 1 deletion clang/include/clang/Format/Format.h
Expand Up @@ -2745,6 +2745,39 @@ struct FormatStyle {
/// \version 3.7
std::string MacroBlockEnd;

/// A list of macros of the form \c <definition>=<expansion> .
///
/// Code will be parsed with macros expanded, in order to determine how to
/// interpret and format the macro arguments.
///
/// For example, the code:
/// \code
/// A(a*b);
/// \endcode
/// will usually be interpreted as a call to a function A, and the
/// multiplication expression will be formatted as `a * b`.
///
/// If we specify the macro definition:
/// \code
/// Macros:
/// - A(x)=x
/// \endcode
/// the code will now be parsed as a declaration of the variable b of type a*,
/// and formatted as `a* b` (depending on pointer-binding rules).
///
/// Features and restrictions:
/// * Both function-like macros and object-like macros are supported.
/// * Macro arguments must be used exactly once in the expansion.
/// * No recursive expansion; macros referencing other macros will be
/// ignored.
/// * Overloading by arity is supported: for example, given the macro
/// definitions A=x, A()=y, A(a)=a,
/// 'A;' -> 'x;'
/// 'A();' -> 'y;'
/// 'A(z);' -> 'z;'
/// 'A(a, b)' will not be expanded.
std::vector<std::string> Macros;

/// The maximum number of consecutive empty lines to keep.
/// \code
/// MaxEmptyLinesToKeep: 1 vs. MaxEmptyLinesToKeep: 0
Expand Down Expand Up @@ -4306,7 +4339,8 @@ struct FormatStyle {
StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros &&
Macros == R.Macros;
}

std::optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Format/ContinuationIndenter.cpp
Expand Up @@ -18,6 +18,7 @@
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -739,9 +740,14 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
if (Previous.is(TT_TemplateString) && Previous.opensScope())
CurrentState.NoLineBreak = true;

// Align following lines within parentheses / brackets if configured.
// Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
// with args as children of the '(' and ',' tokens. It does not make sense to
// align the commas with the opening paren.
if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
!CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
!(Current.MacroParent && Previous.MacroParent) &&
(Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) {
CurrentState.Indent = State.Column + Spaces;
CurrentState.IsAligned = true;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Format/Format.cpp
Expand Up @@ -1036,6 +1036,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("UseTab", Style.UseTab);
IO.mapOptional("WhitespaceSensitiveMacros",
Style.WhitespaceSensitiveMacros);
IO.mapOptional("Macros", Style.Macros);

// If AlwaysBreakAfterDefinitionReturnType was specified but
// AlwaysBreakAfterReturnType was not, initialize the latter from the
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Format/FormatToken.h
Expand Up @@ -377,6 +377,11 @@ struct FormatToken {
/// binary operator.
TokenType getType() const { return Type; }
void setType(TokenType T) {
// If this token is a macro argument while formatting an unexpanded macro
// call, we do not change its type any more - the type was deduced from
// formatting the expanded macro stream already.
if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
return;
assert((!TypeIsFinalized || T == Type) &&
"Please use overwriteFixedType to change a fixed type.");
Type = T;
Expand Down
36 changes: 28 additions & 8 deletions clang/lib/Format/MacroExpander.cpp
Expand Up @@ -141,24 +141,44 @@ void MacroExpander::parseDefinition(const std::string &Macro) {
if (!Tokens.empty()) {
DefinitionParser Parser(Tokens);
auto Definition = Parser.parse();
Definitions[Definition.Name] = std::move(Definition);
if (Definition.ObjectLike) {
ObjectLike[Definition.Name] = std::move(Definition);
} else {
FunctionLike[Definition.Name][Definition.Params.size()] =
std::move(Definition);
}
}
}

bool MacroExpander::defined(llvm::StringRef Name) const {
return Definitions.find(Name) != Definitions.end();
return FunctionLike.find(Name) != FunctionLike.end() ||
ObjectLike.find(Name) != ObjectLike.end();
}

bool MacroExpander::objectLike(llvm::StringRef Name) const {
return Definitions.find(Name)->second.ObjectLike;
return ObjectLike.find(Name) != ObjectLike.end();
}

llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
ArgsList Args) const {
assert(defined(ID->TokenText));
SmallVector<FormatToken *, 8> Result;
const Definition &Def = Definitions.find(ID->TokenText)->second;
/// Checks whether a function-like definition of \p Name that takes exactly
/// \p Arity parameters is stored in \c FunctionLike.
bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const {
  const auto Overloads = FunctionLike.find(Name);
  // No function-like macro of that name is known at all.
  if (Overloads == FunctionLike.end())
    return false;
  // The inner map is keyed by parameter count; look for an exact match.
  const auto &ByArity = Overloads->second;
  return ByArity.find(Arity) != ByArity.end();
}

llvm::SmallVector<FormatToken *, 8>
MacroExpander::expand(FormatToken *ID,
std::optional<ArgsList> OptionalArgs) const {
if (OptionalArgs)
assert(hasArity(ID->TokenText, OptionalArgs->size()));
else
assert(objectLike(ID->TokenText));
const Definition &Def = OptionalArgs
? FunctionLike.find(ID->TokenText)
->second.find(OptionalArgs.value().size())
->second
: ObjectLike.find(ID->TokenText)->second;
ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
SmallVector<FormatToken *, 8> Result;
// Expand each argument at most once.
llvm::StringSet<> ExpandedArgs;

Expand Down
21 changes: 14 additions & 7 deletions clang/lib/Format/Macros.h
Expand Up @@ -106,17 +106,23 @@ class MacroExpander {
IdentifierTable &IdentTable);
~MacroExpander();

/// Returns whether a macro \p Name is defined.
/// Returns whether any macro \p Name is defined, regardless of overloads.
bool defined(llvm::StringRef Name) const;

/// Returns whether the macro has no arguments and should not consume
/// subsequent parentheses.
/// Returns whether there is an object-like overload, i.e. where the macro
/// has no arguments and should not consume subsequent parentheses.
bool objectLike(llvm::StringRef Name) const;

/// Returns whether macro \p Name provides an overload with the given arity.
bool hasArity(llvm::StringRef Name, unsigned Arity) const;

/// Returns the expanded stream of format tokens for \p ID, where
/// each element in \p Args is a positional argument to the macro call.
llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
ArgsList Args) const;
/// If \p OptionalArgs is not set, the object-like overload is used.
/// If \p OptionalArgs is set, the overload with the arity equal to
/// \c OptionalArgs->size() is used.
llvm::SmallVector<FormatToken *, 8>
expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;

private:
struct Definition;
Expand All @@ -129,7 +135,8 @@ class MacroExpander {
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
IdentifierTable &IdentTable;
SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
llvm::StringMap<Definition> Definitions;
llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
llvm::StringMap<Definition> ObjectLike;
};

/// Converts a sequence of UnwrappedLines containing expanded macros into a
Expand All @@ -149,7 +156,7 @@ class MacroExpander {
///
/// After this point, the state of the spelled/expanded stream is "in sync"
/// (both at the start of an UnwrappedLine, with no macros open), so the
/// Unexpander can be thrown away and parsing can continue.
/// Reconstructor can be thrown away and parsing can continue.
///
/// Given a mapping from the macro name identifier token in the macro call
/// to the tokens of the macro call, for example:
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/Format/TokenAnalyzer.cpp
Expand Up @@ -104,12 +104,12 @@ TokenAnalyzer::process(bool SkipAnnotation) {
IdentifierTable IdentTable(getFormattingLangOpts(Style));
FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(),
Env.getFirstStartColumn(), Style, Encoding, Allocator,

IdentTable);
ArrayRef<FormatToken *> Toks(Lex.lex());
SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
UnwrappedLineParser Parser(Style, Lex.getKeywords(),
Env.getFirstStartColumn(), Tokens, *this);
UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(),
Env.getFirstStartColumn(), Tokens, *this,
Allocator, IdentTable);
Parser.parse();
assert(UnwrappedLines.back().empty());
unsigned Penalty = 0;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Format/TokenAnalyzer.h
Expand Up @@ -46,7 +46,7 @@ class Environment {

FileID getFileID() const { return ID; }

const SourceManager &getSourceManager() const { return SM; }
SourceManager &getSourceManager() const { return SM; }

ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }

Expand Down
13 changes: 12 additions & 1 deletion clang/lib/Format/TokenAnnotator.cpp
Expand Up @@ -2614,6 +2614,13 @@ class ExpressionParser {
// Consume operators with higher precedence.
parse(Precedence + 1);

// Do not assign fake parentheses to tokens that are part of an
// unexpanded macro call. The line within the macro call contains
// the parentheses and commas, and we will not find operators within
// that structure.
if (Current && Current->MacroParent)
break;

int CurrentPrecedence = getCurrentPrecedence();

if (Precedence == CurrentPrecedence && Current &&
Expand Down Expand Up @@ -4389,8 +4396,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
return true;
}
if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) &&
// In an unexpanded macro call we only find the parentheses and commas
// in a line; the commas and closing parenthesis do not require a space.
(Left.Children.empty() || !Left.MacroParent)) {
return true;
}
if (Right.is(tok::comma))
return false;
if (Right.is(TT_ObjCBlockLParen))
Expand Down
25 changes: 20 additions & 5 deletions clang/lib/Format/TokenAnnotator.h
Expand Up @@ -65,20 +65,32 @@ class AnnotatedLine {
// left them in a different state.
First->Previous = nullptr;
FormatToken *Current = First;
addChildren(Line.Tokens.front(), Current);
for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
if (Node.Tok->MacroParent)
ContainsMacroCall = true;
Current->Next = Node.Tok;
Node.Tok->Previous = Current;
Current = Current->Next;
Current->Children.clear();
for (const auto &Child : Node.Children) {
Children.push_back(new AnnotatedLine(Child));
Current->Children.push_back(Children.back());
}
addChildren(Node, Current);
// FIXME: if we add children, previous will point to the token before
// the children; changing this requires significant changes across
// clang-format.
}
Last = Current;
Last->Next = nullptr;
}

void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
Current->Children.clear();
for (const auto &Child : Node.Children) {
Children.push_back(new AnnotatedLine(Child));
if (Children.back()->ContainsMacroCall)
ContainsMacroCall = true;
Current->Children.push_back(Children.back());
}
}

~AnnotatedLine() {
for (AnnotatedLine *Child : Children)
delete Child;
Expand Down Expand Up @@ -149,6 +161,9 @@ class AnnotatedLine {
bool MightBeFunctionDecl;
bool IsMultiVariableDeclStmt;

/// \c True if this line contains a macro call for which an expansion exists.
bool ContainsMacroCall = false;

/// \c True if this line should be formatted, i.e. intersects directly or
/// indirectly with one of the input ranges.
bool Affected;
Expand Down
37 changes: 26 additions & 11 deletions clang/lib/Format/UnwrappedLineFormatter.cpp
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "UnwrappedLineFormatter.h"
#include "FormatToken.h"
#include "NamespaceEndCommentsFixer.h"
#include "WhitespaceManager.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -918,9 +919,22 @@ class LineJoiner {

static void markFinalized(FormatToken *Tok) {
for (; Tok; Tok = Tok->Next) {
Tok->Finalized = true;
for (AnnotatedLine *Child : Tok->Children)
markFinalized(Child->First);
if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) {
// In the first pass we format all macro arguments in the expanded token
// stream. Instead of finalizing the macro arguments, we mark that they
// will be modified as unexpanded arguments (as part of the macro call
// formatting) in the next pass.
Tok->MacroCtx->Role = MR_UnexpandedArg;
// Reset whether spaces are required before this token, as that is context
// dependent, and that context may change when formatting the macro call.
// For example, given M(x) -> 2 * x, and the macro call M(var),
// the token 'var' will have SpacesRequiredBefore = 1 after being
// formatted as part of the expanded macro, but SpacesRequiredBefore = 0
// for its position within the macro call.
Tok->SpacesRequiredBefore = 0;
} else {
Tok->Finalized = true;
}
}
}

Expand Down Expand Up @@ -975,15 +989,15 @@ class LineFormatter {
bool formatChildren(LineState &State, bool NewLine, bool DryRun,
unsigned &Penalty) {
const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block);
FormatToken &Previous = *State.NextToken->Previous;
if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
Previous.Children.size() == 0) {
if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) {
// The previous token does not open a block. Nothing to do. We don't
// assert so that we can simply call this function for all tokens.
return true;
}

if (NewLine) {
if (NewLine || Previous.MacroParent) {
const ParenState &P = State.Stack.back();

int AdditionalIndent =
Expand Down Expand Up @@ -1349,11 +1363,12 @@ unsigned UnwrappedLineFormatter::format(
NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
bool FitsIntoOneLine =
TheLine.Last->TotalLength + Indent <= ColumnLimit ||
(TheLine.Type == LT_ImportStatement &&
(!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
(Style.isCSharp() &&
TheLine.InPPDirective); // don't split #regions in C#
!TheLine.ContainsMacroCall &&
(TheLine.Last->TotalLength + Indent <= ColumnLimit ||
(TheLine.Type == LT_ImportStatement &&
(!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
(Style.isCSharp() &&
TheLine.InPPDirective)); // don't split #regions in C#
if (Style.ColumnLimit == 0) {
NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
.formatLine(TheLine, NextStartColumn + Indent,
Expand Down

0 comments on commit 0140283

Please sign in to comment.