Skip to content

Commit

Permalink
[Syntax] Allow to mutate syntax trees
Browse files Browse the repository at this point in the history
Summary:
This patch adds facilities to mutate the syntax trees and produce
corresponding text replacements.

The public interface of the syntax library now includes facilities to:
    1. perform type-safe modifications of syntax trees,
    2. compute textual replacements to apply the modifications,
    3. create syntax trees not backed by the source code.

For each of the three, this patch only adds a few example transformations
to illustrate the idea; support for more kinds of nodes and
transformations will be added in follow-up patches.

The high-level mutation operations are implemented on top of low-level
operations that allow arbitrary changes to the trees. The low-level
operations are considered implementation details and are not available to
the users of the library.

Reviewers: sammccall, gribozavr2

Reviewed By: gribozavr2

Subscribers: merge_guards_bot, mgorny, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64573
  • Loading branch information
ilya-biryukov committed Dec 18, 2019
1 parent bb1b0bc commit 1ad1504
Show file tree
Hide file tree
Showing 13 changed files with 572 additions and 20 deletions.
9 changes: 9 additions & 0 deletions clang/include/clang/Tooling/Syntax/BuildTree.h
Expand Up @@ -11,14 +11,23 @@
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H

#include "clang/AST/Decl.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/Tree.h"

namespace clang {
namespace syntax {

/// Build a syntax tree for the main file.
/// \param A arena that is passed through to the tree builder; the builder
///        allocates the nodes it creates from this arena's allocator.
/// \param TU the AST translation unit the syntax tree is built from.
/// \returns the root node of the newly built tree.
syntax::TranslationUnit *buildSyntaxTree(Arena &A,
const clang::TranslationUnitDecl &TU);

// Factory functions: create syntax nodes that are not backed by the source
// code. Their definitions are not part of this header.

/// Creates a leaf node for a punctuation token of kind \p K.
/// NOTE(review): presumably the token is synthesized and owned by \p A rather
/// than taken from the expanded token stream; confirm against the definition.
clang::syntax::Leaf *createPunctuation(clang::syntax::Arena &A,
clang::tok::TokenKind K);
/// Creates an empty statement node.
/// NOTE(review): presumably a statement consisting only of `;`; confirm
/// against the definition.
clang::syntax::EmptyStatement *createEmptyStatement(clang::syntax::Arena &A);

} // namespace syntax
} // namespace clang
#endif
37 changes: 37 additions & 0 deletions clang/include/clang/Tooling/Syntax/Mutations.h
@@ -0,0 +1,37 @@
//===- Mutations.h - mutate syntax trees --------------------*- C++ ---*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Defines high-level APIs for transforming syntax trees and producing the
// corresponding textual replacements.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H
#define LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H

#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/Tree.h"

namespace clang {
namespace syntax {

/// Computes textual replacements required to mimic the tree modifications made
/// to the syntax tree.
/// \param A arena providing the token buffer and source manager that the tokens
///        of \p TU refer to.
/// \param TU root of the (possibly modified) syntax tree.
/// \returns the set of replacements; applying them to the original text is
///          meant to reproduce the modifications made to the tree.
tooling::Replacements computeReplacements(const Arena &A,
const syntax::TranslationUnit &TU);

/// Removes a statement or replaces it with an empty statement where one is
/// required syntactically. E.g., in the following example:
/// if (cond) { foo(); } else bar();
/// `foo();` can be removed completely, whereas removing `bar();` requires
/// replacing it with an empty statement.
/// \param A arena of the tree that \p S belongs to.
///        NOTE(review): presumably also used to allocate the replacement empty
///        statement; the definition is not visible here.
/// \param S the statement to remove.
/// EXPECTS: S->canModify() == true
void removeStatement(syntax::Arena &A, syntax::Statement *S);

} // namespace syntax
} // namespace clang

#endif
4 changes: 4 additions & 0 deletions clang/include/clang/Tooling/Syntax/Tokens.h
Expand Up @@ -78,6 +78,10 @@ struct FileRange {
/// Gets the substring that this FileRange refers to.
llvm::StringRef text(const SourceManager &SM) const;

/// Convert to the clang range. The returned range is always a char range,
/// never a token range.
CharSourceRange toCharRange(const SourceManager &SM) const;

friend bool operator==(const FileRange &L, const FileRange &R) {
return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End);
}
Expand Down
44 changes: 43 additions & 1 deletion clang/include/clang/Tooling/Syntax/Tree.h
Expand Up @@ -65,6 +65,9 @@ class Arena {

class Tree;
class TreeBuilder;
class FactoryImpl;
class MutationsImpl;

enum class NodeKind : uint16_t;
enum class NodeRole : uint8_t;

Expand All @@ -79,6 +82,23 @@ class Node {
NodeKind kind() const { return static_cast<NodeKind>(Kind); }
NodeRole role() const { return static_cast<NodeRole>(Role); }

/// Whether the node is detached from a tree, i.e. does not have a parent.
bool isDetached() const;
/// Whether the node was created from the AST backed by the source code
/// rather than added later through mutation APIs or created with factory
/// functions.
/// When this flag is true, all subtrees are also original.
/// This flag is set to false on any modifications to the node or any of its
/// subtrees, even if this simply involves swapping existing subtrees.
bool isOriginal() const { return Original; }
/// If this function returns false, the tree cannot be modified because there
/// is no reasonable way to produce the corresponding textual replacements.
/// This can happen when the node crosses macro expansion boundaries.
///
/// Note that even if the node is not modifiable, its child nodes can be
/// modifiable.
bool canModify() const { return CanModify; }

const Tree *parent() const { return Parent; }
Tree *parent() { return Parent; }

Expand All @@ -93,11 +113,17 @@ class Node {
private:
// Tree is allowed to change the Parent link and Role.
friend class Tree;
// TreeBuilder is allowed to set the Original and CanModify flags.
friend class TreeBuilder;
// MutationsImpl sets roles and CanModify flag.
friend class MutationsImpl;

Tree *Parent;
Node *NextSibling;
unsigned Kind : 16;
unsigned Role : 8;
unsigned Original : 1;
unsigned CanModify : 1;
};

/// A leaf node points to a single token inside the expanded token stream.
Expand All @@ -121,17 +147,33 @@ class Tree : public Node {
Node *firstChild() { return FirstChild; }
const Node *firstChild() const { return FirstChild; }

Leaf *firstLeaf();
const Leaf *firstLeaf() const {
return const_cast<Tree *>(this)->firstLeaf();
}

Leaf *lastLeaf();
const Leaf *lastLeaf() const { return const_cast<Tree *>(this)->lastLeaf(); }

protected:
/// Find the first node with a corresponding role.
syntax::Node *findChild(NodeRole R);

private:
/// Prepend \p Child to the list of children and set the parent pointer.
/// A very low-level operation that does not check any invariants, only used
/// by TreeBuilder.
/// by TreeBuilder and FactoryImpl.
/// EXPECTS: Role != NodeRoleDetached.
void prependChildLowLevel(Node *Child, NodeRole Role);
friend class TreeBuilder;
friend class FactoryImpl;

/// Replace a range of children [BeforeBegin->NextSibling, End) with a list of
/// new nodes starting at \p New.
/// Only used by MutationsImpl to implement higher-level mutation operations.
/// (!) \p New can be null to model removal of the child range.
void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New);
friend class MutationsImpl;

Node *FirstChild = nullptr;
};
Expand Down
31 changes: 21 additions & 10 deletions clang/lib/Tooling/Syntax/BuildTree.cpp
Expand Up @@ -25,6 +25,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

Expand Down Expand Up @@ -85,7 +86,7 @@ class syntax::TreeBuilder {
assert(Tokens.back().kind() == tok::eof);

// Build the root of the tree, consuming all the children.
Pending.foldChildren(Tokens.drop_back(),
Pending.foldChildren(Arena, Tokens.drop_back(),
new (Arena.allocator()) syntax::TranslationUnit);

return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
Expand Down Expand Up @@ -156,9 +157,12 @@ class syntax::TreeBuilder {
assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
// Create all leaf nodes.
// Note that we do not have 'eof' in the tree.
for (auto &T : A.tokenBuffer().expandedTokens().drop_back())
Trees.insert(Trees.end(),
{&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) {
auto *L = new (A.allocator()) syntax::Leaf(&T);
L->Original = true;
L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
Trees.insert(Trees.end(), {&T, NodeAndRole{L}});
}
}

~Forest() { assert(DelayedFolds.empty()); }
Expand All @@ -176,18 +180,19 @@ class syntax::TreeBuilder {
}

/// Add \p Node to the forest and attach child nodes based on \p Tokens.
void foldChildren(llvm::ArrayRef<syntax::Token> Tokens,
void foldChildren(const syntax::Arena &A,
llvm::ArrayRef<syntax::Token> Tokens,
syntax::Tree *Node) {
// Execute delayed folds inside `Tokens`.
auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin());
auto It = BeginExecuted;
for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It)
foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End),
foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End),
It->second.Node);
DelayedFolds.erase(BeginExecuted, It);

// Attach children to `Node`.
foldChildrenEager(Tokens, Node);
foldChildrenEager(A, Tokens, Node);
}

/// Schedule a call to `foldChildren` that will only be executed when
Expand Down Expand Up @@ -244,7 +249,8 @@ class syntax::TreeBuilder {
private:
/// Implementation detail of `foldChildren`, does actual folding ignoring
/// delayed folds.
void foldChildrenEager(llvm::ArrayRef<syntax::Token> Tokens,
void foldChildrenEager(const syntax::Arena &A,
llvm::ArrayRef<syntax::Token> Tokens,
syntax::Tree *Node) {
assert(Node->firstChild() == nullptr && "node already has children");

Expand All @@ -263,6 +269,10 @@ class syntax::TreeBuilder {
Node->prependChildLowLevel(std::prev(It)->second.Node,
std::prev(It)->second.Role);

// Mark that this node came from the AST and is backed by the source code.
Node->Original = true;
Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();

Trees.erase(BeginChildren, EndChildren);
Trees.insert({FirstToken, NodeAndRole(Node)});
}
Expand Down Expand Up @@ -585,7 +595,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {

void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
syntax::Tree *New) {
Pending.foldChildren(Range, New);
Pending.foldChildren(Arena, Range, New);
}

void syntax::TreeBuilder::noticeDeclaratorRange(
Expand Down Expand Up @@ -617,7 +627,8 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
Pending.assignRole(getExprRange(E),
NodeRole::ExpressionStatement_expression);
// (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon.
Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
Pending.foldChildren(Arena, Range,
new (allocator()) syntax::ExpressionStatement);
}
Pending.assignRole(Range, Role);
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Tooling/Syntax/CMakeLists.txt
Expand Up @@ -2,7 +2,10 @@ set(LLVM_LINK_COMPONENTS Support)

add_clang_library(clangToolingSyntax
BuildTree.cpp
ComputeReplacements.cpp
Nodes.cpp
Mutations.cpp
Synthesis.cpp
Tokens.cpp
Tree.cpp

Expand Down
126 changes: 126 additions & 0 deletions clang/lib/Tooling/Syntax/ComputeReplacements.cpp
@@ -0,0 +1,126 @@
//===- ComputeReplacements.cpp --------------------------------*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Syntax/Mutations.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/Support/Error.h"

using namespace clang;

namespace {
using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>,
                                                bool /*IsOriginal*/)>;
/// Walks the leaves below \p Root in tree order and reports every maximal run
/// of tokens to \p Callback. A run is extended only while the next leaf's
/// token sits directly after the previous one in memory and both leaves agree
/// on isOriginal(); otherwise the current run is reported and a new one starts.
void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
  class SpanCollector {
  public:
    explicit SpanCollector(ProcessTokensFn Fn) : Report(Fn) {}

    void run(const syntax::Tree *Top) {
      visit(Top);
      // A run is only reported when its successor starts, so the final run is
      // still pending once the walk is over.
      flush();
    }

  private:
    /// Hands the run recorded so far (if any) to the callback.
    void flush() {
      if (RunBegin)
        Report(llvm::makeArrayRef(RunBegin, RunEnd), RunIsOriginal);
    }

    void visit(const syntax::Node *N) {
      if (auto *Subtree = dyn_cast<syntax::Tree>(N)) {
        const syntax::Node *Child = Subtree->firstChild();
        while (Child != nullptr) {
          visit(Child);
          Child = Child->nextSibling();
        }
        return;
      }

      auto *TokenNode = cast<syntax::Leaf>(N);
      const syntax::Token *Tok = TokenNode->token();
      const bool FromSource = TokenNode->isOriginal();

      const bool ContinuesRun = RunEnd == Tok && RunIsOriginal == FromSource;
      if (ContinuesRun) {
        ++RunEnd;
        return;
      }

      // The leaf does not continue the current run: report what we have and
      // open a fresh single-token run.
      flush();
      RunBegin = Tok;
      RunEnd = Tok + 1;
      RunIsOriginal = FromSource;
    }

    const syntax::Token *RunBegin = nullptr;
    const syntax::Token *RunEnd = nullptr;
    bool RunIsOriginal = false;
    ProcessTokensFn Report;
  };

  SpanCollector(Callback).run(Root);
}

/// Maps a range of expanded tokens to the file range covered by the
/// corresponding spelled tokens. An empty \p Expanded yields a zero-length
/// range located at the expansion location of the token at Expanded.begin().
/// EXPECTS: \p Expanded points into the arena's expanded tokens and ends
/// before their end(); non-empty ranges have a spelled counterpart.
syntax::FileRange rangeOfExpanded(const syntax::Arena &A,
                                  llvm::ArrayRef<syntax::Token> Expanded) {
  const auto &Buffer = A.tokenBuffer();
  const auto &SrcMgr = A.sourceManager();

  // Check that \p Expanded actually points into expanded tokens.
  assert(Buffer.expandedTokens().begin() <= Expanded.begin());
  assert(Expanded.end() < Buffer.expandedTokens().end());

  if (!Expanded.empty()) {
    auto Spelled = Buffer.spelledForExpanded(Expanded);
    assert(Spelled && "could not find spelled tokens for expanded");
    return syntax::Token::range(SrcMgr, Spelled->front(), Spelled->back());
  }

  // (!) An empty range still points before end() (asserted above), so the
  // token at begin() can be read to find the insertion point.
  const auto InsertionLoc =
      SrcMgr.getExpansionLoc(Expanded.begin()->location());
  return syntax::FileRange(SrcMgr, InsertionLoc, /*Length=*/0);
}
} // namespace

/// Turns the difference between the original expanded token stream and the
/// leaves currently found in \p TU into text replacements: text of
/// non-original tokens is accumulated as inserted text, and original tokens
/// that are skipped between two reported original spans form the replaced
/// range.
tooling::Replacements
syntax::computeReplacements(const syntax::Arena &A,
                            const syntax::TranslationUnit &TU) {
  const auto &Buffer = A.tokenBuffer();
  const auto &SM = A.sourceManager();

  tooling::Replacements Result;
  // Text of non-original tokens seen since the last emitted replacement.
  std::string PendingText;

  // Emits one replacement that swaps \p Dropped for the pending text. Does
  // nothing when there is neither a range to replace nor text to insert.
  auto flushPending = [&](llvm::ArrayRef<syntax::Token> Dropped) {
    const bool NothingToDo = Dropped.empty() && PendingText.empty();
    if (NothingToDo)
      return;
    tooling::Replacement Edit(
        SM, rangeOfExpanded(A, Dropped).toCharRange(SM), PendingText);
    llvm::cantFail(Result.add(Edit));
    PendingText.clear();
  };

  // First original token that has not been accounted for yet.
  const syntax::Token *NextOriginal = Buffer.expandedTokens().begin();

  auto onSpan = [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
    if (IsOriginal) {
      assert(NextOriginal <= Tokens.begin());
      // [NextOriginal, Tokens.begin()) holds the original tokens missing from
      // the tree. With no gap it is the empty range at NextOriginal, in which
      // case only pending insertions (if any) get emitted.
      flushPending(llvm::makeArrayRef(NextOriginal, Tokens.begin()));
      NextOriginal = Tokens.end();
      return;
    }
    // Tokens not backed by the original tree only contribute their text.
    PendingText +=
        syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM);
  };
  enumerateTokenSpans(&TU, onSpan);

  // Whatever is left up to (but excluding) the trailing token of the expanded
  // stream is replaced by any text that is still pending.
  flushPending(llvm::makeArrayRef(
      NextOriginal, Buffer.expandedTokens().drop_back().end()));

  return Result;
}

0 comments on commit 1ad1504

Please sign in to comment.