Skip to content

Commit

Permalink
[clang-format] Enable FormatTokenSource to insert tokens.
Browse files Browse the repository at this point in the history
In preparation for configured macro replacements in formatting,
add the ability to insert tokens to FormatTokenSource, and implement
token insertion in IndexedTokenSource.

Differential Revision: https://reviews.llvm.org/D143070
  • Loading branch information
r4nt committed Feb 15, 2023
1 parent 9ccc588 commit 1995d44
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 17 deletions.
78 changes: 71 additions & 7 deletions clang/lib/Format/FormatTokenSource.h
@@ -1,4 +1,3 @@

//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
Expand All @@ -8,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the \c TokenSource interface, which provides a token
/// This file defines the \c FormatTokenSource interface, which provides a token
/// stream as well as the ability to manipulate the token stream.
///
//===----------------------------------------------------------------------===//
Expand All @@ -18,12 +17,17 @@

#include "FormatToken.h"
#include "UnwrappedLineParser.h"
#include "llvm/ADT/DenseMap.h"

#define DEBUG_TYPE "format-token-source"

namespace clang {
namespace format {

// Navigate a token stream.
//
// Enables traversal of a token stream, resetting the position in a token
// stream, as well as inserting new tokens.
class FormatTokenSource {
public:
virtual ~FormatTokenSource() {}
Expand All @@ -33,6 +37,9 @@ class FormatTokenSource {

// Returns the token preceding the token returned by the last call to
// getNextToken() in the token stream, or nullptr if no such token exists.
//
// Must not be called while the stream is still positioned at the token
// returned by insertTokens(), i.e. directly after insertTokens() was called
// and before the stream has been advanced again.
virtual FormatToken *getPreviousToken() = 0;

// Returns the token that would be returned by the next call to
Expand All @@ -45,14 +52,31 @@ class FormatTokenSource {
virtual bool isEOF() = 0;

// Gets the current position in the token stream, to be used by setPosition().
//
// Note that the value of the position is not meaningful, and specifically
// should not be used to get relative token positions.
virtual unsigned getPosition() = 0;

// Resets the token stream to the state it was in when getPosition() returned
// Position, and returns the token at that position in the stream.
virtual FormatToken *setPosition(unsigned Position) = 0;

// Inserts the given tokens before the current position.
// Returns the first token in \c Tokens.
// The next returned token will be the second token in \c Tokens.
// Requires the last token in \c Tokens to be EOF. Once that EOF token is
// reached, the stream continues with the token that was current when
// insertTokens() was called (i.e. the last token returned by getNextToken()
// before the insertion).
//
// For example, given the token sequence 'a1 a2':
// getNextToken() -> a1
// insertTokens('b1 b2') -> b1
// getNextToken() -> b2
// getNextToken() -> a1
// getNextToken() -> a2
virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
};

class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
class IndexedTokenSource : public FormatTokenSource {
public:
IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
: Tokens(Tokens), Position(-1) {}
Expand All @@ -65,7 +89,7 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
});
return Tokens[Position];
}
++Position;
Position = successor(Position);
LLVM_DEBUG({
llvm::dbgs() << "Next ";
dbgToken(Position);
Expand All @@ -74,16 +98,17 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
}

// Returns the token stored directly before the current position, or nullptr
// when the current position is the first slot (or the stream has not been
// advanced yet).
//
// Asserts that the preceding slot does not hold an EOF token.
FormatToken *getPreviousToken() override {
  if (Position <= 0)
    return nullptr;
  FormatToken *Preceding = Tokens[Position - 1];
  assert(!Preceding->is(tok::eof));
  return Preceding;
}

FormatToken *peekNextToken(bool SkipComment = false) override {
if (isEOF())
return Tokens[Position];
int Next = Position + 1;
int Next = successor(Position);
if (SkipComment)
while (Tokens[Next]->is(tok::comment))
++Next;
Next = successor(Next);
LLVM_DEBUG({
llvm::dbgs() << "Peeking ";
dbgToken(Next);
Expand All @@ -107,18 +132,53 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
return Tokens[Position];
}

// Appends the tokens in New to the end of the token storage and moves the
// current position to the first of them.
//
// A jump is recorded from the index of the last appended token (which must be
// EOF) back to the position that was current at the time of the call, so that
// traversal via successor() resumes there once the inserted tokens have been
// consumed.
//
// Preconditions (checked via assert):
//  - the stream has been advanced at least once (Position != -1),
//  - New is not empty,
//  - the last token in New is EOF.
//
// Returns the first token of New.
FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
  assert(Position != -1);
  // Check for emptiness before looking at the last element; inspecting the
  // last element of an empty sequence would be undefined behavior.
  assert(!New.empty() && "Cannot insert an empty token sequence.");
  assert(New.back()->Tok.is(tok::eof));
  // Index at which the first inserted token will live.
  int Next = Tokens.size();
  Tokens.append(New.begin(), New.end());
  LLVM_DEBUG({
    llvm::dbgs() << "Inserting:\n";
    for (int I = Next, E = Tokens.size(); I != E; ++I)
      dbgToken(I, "  ");
    llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
                 << Position << "\n";
  });
  // When traversal reaches the inserted EOF, continue at the old position.
  Jumps[Tokens.size() - 1] = Position;
  Position = Next;
  LLVM_DEBUG({
    llvm::dbgs() << "At inserted token ";
    dbgToken(Position);
  });
  return Tokens[Position];
}

// Sets the current position back to -1, the value the constructor starts
// with. Only Position is changed; Tokens and Jumps are left untouched.
void reset() { Position = -1; }

private:
int successor(int Current) const {
int Next = Current + 1;
auto it = Jumps.find(Next);
if (it != Jumps.end()) {
Next = it->second;
assert(Jumps.find(Next) == Jumps.end());
}
return Next;
}

// Writes a one-line description of the token stored at index Position to the
// debug stream: the index, the token kind name, the token text, and whether
// the token has a macro context set. Indent is printed first.
void dbgToken(int Position, llvm::StringRef Indent = "") {
  FormatToken *Tok = Tokens[Position];
  auto &OS = llvm::dbgs();
  OS << Indent << "[" << Position << "] Token: ";
  OS << Tok->Tok.getName() << " / " << Tok->TokenText;
  OS << ", Macro: " << static_cast<bool>(Tok->MacroCtx) << "\n";
}

ArrayRef<FormatToken *> Tokens;
SmallVector<FormatToken *> Tokens;
int Position;

// Maps from position a to position b, so that when we reach a, the token
// stream continues at position b instead.
llvm::DenseMap<int, int> Jumps;
};

class ScopedMacroState : public FormatTokenSource {
Expand Down Expand Up @@ -175,6 +235,10 @@ class ScopedMacroState : public FormatTokenSource {
return Token;
}

// Token insertion is not supported by this token source; calling this is a
// programming error and trips the assertion below.
//
// Always returns nullptr when assertions are compiled out, so that control
// never flows off the end of this non-void function.
FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
  assert(false && "Cannot insert tokens while parsing a macro.");
  return nullptr;
}

private:
bool eof() {
return Token && Token->HasUnescapedNewline &&
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/Format/UnwrappedLineParser.h
Expand Up @@ -280,9 +280,6 @@ class UnwrappedLineParser {
FormatTokenSource *Tokens;
UnwrappedLineConsumer &Callback;

// FIXME: This is a temporary measure until we have reworked the ownership
// of the format tokens. The goal is to have the actual tokens created and
// owned outside of and handed into the UnwrappedLineParser.
ArrayRef<FormatToken *> AllTokens;

// Keeps a stack of the states of nested control statements (true if the
Expand Down
63 changes: 56 additions & 7 deletions clang/unittests/Format/FormatTokenSourceTest.cpp
Expand Up @@ -28,12 +28,17 @@ class IndexedTokenSourceTest : public ::testing::Test {
#define EXPECT_TOKEN_KIND(FormatTok, Kind) \
do { \
FormatToken *Tok = FormatTok; \
EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \
EXPECT_EQ(Tok->Tok.getKind(), Kind) << *Tok; \
} while (false);
// Expects that FormatTok is an identifier token whose text equals Name.
// FormatTok is evaluated exactly once. The macro deliberately has no trailing
// semicolon so that `EXPECT_TOKEN_ID(...);` forms a single statement and can
// be used safely in unbraced if/else bodies.
#define EXPECT_TOKEN_ID(FormatTok, Name)                                       \
  do {                                                                         \
    FormatToken *Tok = FormatTok;                                              \
    EXPECT_EQ(Tok->Tok.getKind(), tok::identifier) << *Tok;                    \
    EXPECT_EQ(Tok->TokenText, Name) << *Tok;                                   \
  } while (false)

TEST_F(IndexedTokenSourceTest, EmptyInput) {
TokenList Tokens = lex("");
IndexedTokenSource Source(Tokens);
IndexedTokenSource Source(lex(""));
EXPECT_FALSE(Source.isEOF());
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
EXPECT_TRUE(Source.isEOF());
Expand All @@ -46,8 +51,7 @@ TEST_F(IndexedTokenSourceTest, EmptyInput) {
}

TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
TokenList Tokens = lex("int a;");
IndexedTokenSource Source(Tokens);
IndexedTokenSource Source(lex("int a;"));
EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int);
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int);
EXPECT_EQ(Source.getPreviousToken(), nullptr);
Expand All @@ -60,11 +64,12 @@ TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof);
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
}

TEST_F(IndexedTokenSourceTest, ResetPosition) {
TokenList Tokens = lex("int a;");
IndexedTokenSource Source(Tokens);
IndexedTokenSource Source(lex("int a;"));
Source.getNextToken();
unsigned Position = Source.getPosition();
Source.getNextToken();
Expand All @@ -73,6 +78,50 @@ TEST_F(IndexedTokenSourceTest, ResetPosition) {
EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int);
}

// Inserting 'B1 B2' while positioned at A1 yields B1 and B2 first, after which
// the original stream resumes at A1 and continues with A2.
TEST_F(IndexedTokenSourceTest, InsertTokens) {
IndexedTokenSource Source(lex("A1 A2"));
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
// After the inserted tokens, the stream continues at A1 again.
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
EXPECT_TOKEN_ID(Source.getNextToken(), "A2");
}

// Inserting tokens after the stream has already returned EOF: the inserted
// tokens are returned first, and the stream then reports EOF again.
TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) {
IndexedTokenSource Source(lex("A1"));
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
}

// Nested insertions: each insertTokens() call is made while positioned at the
// first token of the previous insertion. The trailing comments show the
// resulting logical token sequence starting at the current position.
TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) {
IndexedTokenSource Source(lex("A1"));
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
// A1
EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
// B1 A1
EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
// C1 B1 A1
EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
// D1 C1 B1 A1
// Advancing unwinds the insertions in reverse order of insertion.
EXPECT_TOKEN_ID(Source.getNextToken(), "C1");
EXPECT_TOKEN_ID(Source.getNextToken(), "B1");
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
}

// Repeated insertions at the same original token: after each single-token
// insertion is consumed, the stream returns to A1, where the next insertion
// is made.
TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) {
IndexedTokenSource Source(lex("A1"));
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
}

} // namespace
} // namespace format
} // namespace clang

0 comments on commit 1995d44

Please sign in to comment.