diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h b/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
index 5cd41efcb2ace3..e8220537649f96 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
@@ -92,7 +92,11 @@ struct DirectiveTree {
   /// Extract preprocessor structure by examining the raw tokens.
   static DirectiveTree parse(const TokenStream &);
 
-  // FIXME: allow deriving a preprocessed stream
+  /// Produce a parseable token stream by stripping all directive tokens.
+  ///
+  /// Conditional sections are replaced by the taken branch, if any.
+  /// This tree must describe the provided token stream.
+  TokenStream stripDirectives(const TokenStream &) const;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Chunk &);
diff --git a/clang-tools-extra/pseudo/lib/DirectiveTree.cpp b/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
index f464e3bc3ba665..82843125329b80 100644
--- a/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
+++ b/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
@@ -347,5 +347,53 @@ void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) {
   BranchChooser{Code}.choose(Tree);
 }
 
+namespace {
+class Preprocessor {
+  const TokenStream &In;
+  TokenStream &Out;
+
+public:
+  Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
+  ~Preprocessor() { Out.finalize(); }
+
+  void walk(const DirectiveTree &T) {
+    for (const auto &C : T.Chunks)
+      walk(C);
+  }
+
+  void walk(const DirectiveTree::Chunk &C) {
+    switch (C.kind()) {
+    case DirectiveTree::Chunk::K_Code:
+      return walk((const DirectiveTree::Code &)C);
+    case DirectiveTree::Chunk::K_Directive:
+      return walk((const DirectiveTree::Directive &)C);
+    case DirectiveTree::Chunk::K_Conditional:
+      return walk((const DirectiveTree::Conditional &)C);
+    case DirectiveTree::Chunk::K_Empty:
+      break;
+    }
+    llvm_unreachable("bad chunk kind");
+  }
+
+  void walk(const DirectiveTree::Code &C) {
+    for (const auto &Tok : In.tokens(C.Tokens))
+      Out.push(Tok);
+  }
+
+  void walk(const DirectiveTree::Directive &) {}
+
+  void walk(const DirectiveTree::Conditional &C) {
+    if (C.Taken)
+      walk(C.Branches[*C.Taken].second);
+  }
+};
+} // namespace
+
+TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
+  TokenStream Out;
+  Preprocessor(In, Out).walk(*this);
+  return Out;
+}
+
 } // namespace pseudo
 } // namespace clang
diff --git a/clang-tools-extra/pseudo/test/lex.c b/clang-tools-extra/pseudo/test/lex.c
index c1bf9296a8cce4..ebebd2e0fb72ff 100644
--- a/clang-tools-extra/pseudo/test/lex.c
+++ b/clang-tools-extra/pseudo/test/lex.c
@@ -18,7 +18,7 @@ SOURCE-NEXT: #end
 SOURCE-NEXT: }
 
 RUN: clang-pseudo -source %s -print-tokens | FileCheck %s -check-prefix=TOKEN
-      TOKEN: 0: raw_identifier 0:0 "int" flags=1
+     TOKEN: 0: raw_identifier 0:0 "int" flags=1
 TOKEN-NEXT: raw_identifier 0:0 "is_debug"
 TOKEN-NEXT: l_paren 0:0 "("
 TOKEN-NEXT: r_paren 0:0 ")"
@@ -39,14 +39,4 @@ TOKEN-NEXT: hash 5:0 "#" flags=1
 TOKEN-NEXT: raw_identifier 5:0 "endif"
 TOKEN-NEXT: r_brace 6:0 "}" flags=1
 
-RUN: clang-pseudo -source %s -print-directive-tree | FileCheck %s -check-prefix=PPT --strict-whitespace
-     PPT: code (5 tokens)
-PPT-NEXT: #ifndef (3 tokens) TAKEN
-PPT-NEXT:   code (4 tokens)
-PPT-NEXT: #else (2 tokens)
-PPT-NEXT:   code (3 tokens)
-PPT-NEXT: #endif (2 tokens)
-PPT-NEXT: code (2 tokens)
-                ^ including this block comment
-
 *******************************************************************************/
diff --git a/clang-tools-extra/pseudo/test/strip-directives.c b/clang-tools-extra/pseudo/test/strip-directives.c
new file mode 100644
index 00000000000000..c7878d9295a08a
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/strip-directives.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+int main() {
+#error This was inevitable...
+#if HELLO
+  printf("hello, world\n");
+  return 0;
+#else
+  abort();
+#endif
+}
+
+/* This comment gets lexed along with the input above! We just don't CHECK it.
+
+RUN: clang-pseudo -source %s -print-directive-tree | FileCheck %s -check-prefix=PPT --strict-whitespace
+     PPT: #include (7 tokens)
+PPT-NEXT: code (5 tokens)
+PPT-NEXT: #error (6 tokens)
+PPT-NEXT: #if (3 tokens) TAKEN
+PPT-NEXT:   code (8 tokens)
+PPT-NEXT: #else (2 tokens)
+PPT-NEXT:   code (4 tokens)
+PPT-NEXT: #endif (2 tokens)
+PPT-NEXT: code (2 tokens)
+                ^ including this block comment
+
+RUN: clang-pseudo -source %s -strip-directives -print-source | FileCheck %s --strict-whitespace
+     CHECK: int main() {
+CHECK-NEXT:   printf("hello, world\n");
+CHECK-NEXT:   return 0;
+CHECK-NEXT: }
+
+RUN: clang-pseudo -source %s -strip-directives -print-tokens | FileCheck %s --check-prefix=TOKEN
+     TOKEN: 0: raw_identifier 1:0 "int" flags=1
+TOKEN-NEXT: raw_identifier 1:0 "main"
+TOKEN-NEXT: l_paren 1:0 "("
+TOKEN-NEXT: r_paren 1:0 ")"
+TOKEN-NEXT: l_brace 1:0 "{"
+TOKEN-NEXT: raw_identifier 4:2 "printf" flags=1
+TOKEN-NEXT: l_paren 4:2 "("
+TOKEN-NEXT: string_literal 4:2 "\22hello, world\\n\22"
+TOKEN-NEXT: r_paren 4:2 ")"
+TOKEN-NEXT: semi 4:2 ";"
+TOKEN-NEXT: raw_identifier 5:2 "return" flags=1
+TOKEN-NEXT: numeric_constant 5:2 "0"
+TOKEN-NEXT: semi 5:2 ";"
+TOKEN-NEXT: r_brace 9:0 "}" flags=1
+
+*******************************************************************************/
+
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index d8517262faf1f7..47a0b2a10ae7ea 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/Signals.h"
 
 using clang::pseudo::Grammar;
+using clang::pseudo::TokenStream;
 using llvm::cl::desc;
 using llvm::cl::init;
 using llvm::cl::opt;
@@ -37,6 +38,9 @@ static opt<bool> PrintTokens("print-tokens", desc("Print detailed token info"));
 static opt<bool>
     PrintDirectiveTree("print-directive-tree",
                        desc("Print directive structure of source code"));
+static opt<bool>
+    StripDirectives("strip-directives",
+                    desc("Strip directives and select conditional sections"));
 static opt<bool> PrintStatistics("print-statistics",
                                  desc("Print GLR parser statistics"));
 static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
@@ -58,22 +62,29 @@ int main(int argc, char *argv[]) {
   clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
   std::string SourceText;
   llvm::Optional<clang::pseudo::TokenStream> RawStream;
-  llvm::Optional<clang::pseudo::DirectiveTree> DirectiveStructure;
   llvm::Optional<clang::pseudo::TokenStream> ParseableStream;
   if (Source.getNumOccurrences()) {
     SourceText = readOrDie(Source);
     RawStream = clang::pseudo::lex(SourceText, LangOpts);
-    DirectiveStructure = clang::pseudo::DirectiveTree::parse(*RawStream);
-    clang::pseudo::chooseConditionalBranches(*DirectiveStructure, *RawStream);
+    TokenStream *Stream = RawStream.getPointer();
+
+    auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(*RawStream);
+    clang::pseudo::chooseConditionalBranches(DirectiveStructure, *RawStream);
+
+    llvm::Optional<TokenStream> Preprocessed;
+    if (StripDirectives) {
+      Preprocessed = DirectiveStructure.stripDirectives(*Stream);
+      Stream = Preprocessed.getPointer();
+    }
 
-    if (PrintDirectiveTree)
-      llvm::outs() << DirectiveStructure;
     if (PrintSource)
-      RawStream->print(llvm::outs());
+      Stream->print(llvm::outs());
     if (PrintTokens)
-      llvm::outs() << RawStream;
+      llvm::outs() << *Stream;
+    if (PrintDirectiveTree)
+      llvm::outs() << DirectiveStructure;
 
-    ParseableStream = clang::pseudo::stripComments(cook(*RawStream, LangOpts));
+    ParseableStream = clang::pseudo::stripComments(cook(*Stream, LangOpts));
   }
 
   if (Grammar.getNumOccurrences()) {
diff --git a/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp b/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
index 476726c7fedc15..f8732e28c5e1fe 100644
--- a/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
@@ -27,14 +27,23 @@ using testing::Pair;
 using testing::StrEq;
 using Chunk = DirectiveTree::Chunk;
 
-MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) {
+// Matches text of a list of tokens against a string (joined with spaces).
+// e.g. EXPECT_THAT(Stream.tokens(), tokens("int main ( ) { }"));
+MATCHER_P(tokens, Tokens, "") {
   std::vector<llvm::StringRef> Texts;
-  for (const Token &Tok : TS.tokens(arg.Tokens))
+  for (const Token &Tok : arg)
     Texts.push_back(Tok.text());
   return Matcher<std::string>(StrEq(Tokens))
       .MatchAndExplain(llvm::join(Texts, " "), result_listener);
 }
 
+// Matches tokens covered by a directive chunk (with a Tokens property) against
+// a string, similar to tokens() above.
+// e.g. EXPECT_THAT(SomeDirective, tokensAre(Stream, "# include < vector >"));
+MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) {
+  return testing::Matches(tokens(Tokens))(TS.tokens(arg.Tokens));
+}
+
 MATCHER_P(chunkKind, K, "") { return arg.kind() == K; }
 
 TEST(DirectiveTree, Parse) {
@@ -301,6 +310,45 @@ TEST(DirectiveTree, ChooseBranches) {
   }
 }
 
+TEST(DirectiveTree, StripDirectives) {
+  LangOptions Opts;
+  std::string Code = R"cpp(
+    #include <stddef.h>
+    a a a
+    #warning AAA
+    b b b
+    #if 1
+      c c c
+      #warning BBB
+      #if 0
+        d d d
+        #warning CC
+      #else
+        e e e
+      #endif
+      f f f
+      #if 0
+        g g g
+      #endif
+      h h h
+    #else
+      i i i
+    #endif
+    j j j
+  )cpp";
+  TokenStream S = lex(Code, Opts);
+
+  DirectiveTree Tree = DirectiveTree::parse(S);
+  chooseConditionalBranches(Tree, S);
+  EXPECT_THAT(Tree.stripDirectives(S).tokens(),
+              tokens("a a a b b b c c c e e e f f f h h h j j j"));
+
+  const DirectiveTree &Part =
+      ((const DirectiveTree::Conditional &)Tree.Chunks[4]).Branches[0].second;
+  EXPECT_THAT(Part.stripDirectives(S).tokens(),
+              tokens("c c c e e e f f f h h h"));
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang