Skip to content

Commit 93dc73b

Browse files
committed
[Lexer] Fix bug in makeFileCharRange called on split tokens.
When the end loc of the specified range is a split token, `makeFileCharRange` does not process it correctly. This patch adds proper support for split tokens. Differential Revision: https://reviews.llvm.org/D105365
1 parent 67002b5 commit 93dc73b

File tree

2 files changed

+81
-5
lines changed

2 files changed

+81
-5
lines changed

clang/lib/Lex/Lexer.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
877877
return CharSourceRange::getCharRange(Begin, End);
878878
}
879879

880+
// Assumes that `Loc` is in an expansion.
881+
static bool isInExpansionTokenRange(const SourceLocation Loc,
882+
const SourceManager &SM) {
883+
return SM.getSLocEntry(SM.getFileID(Loc))
884+
.getExpansion()
885+
.isExpansionTokenRange();
886+
}
887+
880888
CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
881889
const SourceManager &SM,
882890
const LangOptions &LangOpts) {
@@ -896,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
896904
}
897905

898906
if (Begin.isFileID() && End.isMacroID()) {
899-
if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
900-
&End)) ||
901-
(Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
902-
&End)))
907+
if (Range.isTokenRange()) {
908+
if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End))
909+
return {};
910+
// Use the *original* end, not the expanded one in `End`.
911+
Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM));
912+
} else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End))
903913
return {};
904914
Range.setEnd(End);
905915
return makeRangeFromFileLocs(Range, SM, LangOpts);
@@ -914,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
914924
&MacroEnd)))) {
915925
Range.setBegin(MacroBegin);
916926
Range.setEnd(MacroEnd);
927+
// Use the *original* `End`, not the expanded one in `MacroEnd`.
928+
if (Range.isTokenRange())
929+
Range.setTokenRange(isInExpansionTokenRange(End, SM));
917930
return makeRangeFromFileLocs(Range, SM, LangOpts);
918931
}
919932

clang/unittests/Lex/LexerTest.cpp

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "clang/Lex/PreprocessorOptions.h"
2626
#include "gmock/gmock.h"
2727
#include "gtest/gtest.h"
28+
#include <memory>
2829
#include <vector>
2930

3031
namespace {
@@ -65,7 +66,7 @@ class LexerTest : public ::testing::Test {
6566

6667
std::vector<Token> Lex(StringRef Source) {
6768
TrivialModuleLoader ModLoader;
68-
auto PP = CreatePP(Source, ModLoader);
69+
PP = CreatePP(Source, ModLoader);
6970

7071
std::vector<Token> toks;
7172
while (1) {
@@ -109,6 +110,7 @@ class LexerTest : public ::testing::Test {
109110
LangOptions LangOpts;
110111
std::shared_ptr<TargetOptions> TargetOpts;
111112
IntrusiveRefCntPtr<TargetInfo> Target;
113+
std::unique_ptr<Preprocessor> PP;
112114
};
113115

114116
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
@@ -264,12 +266,14 @@ TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
264266

265267
TEST_F(LexerTest, LexAPI) {
266268
std::vector<tok::TokenKind> ExpectedTokens;
269+
// Line 1 (after the #defines)
267270
ExpectedTokens.push_back(tok::l_square);
268271
ExpectedTokens.push_back(tok::identifier);
269272
ExpectedTokens.push_back(tok::r_square);
270273
ExpectedTokens.push_back(tok::l_square);
271274
ExpectedTokens.push_back(tok::identifier);
272275
ExpectedTokens.push_back(tok::r_square);
276+
// Line 2
273277
ExpectedTokens.push_back(tok::identifier);
274278
ExpectedTokens.push_back(tok::identifier);
275279
ExpectedTokens.push_back(tok::identifier);
@@ -357,6 +361,65 @@ TEST_F(LexerTest, LexAPI) {
357361
EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
358362
}
359363

364+
TEST_F(LexerTest, HandlesSplitTokens) {
365+
std::vector<tok::TokenKind> ExpectedTokens;
366+
// Line 1 (after the #defines)
367+
ExpectedTokens.push_back(tok::identifier);
368+
ExpectedTokens.push_back(tok::less);
369+
ExpectedTokens.push_back(tok::identifier);
370+
ExpectedTokens.push_back(tok::less);
371+
ExpectedTokens.push_back(tok::greatergreater);
372+
// Line 2
373+
ExpectedTokens.push_back(tok::identifier);
374+
ExpectedTokens.push_back(tok::less);
375+
ExpectedTokens.push_back(tok::identifier);
376+
ExpectedTokens.push_back(tok::less);
377+
ExpectedTokens.push_back(tok::greatergreater);
378+
379+
std::vector<Token> toks = CheckLex("#define TY ty\n"
380+
"#define RANGLE ty<ty<>>\n"
381+
"TY<ty<>>\n"
382+
"RANGLE",
383+
ExpectedTokens);
384+
385+
SourceLocation outerTyLoc = toks[0].getLocation();
386+
SourceLocation innerTyLoc = toks[2].getLocation();
387+
SourceLocation gtgtLoc = toks[4].getLocation();
388+
// Split the token to simulate the action of the parser and force creation of
389+
// an `ExpansionTokenRange`.
390+
SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
391+
392+
// Verify that it only captures the first greater-then and not the second one.
393+
CharSourceRange range = Lexer::makeFileCharRange(
394+
CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
395+
LangOpts);
396+
EXPECT_TRUE(range.isCharRange());
397+
EXPECT_EQ(range.getAsRange(),
398+
SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
399+
400+
// Verify case where range begins in a macro expansion.
401+
range = Lexer::makeFileCharRange(
402+
CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
403+
LangOpts);
404+
EXPECT_TRUE(range.isCharRange());
405+
EXPECT_EQ(range.getAsRange(),
406+
SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
407+
gtgtLoc.getLocWithOffset(1)));
408+
409+
SourceLocation macroInnerTyLoc = toks[7].getLocation();
410+
SourceLocation macroGtgtLoc = toks[9].getLocation();
411+
// Split the token to simulate the action of the parser and force creation of
412+
// an `ExpansionTokenRange`.
413+
SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
414+
415+
// Verify that it fails (because it only captures the first greater-than and
416+
// not the second one, so it doesn't span the entire macro expansion).
417+
range = Lexer::makeFileCharRange(
418+
CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
419+
SourceMgr, LangOpts);
420+
EXPECT_TRUE(range.isInvalid());
421+
}
422+
360423
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
361424
std::vector<Token> toks =
362425
Lex("#define helper1 0\n"

0 commit comments

Comments
 (0)