Skip to content

Commit

Permalink
[Lex] Bring back the magic number 50 in updateConsecutiveMacroArgTokens.
Browse files Browse the repository at this point in the history
This patch is a reland of 74e4f77 and
f83347b with the removed 50 trick back.

The magic number 50 was removed in D134942, as a behavior change for
performance reason.

While it reduces the number of SLocEntry, it increases the usage of
SourceLocation address space usage, which is critical for compiling
large TU.

This fixes a regression caused in D134942 -- clang failed to compile one of
our internal files, complaining the file is too large to process because clang
runs out of source location space (we spend 40% more address space!)

Differential Revision: https://reviews.llvm.org/D136539
  • Loading branch information
hokein committed Oct 26, 2022
1 parent 1a726cf commit 11c1d8b
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 50 deletions.
107 changes: 57 additions & 50 deletions clang/lib/Lex/TokenLexer.cpp
Expand Up @@ -25,6 +25,7 @@
#include "clang/Lex/Token.h"
#include "clang/Lex/VariadicMacroSupport.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
Expand Down Expand Up @@ -984,65 +985,71 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
/// \arg begin_tokens will be updated to a position past all the found
/// consecutive tokens.
static void updateConsecutiveMacroArgTokens(SourceManager &SM,
SourceLocation InstLoc,
SourceLocation ExpandLoc,
Token *&begin_tokens,
Token * end_tokens) {
assert(begin_tokens < end_tokens);

SourceLocation FirstLoc = begin_tokens->getLocation();
SourceLocation CurLoc = FirstLoc;

// Compare the source location offset of tokens and group together tokens that
// are close, even if their locations point to different FileIDs. e.g.
//
// |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
// ^ ^
// |bar foo cake| (one SLocEntry chunk for all tokens)
//
// we can perform this "merge" since the token's spelling location depends
// on the relative offset.

Token *NextTok = begin_tokens + 1;
for (; NextTok < end_tokens; ++NextTok) {
SourceLocation NextLoc = NextTok->getLocation();
if (CurLoc.isFileID() != NextLoc.isFileID())
break; // Token from different kind of FileID.

SourceLocation::IntTy RelOffs;
if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
break; // Token from different local/loaded location.
// Check that token is not before the previous token or more than 50
// "characters" away.
if (RelOffs < 0 || RelOffs > 50)
break;

if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
break; // Token from a different macro.

CurLoc = NextLoc;
assert(begin_tokens + 1 < end_tokens);
SourceLocation BeginLoc = begin_tokens->getLocation();
llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
llvm::MutableArrayRef<Token> Partition;

auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
// The maximum distance between two consecutive tokens in a partition.
// This is an important trick to avoid using too much SourceLocation address
// space!
static constexpr SourceLocation::IntTy MaxDistance = 50;
auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
Last = Loc;
return Distance <= MaxDistance;
};

// Partition the tokens by their FileID.
// This is a hot function, and calling getFileID can be expensive, the
// implementation is optimized by reducing the number of getFileID.
if (BeginLoc.isFileID()) {
// Consecutive tokens not written in macros must be from the same file.
// (Neither #include nor eof can occur inside a macro argument.)
Partition = All.take_while([&](const Token &T) {
return T.getLocation().isFileID() && NearLast(T.getLocation());
});
} else {
// Call getFileID once to calculate the bounds, and use the cheaper
// sourcelocation-against-bounds comparison.
FileID BeginFID = SM.getFileID(BeginLoc);
SourceLocation Limit =
SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
Partition = All.take_while([&](const Token &T) {
return T.getLocation() >= BeginLoc && T.getLocation() < Limit &&
NearLast(T.getLocation());
});
}
assert(!Partition.empty());

// For the consecutive tokens, find the length of the SLocEntry to contain
// all of them.
Token &LastConsecutiveTok = *(NextTok-1);
SourceLocation::IntTy LastRelOffs = 0;
SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
&LastRelOffs);
SourceLocation::UIntTy FullLength =
LastRelOffs + LastConsecutiveTok.getLength();

Partition.back().getEndLoc().getRawEncoding() -
Partition.front().getLocation().getRawEncoding();
// Create a macro expansion SLocEntry that will "contain" all of the tokens.
SourceLocation Expansion =
SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);

SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);

#ifdef EXPENSIVE_CHECKS
assert(llvm::all_of(Partition.drop_front(),
[&SM, ID = SM.getFileID(Partition.front().getLocation())](
const Token &T) {
return ID == SM.getFileID(T.getLocation());
}) &&
"Must have the same FIleID!");
#endif
// Change the location of the tokens from the spelling location to the new
// expanded location.
for (; begin_tokens < NextTok; ++begin_tokens) {
Token &Tok = *begin_tokens;
SourceLocation::IntTy RelOffs = 0;
SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
for (Token& T : Partition) {
SourceLocation::IntTy RelativeOffset =
T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
}
begin_tokens = &Partition.back() + 1;
}

/// Creates SLocEntries and updates the locations of macro argument
Expand All @@ -1055,20 +1062,20 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
Token *end_tokens) {
SourceManager &SM = PP.getSourceManager();

SourceLocation InstLoc =
SourceLocation ExpandLoc =
getExpansionLocForMacroDefLoc(ArgIdSpellLoc);

while (begin_tokens < end_tokens) {
// If there's only one token just create a SLocEntry for it.
if (end_tokens - begin_tokens == 1) {
Token &Tok = *begin_tokens;
Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
InstLoc,
ExpandLoc,
Tok.getLength()));
return;
}

updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
}
}

Expand Down
36 changes: 36 additions & 0 deletions clang/test/Lexer/update_consecutive_macro_address_space.c
@@ -0,0 +1,36 @@
// RUN: %clang -cc1 -print-stats %s 2>&1 | FileCheck %s
// CHECK: 6 local SLocEntry's allocated
//
// Verify that the macro arg expansion is split to two file ids, we have 6 file
// ids rather than 5:
// 0: invalid file id
// 1: main file
// 2: builtin file
// 3: macro expansion for X
// 4: macro arg expansions for 1
// 5: macro arg expansions for == 2
#define X(x) (int)(x);
void func() {
X(1
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
/*************************************************************************************************/
== 2);
}

0 comments on commit 11c1d8b

Please sign in to comment.