Skip to content

Commit

Permalink
[clang][modules] Move SLocEntry search into ASTReader (#66966)
Browse files Browse the repository at this point in the history
In `SourceManager::getFileID()`, Clang performs binary search over its
buffer of `SLocEntries`. For modules, this binary search fully
deserializes the entire `SLocEntry` block for each visited entry. For
some entries, that includes decompressing the associated buffer (e.g.
the predefines buffer, macro expansion buffers, contents of volatile
files), which shows up in profiles of the dependency scanner.

This patch moves the binary search over loaded entries into `ASTReader`,
which can perform cheaper partial deserialization during the binary
search, reducing the wall time of dependency scans by ~3%. This also
reduces the number of retired instructions by ~1.4% on regular
(implicit) modules compilation.

Note that this patch drops the optimizations based on the last lookup ID
(pruning the search space and performing linear search before resorting
to the full binary search). Instead, it reduces the search space by
asking `ASTReader::GlobalSLocOffsetMap` for the containing `ModuleFile`
and only does binary search over entries of single module file.
  • Loading branch information
jansvoboda11 committed Oct 6, 2023
1 parent 8eff5e4 commit 537344f
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 91 deletions.
20 changes: 20 additions & 0 deletions clang/include/clang/Basic/SourceManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,14 @@ class SLocEntry {
return Expansion;
}

/// Creates an incomplete SLocEntry that is only able to report its offset.
static SLocEntry getOffsetOnly(SourceLocation::UIntTy Offset) {
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
SLocEntry E;
E.Offset = Offset;
return E;
}

static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI) {
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
SLocEntry E;
Expand Down Expand Up @@ -534,6 +542,12 @@ class ExternalSLocEntrySource {
/// entry from being loaded.
virtual bool ReadSLocEntry(int ID) = 0;

/// Get the index ID for the loaded SourceLocation offset.
///
/// \returns Invalid index ID (0) if an error occurred that prevented the
/// SLocEntry from being loaded.
virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;

/// Retrieve the module import location and name for the given ID, if
/// in fact it was loaded from a module (rather than, say, a precompiled
/// header).
Expand Down Expand Up @@ -729,6 +743,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
/// Same indexing as LoadedSLocEntryTable.
llvm::BitVector SLocEntryLoaded;

/// A bitmap that indicates whether the entries of LoadedSLocEntryTable
/// have already had their offset loaded from the external source.
///
/// Superset of SLocEntryLoaded. Same indexing as SLocEntryLoaded.
llvm::BitVector SLocEntryOffsetLoaded;

/// An external source for source location entries.
ExternalSLocEntrySource *ExternalSLocEntries = nullptr;

Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -2154,6 +2154,12 @@ class ASTReader

/// Read the source location entry with index ID.
bool ReadSLocEntry(int ID) override;
/// Get the index ID for the loaded SourceLocation offset.
int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
/// Try to read the offset of the SLocEntry at the given index in the given
/// module file.
llvm::Expected<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
unsigned Index);

/// Retrieve the module import location and module name for the
/// given source manager entry ID.
Expand Down
70 changes: 5 additions & 65 deletions clang/lib/Basic/SourceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ void SourceManager::clearIDTables() {
LocalSLocEntryTable.clear();
LoadedSLocEntryTable.clear();
SLocEntryLoaded.clear();
SLocEntryOffsetLoaded.clear();
LastLineNoFileIDQuery = FileID();
LastLineNoContentCache = nullptr;
LastFileIDLookup = FileID();
Expand Down Expand Up @@ -459,6 +460,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
}
LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size());
CurrentLoadedOffset -= TotalSize;
int BaseID = -int(LoadedSLocEntryTable.size()) - 1;
LoadedSLocEntryAllocBegin.push_back(FileID::get(BaseID));
Expand Down Expand Up @@ -597,7 +599,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
LoadedSLocEntryTable[Index] = SLocEntry::get(
LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
SLocEntryLoaded[Index] = true;
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
return FileID::get(LoadedID);
}
unsigned FileSize = File.getSize();
Expand Down Expand Up @@ -657,7 +659,7 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
SLocEntryLoaded[Index] = true;
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
return SourceLocation::getMacroLoc(LoadedOffset);
}
LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
Expand Down Expand Up @@ -858,69 +860,7 @@ FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
return FileID();
}

// Essentially the same as the local case, but the loaded array is sorted
// in the other direction (decreasing order).
// GreaterIndex is the one where the offset is greater, which is actually a
// lower index!
unsigned GreaterIndex = 0;
unsigned LessIndex = LoadedSLocEntryTable.size();
if (LastFileIDLookup.ID < 0) {
// Prune the search space.
int LastID = LastFileIDLookup.ID;
if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
GreaterIndex =
(-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
else
LessIndex = -LastID - 2;
}

// First do a linear scan from the last lookup position, if possible.
unsigned NumProbes;
bool Invalid = false;
for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
// Make sure the entry is loaded!
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
if (Invalid)
return FileID(); // invalid entry.
if (E.getOffset() <= SLocOffset) {
FileID Res = FileID::get(-int(GreaterIndex) - 2);
LastFileIDLookup = Res;
NumLinearScans += NumProbes + 1;
return Res;
}
}

// Linear scan failed. Do the binary search.
NumProbes = 0;
while (true) {
++NumProbes;
unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
if (Invalid)
return FileID(); // invalid entry.

if (E.getOffset() > SLocOffset) {
if (GreaterIndex == MiddleIndex) {
assert(0 && "binary search missed the entry");
return FileID();
}
GreaterIndex = MiddleIndex;
continue;
}

if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
FileID Res = FileID::get(-int(MiddleIndex) - 2);
LastFileIDLookup = Res;
NumBinaryProbes += NumProbes;
return Res;
}

if (LessIndex == MiddleIndex) {
assert(0 && "binary search missed the entry");
return FileID();
}
LessIndex = MiddleIndex;
}
return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset));
}

SourceLocation SourceManager::
Expand Down
73 changes: 73 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,79 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
}
}

llvm::Expected<SourceLocation::UIntTy>
ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
BitstreamCursor &Cursor = F->SLocEntryCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
F->SLocEntryOffsets[Index]))
return Err;

Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry)
return MaybeEntry.takeError();

llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record)
return llvm::createStringError(
std::errc::illegal_byte_sequence,
"incorrectly-formatted source location entry in AST file");

RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
if (!MaybeSLOC)
return MaybeSLOC.takeError();

switch (MaybeSLOC.get()) {
default:
return llvm::createStringError(
std::errc::illegal_byte_sequence,
"incorrectly-formatted source location entry in AST file");
case SM_SLOC_FILE_ENTRY:
case SM_SLOC_BUFFER_ENTRY:
case SM_SLOC_EXPANSION_ENTRY:
return F->SLocEntryBaseOffset + Record[0];
}
}

int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
auto SLocMapI =
GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
"Corrupted global sloc offset map");
ModuleFile *F = SLocMapI->second;

bool Invalid = false;

auto It = llvm::upper_bound(
llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset,
[&](SourceLocation::UIntTy Offset, std::size_t LocalIndex) {
int ID = F->SLocEntryBaseID + LocalIndex;
std::size_t Index = -ID - 2;
if (!SourceMgr.SLocEntryOffsetLoaded[Index]) {
assert(!SourceMgr.SLocEntryLoaded[Index]);
auto MaybeEntryOffset = readSLocOffset(F, LocalIndex);
if (!MaybeEntryOffset) {
Error(MaybeEntryOffset.takeError());
Invalid = true;
return true;
}
SourceMgr.LoadedSLocEntryTable[Index] =
SrcMgr::SLocEntry::getOffsetOnly(*MaybeEntryOffset);
SourceMgr.SLocEntryOffsetLoaded[Index] = true;
}
return Offset < SourceMgr.LoadedSLocEntryTable[Index].getOffset();
});

if (Invalid)
return 0;

// The iterator points to the first entry with start offset greater than the
// offset of interest. The previous entry must contain the offset of interest.
return F->SLocEntryBaseID + *std::prev(It);
}

bool ASTReader::ReadSLocEntry(int ID) {
if (ID == 0)
return false;
Expand Down
1 change: 1 addition & 0 deletions clang/test/Modules/explicit-build-missing-files.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ int y = a2<int>;
// CHECK: In module 'a':
// CHECK-NEXT: a.h:1:45: error:

int z = b<int>;
// MISSING-B: could not find file '{{.*}}b.h'
// MISSING-B-NOT: please delete the module cache
#endif
Expand Down
76 changes: 50 additions & 26 deletions llvm/include/llvm/ADT/STLExtras.h
Original file line number Diff line number Diff line change
Expand Up @@ -2261,43 +2261,67 @@ template <typename... Refs> struct enumerator_result<std::size_t, Refs...> {
mutable range_reference_tuple Storage;
};

/// Infinite stream of increasing 0-based `size_t` indices.
struct index_stream {
struct iterator : iterator_facade_base<iterator, std::forward_iterator_tag,
const iterator> {
iterator &operator++() {
assert(Index != std::numeric_limits<std::size_t>::max() &&
"Attempting to increment end iterator");
++Index;
return *this;
}
struct index_iterator
: llvm::iterator_facade_base<index_iterator,
std::random_access_iterator_tag, std::size_t> {
index_iterator(std::size_t Index) : Index(Index) {}

// Note: This dereference operator returns a value instead of a reference
// and does not strictly conform to the C++17's definition of forward
// iterator. However, it satisfies all the forward_iterator requirements
// that the `zip_common` depends on and fully conforms to the C++20
// definition of forward iterator.
std::size_t operator*() const { return Index; }
index_iterator &operator+=(std::ptrdiff_t N) {
Index += N;
return *this;
}

friend bool operator==(const iterator &Lhs, const iterator &Rhs) {
return Lhs.Index == Rhs.Index;
}
index_iterator &operator-=(std::ptrdiff_t N) {
Index -= N;
return *this;
}

std::size_t Index = 0;
};
std::ptrdiff_t operator-(const index_iterator &R) const {
return Index - R.Index;
}

iterator begin() const { return {}; }
iterator end() const {
// Note: This dereference operator returns a value instead of a reference
// and does not strictly conform to the C++17's definition of forward
// iterator. However, it satisfies all the forward_iterator requirements
// that the `zip_common` depends on and fully conforms to the C++20
// definition of forward iterator.
std::size_t operator*() const { return Index; }

friend bool operator==(const index_iterator &Lhs, const index_iterator &Rhs) {
return Lhs.Index == Rhs.Index;
}

friend bool operator<(const index_iterator &Lhs, const index_iterator &Rhs) {
return Lhs.Index < Rhs.Index;
}

private:
std::size_t Index;
};

/// Infinite stream of increasing 0-based `size_t` indices.
struct index_stream {
index_iterator begin() const { return {0}; }
index_iterator end() const {
// We approximate 'infinity' with the max size_t value, which should be good
// enough to index over any container.
iterator It;
It.Index = std::numeric_limits<std::size_t>::max();
return It;
return index_iterator{std::numeric_limits<std::size_t>::max()};
}
};

} // end namespace detail

/// Increasing range of `size_t` indices.
class index_range {
std::size_t Begin;
std::size_t End;

public:
index_range(std::size_t Begin, std::size_t End) : Begin(Begin), End(End) {}
detail::index_iterator begin() const { return {Begin}; }
detail::index_iterator end() const { return {End}; }
};

/// Given two or more input ranges, returns a new range whose values are are
/// tuples (A, B, C, ...), such that A is the 0-based index of the item in the
/// sequence, and B, C, ..., are the values from the original input ranges. All
Expand Down

0 comments on commit 537344f

Please sign in to comment.