diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index ad0682511f97d..00b1e0fa855b7 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -59,6 +59,7 @@ class FileID { friend class ASTWriter; friend class ASTReader; friend class SourceManager; + friend class SourceManagerTestHelper; static FileID get(int V) { FileID F; diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f33..5494d3f67a6c4 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -701,6 +701,11 @@ class SourceManager : public RefCountedBase { /// use (-ID - 2). SmallVector LoadedSLocEntryTable; + /// For each allocation in LoadedSLocEntryTable, we keep the first FileID. + /// We assume exactly one allocation per AST file, and use that to determine + /// whether two FileIDs come from the same AST file. + SmallVector LoadedSLocEntryAllocBegin; + /// The starting offset of the next local SLocEntry. /// /// This is LocalSLocEntryTable.back().Offset + the size of that entry. @@ -1649,6 +1654,11 @@ class SourceManager : public RefCountedBase { isInTheSameTranslationUnit(std::pair &LOffs, std::pair &ROffs) const; + /// Determines whether the two decomposed source location is in the same TU. + bool isInTheSameTranslationUnitImpl( + const std::pair &LOffs, + const std::pair &ROffs) const; + /// Determines the order of 2 source locations in the "source location /// address space". bool isBeforeInSLocAddrSpace(SourceLocation LHS, SourceLocation RHS) const { diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 9e115f2a5cce3..85f49e21b2e2e 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -51,7 +51,7 @@ const unsigned VERSION_MAJOR = 29; /// for the previous version could still support reading the new /// version by ignoring new kinds of subblocks), this number /// should be increased. -const unsigned VERSION_MINOR = 0; +const unsigned VERSION_MINOR = 1; /// An ID number that refers to an identifier in an AST file. /// @@ -524,13 +524,7 @@ enum ASTRecordTypes { /// of source-location information. SOURCE_LOCATION_OFFSETS = 14, - /// Record code for the set of source location entries - /// that need to be preloaded by the AST reader. - /// - /// This set contains the source location entry for the - /// predefines buffer and for any file entries that need to be - /// preloaded. - SOURCE_LOCATION_PRELOADS = 15, + // ID 15 used to be for source location entry preloads. /// Record code for the set of ext_vector type names. EXT_VECTOR_DECLS = 16, diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 0af5cae6aebc3..48be8676cc26a 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -292,9 +292,6 @@ class ModuleFile { /// AST file. const uint32_t *SLocEntryOffsets = nullptr; - /// SLocEntries that we're going to preload. - SmallVector PreloadSLocEntries; - /// Remapping table for source locations in this module. ContinuousRangeMap SLocRemap; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339..6a2d786f1989e 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -461,8 +461,9 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries, LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries); SLocEntryLoaded.resize(LoadedSLocEntryTable.size()); CurrentLoadedOffset -= TotalSize; - int ID = LoadedSLocEntryTable.size(); - return std::make_pair(-ID - 1, CurrentLoadedOffset); + int BaseID = -int(LoadedSLocEntryTable.size()) - 1; + LoadedSLocEntryAllocBegin.push_back(FileID::get(BaseID)); + return std::make_pair(BaseID, CurrentLoadedOffset); } /// As part of recovering from missing or changed content, produce a @@ -1976,14 +1977,38 @@ SourceManager::getDecomposedIncludedLoc(FileID FID) const { return DecompLoc; } +bool SourceManager::isInTheSameTranslationUnitImpl( + const std::pair &LOffs, + const std::pair &ROffs) const { + // If one is local while the other is loaded. + if (isLoadedFileID(LOffs.first) != isLoadedFileID(ROffs.first)) + return false; + + if (isLoadedFileID(LOffs.first) && isLoadedFileID(ROffs.first)) { + auto FindSLocEntryAlloc = [this](FileID FID) { + // Loaded FileIDs are negative, we store the lowest FileID from each + // allocation, later allocations have lower FileIDs. + return llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater{}); + }; + + // If both are loaded from different AST files. + if (FindSLocEntryAlloc(LOffs.first) != FindSLocEntryAlloc(ROffs.first)) + return false; + } + + return true; +} + /// Given a decomposed source location, move it up the include/expansion stack -/// to the parent source location. If this is possible, return the decomposed -/// version of the parent in Loc and return false. If Loc is the top-level -/// entry, return true and don't modify it. -static bool MoveUpIncludeHierarchy(std::pair &Loc, - const SourceManager &SM) { +/// to the parent source location within the same translation unit. If this is +/// possible, return the decomposed version of the parent in Loc and return +/// false. If Loc is a top-level entry, return true and don't modify it. +static bool +MoveUpTranslationUnitIncludeHierarchy(std::pair &Loc, + const SourceManager &SM) { std::pair UpperLoc = SM.getDecomposedIncludedLoc(Loc.first); - if (UpperLoc.first.isInvalid()) + if (UpperLoc.first.isInvalid() || + !SM.isInTheSameTranslationUnitImpl(UpperLoc, Loc)) return true; // We reached the top. Loc = UpperLoc; @@ -2039,45 +2064,18 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS, std::pair InSameTU = isInTheSameTranslationUnit(LOffs, ROffs); if (InSameTU.first) return InSameTU.second; - - // If we arrived here, the location is either in a built-ins buffer or - // associated with global inline asm. PR5662 and PR22576 are examples. - - StringRef LB = getBufferOrFake(LOffs.first).getBufferIdentifier(); - StringRef RB = getBufferOrFake(ROffs.first).getBufferIdentifier(); - bool LIsBuiltins = LB == ""; - bool RIsBuiltins = RB == ""; - // Sort built-in before non-built-in. - if (LIsBuiltins || RIsBuiltins) { - if (LIsBuiltins != RIsBuiltins) - return LIsBuiltins; - // Both are in built-in buffers, but from different files. We just claim that - // lower IDs come first. - return LOffs.first < ROffs.first; - } - bool LIsAsm = LB == ""; - bool RIsAsm = RB == ""; - // Sort assembler after built-ins, but before the rest. - if (LIsAsm || RIsAsm) { - if (LIsAsm != RIsAsm) - return RIsAsm; - assert(LOffs.first == ROffs.first); - return false; - } - bool LIsScratch = LB == ""; - bool RIsScratch = RB == ""; - // Sort scratch after inline asm, but before the rest. - if (LIsScratch || RIsScratch) { - if (LIsScratch != RIsScratch) - return LIsScratch; - return LOffs.second < ROffs.second; - } - llvm_unreachable("Unsortable locations found"); + // TODO: This should be unreachable, but some clients are calling this + // function before making sure LHS and RHS are in the same TU. + return LOffs.first < ROffs.first; } std::pair SourceManager::isInTheSameTranslationUnit( std::pair &LOffs, std::pair &ROffs) const { + // If the source locations are not in the same TU, return early. + if (!isInTheSameTranslationUnitImpl(LOffs, ROffs)) + return std::make_pair(false, false); + // If the source locations are in the same file, just compare offsets. if (LOffs.first == ROffs.first) return std::make_pair(true, LOffs.second < ROffs.second); @@ -2100,53 +2098,88 @@ std::pair SourceManager::isInTheSameTranslationUnit( // A location within a FileID on the path up from LOffs to the main file. struct Entry { - unsigned Offset; - FileID ParentFID; // Used for breaking ties. + std::pair DecomposedLoc; // FileID redundant, but clearer. + FileID ChildFID; // Used for breaking ties. Invalid for the initial loc. }; llvm::SmallDenseMap LChain; - FileID Parent; + FileID LChild; do { - LChain.try_emplace(LOffs.first, Entry{LOffs.second, Parent}); + LChain.try_emplace(LOffs.first, Entry{LOffs, LChild}); // We catch the case where LOffs is in a file included by ROffs and // quit early. The other way round unfortunately remains suboptimal. if (LOffs.first == ROffs.first) break; - Parent = LOffs.first; - } while (!MoveUpIncludeHierarchy(LOffs, *this)); + LChild = LOffs.first; + } while (!MoveUpTranslationUnitIncludeHierarchy(LOffs, *this)); - Parent = FileID(); + FileID RChild; do { - auto I = LChain.find(ROffs.first); - if (I != LChain.end()) { + auto LIt = LChain.find(ROffs.first); + if (LIt != LChain.end()) { // Compare the locations within the common file and cache them. - LOffs.first = I->first; - LOffs.second = I->second.Offset; - // The relative order of LParent and RParent is a tiebreaker when + LOffs = LIt->second.DecomposedLoc; + LChild = LIt->second.ChildFID; + // The relative order of LChild and RChild is a tiebreaker when // - locs expand to the same location (occurs in macro arg expansion) // - one loc is a parent of the other (we consider the parent as "first") - // For the parent to be first, the invalid file ID must compare smaller. + // For the parent entry to be first, its invalid child file ID must + // compare smaller to the valid child file ID of the other entry. // However loaded FileIDs are <0, so we perform *unsigned* comparison! // This changes the relative order of local vs loaded FileIDs, but it // doesn't matter as these are never mixed in macro expansion. - unsigned LParent = I->second.ParentFID.ID; - unsigned RParent = Parent.ID; + unsigned LChildID = LChild.ID; + unsigned RChildID = RChild.ID; assert(((LOffs.second != ROffs.second) || - (LParent == 0 || RParent == 0) || - isInSameSLocAddrSpace(getComposedLoc(I->second.ParentFID, 0), - getComposedLoc(Parent, 0), nullptr)) && + (LChildID == 0 || RChildID == 0) || + isInSameSLocAddrSpace(getComposedLoc(LChild, 0), + getComposedLoc(RChild, 0), nullptr)) && "Mixed local/loaded FileIDs with same include location?"); IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second, - LParent < RParent); + LChildID < RChildID); return std::make_pair( true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second)); } - Parent = ROffs.first; - } while (!MoveUpIncludeHierarchy(ROffs, *this)); + RChild = ROffs.first; + } while (!MoveUpTranslationUnitIncludeHierarchy(ROffs, *this)); + + // If we found no match, the location is either in a built-ins buffer or + // associated with global inline asm. PR5662 and PR22576 are examples. + + StringRef LB = getBufferOrFake(LOffs.first).getBufferIdentifier(); + StringRef RB = getBufferOrFake(ROffs.first).getBufferIdentifier(); + + bool LIsBuiltins = LB == ""; + bool RIsBuiltins = RB == ""; + // Sort built-in before non-built-in. + if (LIsBuiltins || RIsBuiltins) { + if (LIsBuiltins != RIsBuiltins) + return std::make_pair(true, LIsBuiltins); + // Both are in built-in buffers, but from different files. We just claim + // that lower IDs come first. + return std::make_pair(true, LOffs.first < ROffs.first); + } + + bool LIsAsm = LB == ""; + bool RIsAsm = RB == ""; + // Sort assembler after built-ins, but before the rest. + if (LIsAsm || RIsAsm) { + if (LIsAsm != RIsAsm) + return std::make_pair(true, RIsAsm); + assert(LOffs.first == ROffs.first); + return std::make_pair(true, false); + } + + bool LIsScratch = LB == ""; + bool RIsScratch = RB == ""; + // Sort scratch after inline asm, but before the rest. + if (LIsScratch || RIsScratch) { + if (LIsScratch != RIsScratch) + return std::make_pair(true, LIsScratch); + return std::make_pair(true, LOffs.second < ROffs.second); + } - // If we found no match, we're not in the same TU. - // We don't cache this, but it is rare. - return std::make_pair(false, false); + llvm_unreachable("Unsortable locations found"); } void SourceManager::PrintStats() const { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0952244d037a7..e796617455ac3 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3226,7 +3226,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, case SOURCE_LOCATION_OFFSETS: case MODULE_OFFSET_MAP: case SOURCE_MANAGER_LINE_TABLE: - case SOURCE_LOCATION_PRELOADS: case PPD_ENTITIES_OFFSETS: case HEADER_SEARCH_TABLE: case IMPORTED_MODULES: @@ -3576,18 +3575,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, ParseLineTable(F, Record); break; - case SOURCE_LOCATION_PRELOADS: { - // Need to transform from the local view (1-based IDs) to the global view, - // which is based off F.SLocEntryBaseID. - if (!F.PreloadSLocEntries.empty()) - return llvm::createStringError( - std::errc::illegal_byte_sequence, - "Multiple SOURCE_LOCATION_PRELOADS records in AST file"); - - F.PreloadSLocEntries.swap(Record); - break; - } - case EXT_VECTOR_DECLS: for (unsigned I = 0, N = Record.size(); I != N; ++I) ExtVectorDecls.push_back(getGlobalDeclID(F, Record[I])); @@ -4417,16 +4404,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, ModuleKind Type, for (ImportedModule &M : Loaded) { ModuleFile &F = *M.Mod; - // Preload SLocEntries. - for (unsigned I = 0, N = F.PreloadSLocEntries.size(); I != N; ++I) { - int Index = int(F.PreloadSLocEntries[I] - 1) + F.SLocEntryBaseID; - // Load it through the SourceManager and don't call ReadSLocEntry() - // directly because the entry may have already been loaded in which case - // calling ReadSLocEntry() directly would trigger an assertion in - // SourceManager. - SourceMgr.getLoadedSLocEntryByID(Index); - } - // Map the original source file ID into the ID space of the current // compilation. if (F.OriginalSourceFileID.isValid()) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 65bee806d2c55..3392243d7ac4b 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -838,7 +838,6 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(METHOD_POOL); RECORD(PP_COUNTER_VALUE); RECORD(SOURCE_LOCATION_OFFSETS); - RECORD(SOURCE_LOCATION_PRELOADS); RECORD(EXT_VECTOR_DECLS); RECORD(UNUSED_FILESCOPED_DECLS); RECORD(PPD_ENTITIES_OFFSETS); @@ -2137,7 +2136,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); - RecordData PreloadSLocs; SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) { @@ -2213,9 +2211,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record, StringRef(Name.data(), Name.size() + 1)); EmitBlob = true; - - if (Name == "") - PreloadSLocs.push_back(SLocEntryOffsets.size()); } if (EmitBlob) { @@ -2277,9 +2272,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } - // Write the source location entry preloads array, telling the AST - // reader which source locations entries it should load eagerly. - Stream.EmitRecord(SOURCE_LOCATION_PRELOADS, PreloadSLocs); // Write the line table. It depends on remapping working, so it must come // after the source location offsets. diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp index db48dfd47c5e4..d99bde907cb9b 100644 --- a/clang/unittests/Basic/SourceManagerTest.cpp +++ b/clang/unittests/Basic/SourceManagerTest.cpp @@ -26,6 +26,13 @@ using namespace clang; +namespace clang { +class SourceManagerTestHelper { +public: + static FileID makeFileID(int ID) { return FileID::get(ID); } +}; +} // namespace clang + namespace { // The test fixture. @@ -409,6 +416,52 @@ TEST_F(SourceManagerTest, getLineNumber) { ASSERT_NO_FATAL_FAILURE(SourceMgr.getLineNumber(mainFileID, 1, nullptr)); } +struct FakeExternalSLocEntrySource : ExternalSLocEntrySource { + bool ReadSLocEntry(int ID) override { return {}; } + std::pair getModuleImportLoc(int ID) override { + return {}; + } +}; + +TEST_F(SourceManagerTest, loadedSLocEntryIsInTheSameTranslationUnit) { + auto InSameTU = [=](int LID, int RID) { + return SourceMgr.isInTheSameTranslationUnitImpl( + std::make_pair(SourceManagerTestHelper::makeFileID(LID), 0), + std::make_pair(SourceManagerTestHelper::makeFileID(RID), 0)); + }; + + FakeExternalSLocEntrySource ExternalSource; + SourceMgr.setExternalSLocEntrySource(&ExternalSource); + + unsigned ANumFileIDs = 10; + auto [AFirstID, X] = SourceMgr.AllocateLoadedSLocEntries(ANumFileIDs, 10); + int ALastID = AFirstID + ANumFileIDs - 1; + // FileID(-11)..FileID(-2) + ASSERT_EQ(AFirstID, -11); + ASSERT_EQ(ALastID, -2); + + unsigned BNumFileIDs = 20; + auto [BFirstID, Y] = SourceMgr.AllocateLoadedSLocEntries(BNumFileIDs, 20); + int BLastID = BFirstID + BNumFileIDs - 1; + // FileID(-31)..FileID(-12) + ASSERT_EQ(BFirstID, -31); + ASSERT_EQ(BLastID, -12); + + // Loaded vs local. + EXPECT_FALSE(InSameTU(-2, 1)); + + // Loaded in the same allocation A. + EXPECT_TRUE(InSameTU(-11, -2)); + EXPECT_TRUE(InSameTU(-11, -6)); + + // Loaded in the same allocation B. + EXPECT_TRUE(InSameTU(-31, -12)); + EXPECT_TRUE(InSameTU(-31, -16)); + + // Loaded from different allocations A and B. + EXPECT_FALSE(InSameTU(-12, -11)); +} + #if defined(LLVM_ON_UNIX) TEST_F(SourceManagerTest, getMacroArgExpandedLocation) {