diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a9f25ec2ecb75..78f718533bd55 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4386,27 +4386,56 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, if (F.OriginalSourceFileID.isValid()) F.OriginalSourceFileID = TranslateFileID(F, F.OriginalSourceFileID); - // Preload all the pending interesting identifiers by marking them out of - // date. for (auto Offset : F.PreloadIdentifierOffsets) { const unsigned char *Data = F.IdentifierTableData + Offset; ASTIdentifierLookupTrait Trait(*this, F); auto KeyDataLen = Trait.ReadKeyDataLength(Data); auto Key = Trait.ReadKey(Data, KeyDataLen.first); - auto &II = PP.getIdentifierTable().getOwn(Key); - II.setOutOfDate(true); + + IdentifierInfo *II; + if (!PP.getLangOpts().CPlusPlus) { + // Identifiers present in both the module file and the importing + // instance are marked out-of-date so that they can be deserialized + // on next use via ASTReader::updateOutOfDateIdentifier(). + // Identifiers present in the module file but not in the importing + // instance are ignored for now, preventing growth of the identifier + // table. They will be deserialized on first use via ASTReader::get(). + auto It = PP.getIdentifierTable().find(Key); + if (It == PP.getIdentifierTable().end()) + continue; + II = It->second; + } else { + // With C++ modules, not many identifiers are considered interesting. + // All identifiers in the module file can be placed into the identifier + // table of the importing instance and marked as out-of-date. This makes + // ASTReader::get() a no-op, and deserialization will take place on + // first/next use via ASTReader::updateOutOfDateIdentifier(). + II = &PP.getIdentifierTable().getOwn(Key); + } + + II->setOutOfDate(true); // Mark this identifier as being from an AST file so that we can track // whether we need to serialize it. - markIdentifierFromAST(*this, II); + markIdentifierFromAST(*this, *II); // Associate the ID with the identifier so that the writer can reuse it. auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first); - SetIdentifierInfo(ID, &II); + SetIdentifierInfo(ID, II); } } + // Builtins and library builtins have already been initialized. Mark all + // identifiers as out-of-date, so that they are deserialized on first use. + if (Type == MK_PCH || Type == MK_Preamble || Type == MK_MainFile) + for (auto &Id : PP.getIdentifierTable()) + Id.second->setOutOfDate(true); + + // Mark selectors as out of date. + for (const auto &Sel : SelectorGeneration) + SelectorOutOfDate[Sel.first] = true; + // Setup the import locations and notify the module manager that we've // committed to these module files. for (ImportedModule &M : Loaded) { @@ -4424,25 +4453,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc); } - if (!PP.getLangOpts().CPlusPlus || - (Type != MK_ImplicitModule && Type != MK_ExplicitModule && - Type != MK_PrebuiltModule)) { - // Mark all of the identifiers in the identifier table as being out of date, - // so that various accessors know to check the loaded modules when the - // identifier is used. - // - // For C++ modules, we don't need information on many identifiers (just - // those that provide macros or are poisoned), so we mark all of - // the interesting ones via PreloadIdentifierOffsets. - for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), - IdEnd = PP.getIdentifierTable().end(); - Id != IdEnd; ++Id) - Id->second->setOutOfDate(true); - } - // Mark selectors as out of date. - for (const auto &Sel : SelectorGeneration) - SelectorOutOfDate[Sel.first] = true; - // Resolve any unresolved module exports. for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 2f73eb526641a..e2dec17f7cae7 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3573,14 +3573,16 @@ class ASTIdentifierTableTrait { // the mapping from persistent IDs to strings. Writer.SetIdentifierOffset(II, Out.tell()); + auto MacroOffset = Writer.getMacroDirectivesOffset(II); + // Emit the offset of the key/data length information to the interesting // identifiers table if necessary. - if (InterestingIdentifierOffsets && isInterestingIdentifier(II)) + if (InterestingIdentifierOffsets && + isInterestingIdentifier(II, MacroOffset)) InterestingIdentifierOffsets->push_back(Out.tell()); unsigned KeyLen = II->getLength() + 1; unsigned DataLen = 4; // 4 bytes for the persistent ID << 1 - auto MacroOffset = Writer.getMacroDirectivesOffset(II); if (isInterestingIdentifier(II, MacroOffset)) { DataLen += 2; // 2 bytes for builtin ID DataLen += 2; // 2 bytes for flags @@ -3659,9 +3661,8 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, // strings. { llvm::OnDiskChainedHashTableGenerator Generator; - ASTIdentifierTableTrait Trait( - *this, PP, IdResolver, IsModule, - (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr); + ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule, + IsModule ? &InterestingIdents : nullptr); // Look for any identifiers that were named while processing the // headers, but are otherwise not needed. We add these to the hash