Commit
Module::DefinitionLoc
This is a prep patch for avoiding the quadratic number of calls to `HeaderSearch::lookupModule()` in `ASTReader` for each (transitively) loaded PCM file (specifically in the context of `clang-scan-deps`).

This patch explicitly serializes `Module::DefinitionLoc` so that we can stop relying on it being filled in by the module map parser. This also required a change to the module map parser, where we used the absence of `DefinitionLoc` to determine whether a file came from a PCM file. We also need to make sure we consider the "containing" module map as affecting when writing a PCM, so that it's not stripped during serialization, which ensures `DefinitionLoc` still ends up pointing at the correct offset.

This is intended to be an NFC change.

Reviewed By: benlangmuir

Differential Revision: https://reviews.llvm.org/D150292
1 comment
on commit abcf7ce
It looks like this caused a large compile time regression for our module builds (seeing +100% wall, +20% CPU).
The CPU profiles show a new hot path: `collectNonAffectingInputFiles` → `GetAffectingModuleMaps` → `ProcessModuleOnce` → `ForIncludeChain` → `SourceManager::translateFile`, which accounts for 20% CPU and was previously tiny (<0.5%).
At the bottom of that there are some calls to `SourceManager::getLoadedSlocEntry`, so I guess the extra wall time is I/O loading SLocEntries from imported modules.
I'm trying to understand exactly why the regression is so large. `translateFile` is an expensive function (linear search!), so calling it in a loop could explain it. However, we should "only" be calling it twice as often as before this patch, unless the vast majority of our modules are inferred, which I don't think is the case.
Anyway, I'm going to do some more investigation over the next few days, just wanted to let you know in case you had ideas.
(Sorry to bring this up late - we've got compiler-release benchmarks but I think none for builds of large modules...)
We're potentially doing a lot of repeated work here, if we visit the same module map multiple times in `ForIncludeChain`. We should change this lambda to return `ModuleMaps.insert(F).second`, and make `ForIncludeChain` stop if it returns `false`. Though I'm not sure that's related to the high performance cost of this change.