Skip to content

Commit

Permalink
[clang][modules] Account for non-affecting inputs in ASTWriter
Browse files Browse the repository at this point in the history
In D106876, we stopped serializing module map files that didn't affect compilation of the current module.

However, since each `SourceLocation` is simply an offset into `SourceManager`'s global buffer of concatenated input files in, these need to be adjusted during serialization. Otherwise, they can incorrectly point after the buffer or into subsequent input file.

This patch starts adjusting `SourceLocation`s, `FileID`s and other `SourceManager` offsets in `ASTWriter`.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D136624
  • Loading branch information
jansvoboda11 committed Nov 2, 2022
1 parent a13122c commit 6924a49
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 89 deletions.
23 changes: 19 additions & 4 deletions clang/include/clang/Serialization/ASTWriter.h
Expand Up @@ -444,8 +444,21 @@ class ASTWriter : public ASTDeserializationListener,
std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
ModuleFileExtensionWriters;

/// User ModuleMaps skipped when writing control block.
std::set<const FileEntry *> SkippedModuleMaps;
/// Mapping from a source location entry to whether it is affecting or not.
llvm::BitVector IsSLocAffecting;

/// Mapping from \c FileID to an index into the FileID adjustment table.
std::vector<FileID> NonAffectingFileIDs;
std::vector<unsigned> NonAffectingFileIDAdjustments;

/// Mapping from an offset to an index into the offset adjustment table.
std::vector<SourceRange> NonAffectingRanges;
std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;

/// Collects input files that didn't affect compilation of the current module,
/// and initializes data structures necessary for leaving those files out
/// during \c SourceManager serialization.
void collectNonAffectingInputFiles();

/// Returns an adjusted \c FileID, accounting for any non-affecting input
/// files.
Expand All @@ -462,6 +475,9 @@ class ASTWriter : public ASTDeserializationListener,
/// Returns an adjusted \c SourceLocation offset, accounting for any
/// non-affecting input files.
SourceLocation::UIntTy getAdjustedOffset(SourceLocation::UIntTy Offset) const;
/// Returns an adjustment for offset into SourceManager, accounting for any
/// non-affecting input files.
SourceLocation::UIntTy getAdjustment(SourceLocation::UIntTy Offset) const;

/// Retrieve or create a submodule ID for this module.
unsigned getSubmoduleID(Module *Mod);
Expand All @@ -481,8 +497,7 @@ class ASTWriter : public ASTDeserializationListener,
static std::pair<ASTFileSignature, ASTFileSignature>
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);

void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
std::set<const FileEntry *> &AffectingClangModuleMaps);
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
void WriteSourceManagerBlock(SourceManager &SourceMgr,
const Preprocessor &PP);
void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
Expand Down
160 changes: 121 additions & 39 deletions clang/lib/Serialization/ASTWriter.cpp
Expand Up @@ -161,8 +161,8 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {

namespace {

std::set<const FileEntry *> GetAllModuleMaps(const HeaderSearch &HS,
Module *RootModule) {
std::set<const FileEntry *> GetAffectingModuleMaps(const HeaderSearch &HS,
Module *RootModule) {
std::set<const FileEntry *> ModuleMaps{};
std::set<const Module *> ProcessedModules;
SmallVector<const Module *> ModulesToProcess{RootModule};
Expand Down Expand Up @@ -1477,15 +1477,8 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
AddFileID(SM.getMainFileID(), Record);
Stream.EmitRecord(ORIGINAL_FILE_ID, Record);

std::set<const FileEntry *> AffectingClangModuleMaps;
if (WritingModule) {
AffectingClangModuleMaps =
GetAllModuleMaps(PP.getHeaderSearchInfo(), WritingModule);
}

WriteInputFiles(Context.SourceMgr,
PP.getHeaderSearchInfo().getHeaderSearchOpts(),
AffectingClangModuleMaps);
PP.getHeaderSearchInfo().getHeaderSearchOpts());
Stream.ExitBlock();
}

Expand All @@ -1503,9 +1496,8 @@ struct InputFileEntry {

} // namespace

void ASTWriter::WriteInputFiles(
SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
std::set<const FileEntry *> &AffectingClangModuleMaps) {
void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
HeaderSearchOptions &HSOpts) {
using namespace llvm;

Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
Expand Down Expand Up @@ -1545,15 +1537,9 @@ void ASTWriter::WriteInputFiles(
if (!Cache->OrigEntry)
continue;

if (isModuleMap(File.getFileCharacteristic()) &&
!isSystem(File.getFileCharacteristic()) &&
!AffectingClangModuleMaps.empty() &&
AffectingClangModuleMaps.find(Cache->OrigEntry) ==
AffectingClangModuleMaps.end()) {
SkippedModuleMaps.insert(Cache->OrigEntry);
// Do not emit modulemaps that do not affect current module.
// Do not emit input files that do not affect current module.
if (!IsSLocAffecting[I])
continue;
}

InputFileEntry Entry;
Entry.File = Cache->OrigEntry;
Expand Down Expand Up @@ -2064,12 +2050,9 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
if (SLoc->isFile()) {
const SrcMgr::FileInfo &File = SLoc->getFile();
const SrcMgr::ContentCache *Content = &File.getContentCache();
if (Content->OrigEntry && !SkippedModuleMaps.empty() &&
SkippedModuleMaps.find(Content->OrigEntry) !=
SkippedModuleMaps.end()) {
// Do not emit files that were not listed as inputs.
// Do not emit files that were not listed as inputs.
if (!IsSLocAffecting[I])
continue;
}
SLocEntryOffsets.push_back(Offset);
// Starting offset of this entry within this module, so skip the dummy.
Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2);
Expand Down Expand Up @@ -4527,6 +4510,69 @@ static void AddLazyVectorDecls(ASTWriter &Writer, Vector &Vec,
}
}

void ASTWriter::collectNonAffectingInputFiles() {
SourceManager &SrcMgr = PP->getSourceManager();
unsigned N = SrcMgr.local_sloc_entry_size();

IsSLocAffecting.resize(N, true);

if (!WritingModule)
return;

auto AffectingModuleMaps =
GetAffectingModuleMaps(PP->getHeaderSearchInfo(), WritingModule);

unsigned FileIDAdjustment = 0;
unsigned OffsetAdjustment = 0;

NonAffectingFileIDAdjustments.reserve(N);
NonAffectingOffsetAdjustments.reserve(N);

NonAffectingFileIDAdjustments.push_back(FileIDAdjustment);
NonAffectingOffsetAdjustments.push_back(OffsetAdjustment);

for (unsigned I = 1; I != N; ++I) {
const SrcMgr::SLocEntry *SLoc = &SrcMgr.getLocalSLocEntry(I);
FileID FID = FileID::get(I);
assert(&SrcMgr.getSLocEntry(FID) == SLoc);

if (!SLoc->isFile())
continue;
const SrcMgr::FileInfo &File = SLoc->getFile();
const SrcMgr::ContentCache *Cache = &File.getContentCache();
if (!Cache->OrigEntry)
continue;

if (!isModuleMap(File.getFileCharacteristic()) ||
isSystem(File.getFileCharacteristic()) || AffectingModuleMaps.empty() ||
AffectingModuleMaps.find(Cache->OrigEntry) != AffectingModuleMaps.end())
continue;

IsSLocAffecting[I] = false;

FileIDAdjustment += 1;
// Even empty files take up one element in the offset table.
OffsetAdjustment += SrcMgr.getFileIDSize(FID) + 1;

// If the previous file was non-affecting as well, just extend its entry
// with our information.
if (!NonAffectingFileIDs.empty() &&
NonAffectingFileIDs.back().ID == FID.ID - 1) {
NonAffectingFileIDs.back() = FID;
NonAffectingRanges.back().setEnd(SrcMgr.getLocForEndOfFile(FID));
NonAffectingFileIDAdjustments.back() = FileIDAdjustment;
NonAffectingOffsetAdjustments.back() = OffsetAdjustment;
continue;
}

NonAffectingFileIDs.push_back(FID);
NonAffectingRanges.emplace_back(SrcMgr.getLocForStartOfFile(FID),
SrcMgr.getLocForEndOfFile(FID));
NonAffectingFileIDAdjustments.push_back(FileIDAdjustment);
NonAffectingOffsetAdjustments.push_back(OffsetAdjustment);
}
}

ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
Module *WritingModule) {
using namespace llvm;
Expand All @@ -4540,6 +4586,8 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
ASTContext &Context = SemaRef.Context;
Preprocessor &PP = SemaRef.PP;

collectNonAffectingInputFiles();

// Set up predefined declaration IDs.
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
if (D) {
Expand Down Expand Up @@ -5229,32 +5277,65 @@ void ASTWriter::AddAlignPackInfo(const Sema::AlignPackInfo &Info,
}

FileID ASTWriter::getAdjustedFileID(FileID FID) const {
// TODO: Actually adjust this.
return FID;
if (FID.isInvalid() || PP->getSourceManager().isLoadedFileID(FID) ||
NonAffectingFileIDs.empty())
return FID;
auto It = llvm::lower_bound(NonAffectingFileIDs, FID);
unsigned Idx = std::distance(NonAffectingFileIDs.begin(), It);
unsigned Offset = NonAffectingFileIDAdjustments[Idx];
return FileID::get(FID.getOpaqueValue() - Offset);
}

unsigned ASTWriter::getAdjustedNumCreatedFIDs(FileID FID) const {
// TODO: Actually adjust this.
return PP->getSourceManager()
.getLocalSLocEntry(FID.ID)
.getFile()
.NumCreatedFIDs;
unsigned NumCreatedFIDs = PP->getSourceManager()
.getLocalSLocEntry(FID.ID)
.getFile()
.NumCreatedFIDs;

unsigned AdjustedNumCreatedFIDs = 0;
for (unsigned I = FID.ID, N = I + NumCreatedFIDs; I != N; ++I)
if (IsSLocAffecting[I])
++AdjustedNumCreatedFIDs;
return AdjustedNumCreatedFIDs;
}

SourceLocation ASTWriter::getAdjustedLocation(SourceLocation Loc) const {
// TODO: Actually adjust this.
return Loc;
if (Loc.isInvalid())
return Loc;
return Loc.getLocWithOffset(-getAdjustment(Loc.getOffset()));
}

SourceRange ASTWriter::getAdjustedRange(SourceRange Range) const {
// TODO: Actually adjust this.
return Range;
return SourceRange(getAdjustedLocation(Range.getBegin()),
getAdjustedLocation(Range.getEnd()));
}

SourceLocation::UIntTy
ASTWriter::getAdjustedOffset(SourceLocation::UIntTy Offset) const {
// TODO: Actually adjust this.
return Offset;
return Offset - getAdjustment(Offset);
}

SourceLocation::UIntTy
ASTWriter::getAdjustment(SourceLocation::UIntTy Offset) const {
if (NonAffectingRanges.empty())
return 0;

if (PP->getSourceManager().isLoadedOffset(Offset))
return 0;

if (Offset > NonAffectingRanges.back().getEnd().getOffset())
return NonAffectingOffsetAdjustments.back();

if (Offset < NonAffectingRanges.front().getBegin().getOffset())
return 0;

auto Contains = [](const SourceRange &Range, SourceLocation::UIntTy Offset) {
return Range.getEnd().getOffset() < Offset;
};

auto It = llvm::lower_bound(NonAffectingRanges, Offset, Contains);
unsigned Idx = std::distance(NonAffectingRanges.begin(), It);
return NonAffectingOffsetAdjustments[Idx];
}

void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) {
Expand Down Expand Up @@ -5517,6 +5598,7 @@ void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) {
if (FID.isInvalid())
return;
assert(SM.getSLocEntry(FID).isFile());
assert(IsSLocAffecting[FID.ID]);

std::unique_ptr<DeclIDInFileInfo> &Info = FileDeclIDs[FID];
if (!Info)
Expand Down
66 changes: 20 additions & 46 deletions clang/test/Modules/add-remove-irrelevant-module-map.m
@@ -1,58 +1,32 @@
// RUN: rm -rf %t && mkdir %t
// RUN: split-file %s %t

//--- a.modulemap
//--- a/module.modulemap
module a {}

//--- b.modulemap
//--- b/module.modulemap
module b {}

//--- test-simple.m
// expected-no-diagnostics
@import a;

// Build without b.modulemap:
//
// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash \
// RUN: -fmodule-map-file=%t/a.modulemap %t/test-simple.m -verify
// RUN: mv %t/cache %t/cache-without-b

// Build with b.modulemap:
//
// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash \
// RUN: -fmodule-map-file=%t/a.modulemap -fmodule-map-file=%t/b.modulemap %t/test-simple.m -verify
// RUN: mv %t/cache %t/cache-with-b

// Neither PCM file considers 'b.modulemap' an input:
//
// RUN: %clang_cc1 -module-file-info %t/cache-without-b/a.pcm | FileCheck %s --check-prefix=CHECK-B
// RUN: %clang_cc1 -module-file-info %t/cache-with-b/a.pcm | FileCheck %s --check-prefix=CHECK-B
// CHECK-B-NOT: Input file: {{.*}}b.modulemap

//--- c.modulemap
module c [no_undeclared_includes] { header "c.h" }
//--- c/module.modulemap
module c {}

//--- c.h
#if __has_include("d.h") // This should use 'd.modulemap' in order to determine that 'd.h'
// doesn't exist for 'c' because of its '[no_undeclared_includes]'.
#endif

//--- d.modulemap
module d { header "d.h" }

//--- d.h
// empty
//--- module.modulemap
module m { header "m.h" }
//--- m.h
@import c;

//--- test-no-undeclared-includes.m
//--- test-simple.m
// expected-no-diagnostics
@import c;
@import m;

// Build modules with the non-affecting "a/module.modulemap".
// RUN: %clang_cc1 -I %t/a -I %t/b -I %t/c -I %t -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash %t/test-simple.m -verify
// RUN: mv %t/cache %t/cache-with

// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fdisable-module-hash \
// RUN: -fmodule-map-file=%t/c.modulemap -fmodule-map-file=%t/d.modulemap \
// RUN: %t/test-no-undeclared-includes.m -verify
// Build modules without the non-affecting "a/module.modulemap".
// RUN: rm -rf %t/a/module.modulemap
// RUN: %clang_cc1 -I %t/a -I %t/b -I %t/c -I %t -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash %t/test-simple.m -verify
// RUN: mv %t/cache %t/cache-without

// The PCM file considers 'd.modulemap' an input because it affects the compilation,
// although it doesn't describe the built module or its imports.
//
// RUN: %clang_cc1 -module-file-info %t/cache/c.pcm | FileCheck %s --check-prefix=CHECK-D
// CHECK-D: Input file: {{.*}}d.modulemap
// Check that the PCM files are bit-for-bit identical.
// RUN: diff %t/cache-with/m.pcm %t/cache-without/m.pcm

0 comments on commit 6924a49

Please sign in to comment.