Skip to content

Commit

Permalink
[C++20][Modules] Implement include translation.
Browse files Browse the repository at this point in the history
This addresses [cpp.include]/7

(when encountering #include header-name)

If the header identified by the header-name denotes an importable header, it
is implementation-defined whether the #include preprocessing directive is
instead replaced by an import directive.

In this implementation, include translation is performed _only_ for headers
in the Global Module fragment, so:
```
module;
 #include "will-be-translated.h" // IFF the header unit is available.

export module M;
 #include "will-not-be-translated.h" // even if the header unit is available
```
The reasoning is that, in general, includes in the module purview would not
be validly translatable (they would have to immediately follow the module
decl and without any other intervening decls).  Otherwise that would violate
the rules on contiguous import directives.

This would be quite complex to track in the preprocessor, and for relatively
little gain (the user can 'import "will-not-be-translated.h";' instead.)

TODO: This is one area where it becomes increasingly difficult to disambiguate
clang modules in C++ from C++ standard modules.  That needs to be addressed in
both the driver and the FE.

Differential Revision: https://reviews.llvm.org/D128981
  • Loading branch information
iains committed Jul 10, 2022
1 parent 3085e42 commit af2d11b
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 28 deletions.
63 changes: 63 additions & 0 deletions clang/include/clang/Lex/Preprocessor.h
Expand Up @@ -385,6 +385,7 @@ class Preprocessor {

bool atTopLevel() { return S <= 0; }
bool afterImportSeq() { return S == AfterImportSeq; }
bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }

private:
State S;
Expand All @@ -397,6 +398,67 @@ class Preprocessor {
/// Our current position within a C++20 import-seq.
ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;

/// Track whether we are in a Global Module Fragment
class TrackGMF {
public:
enum GMFState : int {
GMFActive = 1,
MaybeGMF = 0,
BeforeGMFIntroducer = -1,
GMFAbsentOrEnded = -2,
};

TrackGMF(GMFState S) : S(S) {}

/// Saw a semicolon.
void handleSemi() {
// If it is immediately after the first instance of the module keyword,
// then that introduces the GMF.
if (S == MaybeGMF)
S = GMFActive;
}

/// Saw an 'export' identifier.
void handleExport() {
// The presence of an 'export' keyword always ends or excludes a GMF.
S = GMFAbsentOrEnded;
}

/// Saw an 'import' identifier.
void handleImport(bool AfterTopLevelTokenSeq) {
// If we see this before any 'module' kw, then we have no GMF.
if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
S = GMFAbsentOrEnded;
}

/// Saw a 'module' identifier.
void handleModule(bool AfterTopLevelTokenSeq) {
// This was the first module identifier and not preceded by any token
// that would exclude a GMF. It could begin a GMF, but only if directly
// followed by a semicolon.
if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
S = MaybeGMF;
else
S = GMFAbsentOrEnded;
}

/// Saw any other token.
void handleMisc() {
// We saw something other than ; after the 'module' kw, so not a GMF.
if (S == MaybeGMF)
S = GMFAbsentOrEnded;
}

bool inGMF() { return S == GMFActive; }

private:
/// Track the transitions into and out of a Global Module Fragment,
/// if one is present.
GMFState S;
};

TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;

/// Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier = false;
Expand Down Expand Up @@ -2414,6 +2476,7 @@ class Preprocessor {
None,
ModuleBegin,
ModuleImport,
HeaderUnitImport,
SkippedModuleImport,
Failure,
} Kind;
Expand Down
90 changes: 66 additions & 24 deletions clang/lib/Lex/PPDirectives.cpp
Expand Up @@ -1983,6 +1983,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
tok::annot_module_begin, Action.ModuleForHeader);
break;
case ImportAction::HeaderUnitImport:
EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
Action.ModuleForHeader);
break;
case ImportAction::ModuleImport:
EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
tok::annot_module_include, Action.ModuleForHeader);
Expand Down Expand Up @@ -2191,6 +2195,17 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
// known to have no effect beyond its effect on module visibility -- that is,
// if it's got an include guard that is already defined, set to Import if it
// is a modular header we've already built and should import.

// For C++20 Modules
// [cpp.include]/7 If the header identified by the header-name denotes an
// importable header, it is implementation-defined whether the #include
// preprocessing directive is instead replaced by an import directive.
// For this implementation, the translation is permitted when we are parsing
// the Global Module Fragment, and not otherwise (the cases where it would be
// valid to replace an include with an import are highly constrained once in
// named module purview; this choice avoids considerable complexity in
// determining valid cases).

enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;

if (PPOpts->SingleFileParseMode)
Expand All @@ -2203,13 +2218,34 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
alreadyIncluded(*File))
Action = IncludeLimitReached;

bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule &&
!isForModuleBuilding(SuggestedModule.getModule(),
getLangOpts().CurrentModule,
getLangOpts().ModuleName);

// FIXME: We do not have a good way to disambiguate C++ clang modules from
// C++ standard modules (other than use/non-use of Header Units).
Module *SM = SuggestedModule.getModule();
// Maybe a usable Header Unit
bool UsableHeaderUnit = false;
if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
if (TrackGMFState.inGMF() || IsImportDecl)
UsableHeaderUnit = true;
else if (!IsImportDecl) {
// This is a Header Unit that we do not include-translate
SuggestedModule = ModuleMap::KnownHeader();
SM = nullptr;
}
}
// Maybe a usable clang header module.
bool UsableHeaderModule =
(getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
!SM->isHeaderUnit();

// Determine whether we should try to import the module for this #include, if
// there is one. Don't do so if precompiled module support is disabled or we
// are processing this module textually (because we're building the module).
if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
!isForModuleBuilding(SuggestedModule.getModule(),
getLangOpts().CurrentModule,
getLangOpts().ModuleName)) {
if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) {
// If this include corresponds to a module but that module is
// unavailable, diagnose the situation and bail out.
// FIXME: Remove this; loadModule does the same check (but produces
Expand All @@ -2226,7 +2262,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
// FIXME: Should we have a second loadModule() overload to avoid this
// extra lookup step?
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
for (Module *Mod = SM; Mod; Mod = Mod->Parent)
Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
FilenameTok.getLocation()));
std::reverse(Path.begin(), Path.end());
Expand Down Expand Up @@ -2293,17 +2329,23 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
if (Action == Enter && File &&
!HeaderInfo.ShouldEnterIncludeFile(
*this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules,
SuggestedModule.getModule(), IsFirstIncludeOfFile)) {
!HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
EnterOnce, getLangOpts().Modules, SM,
IsFirstIncludeOfFile)) {
// C++ standard modules:
// If we are not in the GMF, then we textually include only
// clang modules:
// Even if we've already preprocessed this header once and know that we
// don't need to see its contents again, we still need to import it if it's
// modular because we might not have imported it from this submodule before.
//
// FIXME: We don't do this when compiling a PCH because the AST
// serialization layer can't cope with it. This means we get local
// submodule visibility semantics wrong in that case.
Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
Action = TrackGMFState.inGMF() ? Import : Skip;
else
Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
}

// Check for circular inclusion of the main file.
Expand Down Expand Up @@ -2440,8 +2482,8 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
switch (Action) {
case Skip:
// If we don't need to enter the file, stop now.
if (Module *M = SuggestedModule.getModule())
return {ImportAction::SkippedModuleImport, M};
if (SM)
return {ImportAction::SkippedModuleImport, SM};
return {ImportAction::None};

case IncludeLimitReached:
Expand All @@ -2451,16 +2493,15 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(

case Import: {
// If this is a module import, make it visible if needed.
Module *M = SuggestedModule.getModule();
assert(M && "no module to import");
assert(SM && "no module to import");

makeModuleVisible(M, EndLoc);
makeModuleVisible(SM, EndLoc);

if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
tok::pp___include_macros)
return {ImportAction::None};

return {ImportAction::ModuleImport, M};
return {ImportAction::ModuleImport, SM};
}

case Enter:
Expand Down Expand Up @@ -2492,13 +2533,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
return {ImportAction::None};

// Determine if we're switching to building a new submodule, and which one.
if (auto *M = SuggestedModule.getModule()) {
if (M->getTopLevelModule()->ShadowingModule) {
// This does not apply for C++20 modules header units.
if (SM && !SM->isHeaderUnit()) {
if (SM->getTopLevelModule()->ShadowingModule) {
// We are building a submodule that belongs to a shadowed module. This
// means we find header files in the shadowed module.
Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
<< M->getFullModuleName();
Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
<< SM->getFullModuleName();
Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
diag::note_previous_definition);
return {ImportAction::None};
}
Expand All @@ -2511,22 +2553,22 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
// that PCH, which means we should enter the submodule. We need to teach
// the AST serialization layer to deal with the resulting AST.
if (getLangOpts().CompilingPCH &&
isForModuleBuilding(M, getLangOpts().CurrentModule,
isForModuleBuilding(SM, getLangOpts().CurrentModule,
getLangOpts().ModuleName))
return {ImportAction::None};

assert(!CurLexerSubmodule && "should not have marked this as a module yet");
CurLexerSubmodule = M;
CurLexerSubmodule = SM;

// Let the macro handling code know that any future macros are within
// the new submodule.
EnterSubmodule(M, EndLoc, /*ForPragma*/false);
EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);

// Let the parser know that any future declarations are within the new
// submodule.
// FIXME: There's no point doing this if we're handling a #__include_macros
// directive.
return {ImportAction::ModuleBegin, M};
return {ImportAction::ModuleBegin, SM};
}

assert(!IsImportDecl && "failed to diagnose missing module for import decl");
Expand Down
14 changes: 14 additions & 0 deletions clang/lib/Lex/Preprocessor.cpp
Expand Up @@ -941,6 +941,9 @@ void Preprocessor::Lex(Token &Result) {

// Update ImportSeqState to track our position within a C++20 import-seq
// if this token is being produced as a result of phase 4 of translation.
// Update TrackGMFState to decide if we are currently in a Global Module
// Fragment. GMF state updates should precede ImportSeq ones, since GMF state
// depends on the prevailing ImportSeq state in two cases.
if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
!Result.getFlag(Token::IsReinjected)) {
switch (Result.getKind()) {
Expand All @@ -953,18 +956,24 @@ void Preprocessor::Lex(Token &Result) {
case tok::r_brace:
ImportSeqState.handleCloseBrace();
break;
// This token is injected to represent the translation of '#include "a.h"'
// into "import a.h;". Mimic the notional ';'.
case tok::annot_module_include:
case tok::semi:
TrackGMFState.handleSemi();
ImportSeqState.handleSemi();
break;
case tok::header_name:
case tok::annot_header_unit:
ImportSeqState.handleHeaderName();
break;
case tok::kw_export:
TrackGMFState.handleExport();
ImportSeqState.handleExport();
break;
case tok::identifier:
if (Result.getIdentifierInfo()->isModulesImport()) {
TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq());
ImportSeqState.handleImport();
if (ImportSeqState.afterImportSeq()) {
ModuleImportLoc = Result.getLocation();
Expand All @@ -973,9 +982,13 @@ void Preprocessor::Lex(Token &Result) {
CurLexerKind = CLK_LexAfterModuleImport;
}
break;
} else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq());
break;
}
LLVM_FALLTHROUGH;
default:
TrackGMFState.handleMisc();
ImportSeqState.handleMisc();
break;
}
Expand Down Expand Up @@ -1222,6 +1235,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
LLVM_FALLTHROUGH;

case ImportAction::ModuleImport:
case ImportAction::HeaderUnitImport:
case ImportAction::SkippedModuleImport:
// We chose to import (or textually enter) the file. Convert the
// header-name token into a header unit annotation token.
Expand Down
18 changes: 14 additions & 4 deletions clang/lib/Parse/Parser.cpp
Expand Up @@ -663,12 +663,22 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
return false;
}

case tok::annot_module_include:
Actions.ActOnModuleInclude(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
case tok::annot_module_include: {
auto Loc = Tok.getLocation();
Module *Mod = reinterpret_cast<Module *>(Tok.getAnnotationValue());
// FIXME: We need a better way to disambiguate C++ clang modules and
// standard C++ modules.
if (!getLangOpts().CPlusPlusModules || !Mod->isHeaderUnit())
Actions.ActOnModuleInclude(Loc, Mod);
else {
DeclResult Import =
Actions.ActOnModuleImport(Loc, SourceLocation(), Loc, Mod);
Decl *ImportDecl = Import.isInvalid() ? nullptr : Import.get();
Result = Actions.ConvertDeclToDeclGroup(ImportDecl);
}
ConsumeAnnotationToken();
return false;
}

case tok::annot_module_begin:
Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
Expand Down

0 comments on commit af2d11b

Please sign in to comment.