Skip to content

Commit

Permalink
[C++20] [Modules] [NFC] Add Preprocessor methods for named modules - …
Browse files Browse the repository at this point in the history
…for ClangScanDeps (1/4)

This patch prepares the necessary interfaces in the preprocessor for
D137527, since the preprocessor needs to recognize whether we're in a
module unit, the kind of that module unit, the module declaration, and
the module we're importing.

Differential Revision: https://reviews.llvm.org/D137526
  • Loading branch information
ChuanqiXu9 committed Feb 10, 2023
1 parent ad81d01 commit 6470706
Show file tree
Hide file tree
Showing 4 changed files with 555 additions and 5 deletions.
171 changes: 171 additions & 0 deletions clang/include/clang/Lex/Preprocessor.h
Expand Up @@ -313,6 +313,9 @@ class Preprocessor {
/// The import path for named module that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;

/// Whether the import is an `@import` or a standard c++ modules import.
bool IsAtImport = false;

/// Whether the last token we lexed was an '@'.
bool LastTokenWasAt = false;

Expand Down Expand Up @@ -456,6 +459,144 @@ class Preprocessor {

TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;

/// Track the status of the C++20 module declaration.
///
///   module-declaration:
///     'export'[opt] 'module' module-name module-partition[opt]
///         attribute-specifier-seq[opt] ';'
///
///   module-name:
///     module-name-qualifier[opt] identifier
///
///   module-partition:
///     ':' module-name-qualifier[opt] identifier
///
///   module-name-qualifier:
///     identifier '.'
///     module-name-qualifier identifier '.'
///
/// Transition state:
///
///   NotAModuleDecl --- export ---> FoundExport
///   NotAModuleDecl --- module ---> ImplementationCandidate
///   FoundExport --- module ---> InterfaceCandidate
///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
///   ImplementationCandidate --- period ---> ImplementationCandidate
///   ImplementationCandidate --- colon ---> ImplementationCandidate
///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
///   InterfaceCandidate --- period ---> InterfaceCandidate
///   InterfaceCandidate --- colon ---> InterfaceCandidate
///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
///   InterfaceCandidate --- Semi ---> NamedModuleInterface
///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
///   NamedModuleInterface --- Anything ---> NamedModuleInterface
///
/// Every transition not listed above resets the tracker to NotAModuleDecl
/// and clears the accumulated name. This includes a ';' seen in a candidate
/// state before any name piece was recorded (e.g. 'module;').
///
/// Note that the identifiers, '.' and ':' seen in a candidate state are
/// accumulated verbatim; their order is not validated against the grammar.
///
/// FIXME: attribute-specifier-seq is not handled here. This should be
/// harmless for now since no module attributes are supported yet.
class ModuleDeclSeq {
  enum ModuleDeclState : int {
    NotAModuleDecl,
    FoundExport,
    InterfaceCandidate,
    ImplementationCandidate,
    NamedModuleInterface,
    NamedModuleImplementation,
  };

public:
  ModuleDeclSeq() : State(NotAModuleDecl) {}

  /// Handle an 'export' token: it may start an exported module declaration.
  void handleExport() {
    if (State == NotAModuleDecl)
      State = FoundExport;
    else
      resetUnlessNamedModule();
  }

  /// Handle a 'module' token. Whether it was preceded by 'export' decides
  /// between an interface candidate and an implementation candidate.
  void handleModule() {
    if (State == FoundExport)
      State = InterfaceCandidate;
    else if (State == NotAModuleDecl)
      State = ImplementationCandidate;
    else
      resetUnlessNamedModule();
  }

  /// Handle an identifier token: while in a candidate state, a non-null
  /// \p Identifier is appended to the module name being built.
  void handleIdentifier(IdentifierInfo *Identifier) {
    if (isModuleCandidate() && Identifier)
      // Append the StringRef directly; no temporary std::string is needed.
      Name += Identifier->getName();
    else
      resetUnlessNamedModule();
  }

  /// Handle a ':' token: while in a candidate state it is appended to the
  /// name, separating the primary module name from the partition name.
  void handleColon() {
    if (isModuleCandidate())
      Name += ":";
    else
      resetUnlessNamedModule();
  }

  /// Handle a '.' token: while in a candidate state it is appended to the
  /// name as part of a dotted module name.
  void handlePeriod() {
    if (isModuleCandidate())
      Name += ".";
    else
      resetUnlessNamedModule();
  }

  /// Handle a ';' token: a candidate that has accumulated a name becomes a
  /// named module interface or implementation.
  void handleSemi() {
    if (Name.empty() || !isModuleCandidate()) {
      resetUnlessNamedModule();
      return;
    }

    // isModuleCandidate() guarantees State is one of the two candidates.
    State = (State == InterfaceCandidate) ? NamedModuleInterface
                                          : NamedModuleImplementation;
  }

  /// Handle any other token: it abandons a declaration in progress but does
  /// not affect an already recognized named module.
  void handleMisc() { resetUnlessNamedModule(); }

  /// Whether 'module' has been seen and the declaration is still being
  /// accumulated (no terminating ';' yet).
  bool isModuleCandidate() const {
    return State == InterfaceCandidate || State == ImplementationCandidate;
  }

  /// Whether a complete module declaration has been recognized.
  bool isNamedModule() const {
    return State == NamedModuleInterface ||
           State == NamedModuleImplementation;
  }

  /// Whether the recognized module declaration started with 'export'.
  bool isNamedInterface() const { return State == NamedModuleInterface; }

  /// Whether the recognized module declaration had no 'export' and its name
  /// has no ':' (i.e. it does not name a partition).
  bool isImplementationUnit() const {
    return State == NamedModuleImplementation && !getName().contains(':');
  }

  /// The full accumulated module name, including the ':partition' suffix if
  /// there is one. Must only be called once a named module was recognized.
  StringRef getName() const {
    assert(isNamedModule() && "Can't get name from a non named module");
    return Name;
  }

  /// The part of the module name before the first ':'. Must only be called
  /// once a named module was recognized.
  StringRef getPrimaryName() const {
    assert(isNamedModule() && "Can't get name from a non named module");
    return getName().split(':').first;
  }

  /// Forget everything and go back to the initial state.
  void reset() {
    Name.clear();
    State = NotAModuleDecl;
  }

private:
  /// Abandon a declaration in progress. Once a named module has been
  /// recognized the state is sticky, so nothing happens in that case.
  void resetUnlessNamedModule() {
    if (!isNamedModule())
      reset();
  }

  ModuleDeclState State;
  std::string Name;
};

ModuleDeclSeq ModuleDeclState;

/// Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier = false;
Expand Down Expand Up @@ -2225,6 +2366,36 @@ class Preprocessor {
/// Retrieves the module whose implementation we're currently compiling, if any.
Module *getCurrentModuleImplementation();

/// Whether we are preprocessing a named module, i.e. ModuleDeclState has
/// recognized a complete module declaration (with or without 'export').
bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }

/// Whether we are preprocessing a named interface unit, i.e. the recognized
/// module declaration started with 'export'.
/// Note that, to ease the parsing, a module implementation partition is not
/// considered a named interface unit here, although it is importable.
bool isInNamedInterfaceUnit() const {
return ModuleDeclState.isNamedInterface();
}

/// Get the name of the named module we're preprocessing, including the
/// ':partition' suffix if the module declaration has one.
/// Requires that we're preprocessing a named module (see isInNamedModule());
/// this precondition is asserted by ModuleDeclSeq::getName().
StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }

/// Whether we are preprocessing a module implementation unit, i.e. the
/// recognized module declaration had no 'export' and its name has no ':'.
/// Note that a module implementation partition is not considered an
/// implementation unit.
bool isInImplementationUnit() const {
return ModuleDeclState.isImplementationUnit();
}

/// Whether we're currently importing a standard C++20 named module, as
/// opposed to an `@import`.
bool isInImportingCXXNamedModules() const {
  // Without a recorded import path there is no named-module import going on.
  if (NamedModuleImportPath.empty())
    return false;

  // Standard named module imports require C++ modules to be enabled.
  if (!getLangOpts().CPlusPlusModules)
    return false;

  // IsAtImport marks an `@import`, which is not a standard C++ import.
  return !IsAtImport;
}

/// Allocate a new MacroInfo object with the provided SourceLocation.
MacroInfo *AllocateMacroInfo(SourceLocation L);

Expand Down
39 changes: 34 additions & 5 deletions clang/lib/Lex/Preprocessor.cpp
Expand Up @@ -873,6 +873,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
CurLexerKind != CLK_CachingLexer) {
ModuleImportLoc = Identifier.getLocation();
NamedModuleImportPath.clear();
IsAtImport = true;
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
}
Expand Down Expand Up @@ -940,6 +941,7 @@ void Preprocessor::Lex(Token &Result) {
case tok::semi:
TrackGMFState.handleSemi();
StdCXXImportSeqState.handleSemi();
ModuleDeclState.handleSemi();
break;
case tok::header_name:
case tok::annot_header_unit:
Expand All @@ -948,6 +950,13 @@ void Preprocessor::Lex(Token &Result) {
case tok::kw_export:
TrackGMFState.handleExport();
StdCXXImportSeqState.handleExport();
ModuleDeclState.handleExport();
break;
case tok::colon:
ModuleDeclState.handleColon();
break;
case tok::period:
ModuleDeclState.handlePeriod();
break;
case tok::identifier:
if (Result.getIdentifierInfo()->isModulesImport()) {
Expand All @@ -956,18 +965,25 @@ void Preprocessor::Lex(Token &Result) {
if (StdCXXImportSeqState.afterImportSeq()) {
ModuleImportLoc = Result.getLocation();
NamedModuleImportPath.clear();
IsAtImport = false;
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
}
break;
} else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
ModuleDeclState.handleModule();
break;
} else {
ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
if (ModuleDeclState.isModuleCandidate())
break;
}
[[fallthrough]];
default:
TrackGMFState.handleMisc();
StdCXXImportSeqState.handleMisc();
ModuleDeclState.handleMisc();
break;
}
}
Expand Down Expand Up @@ -1151,6 +1167,15 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
if (LexHeaderName(Result))
return true;

if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
std::string Name = ModuleDeclState.getPrimaryName().str();
Name += ":";
NamedModuleImportPath.push_back(
{getIdentifierInfo(Name), Result.getLocation()});
CurLexerKind = CLK_LexAfterModuleImport;
return true;
}
} else {
Lex(Result);
}
Expand All @@ -1164,9 +1189,10 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
/*DisableMacroExpansion*/ true, /*IsReinject*/ false);
};

bool ImportingHeader = Result.is(tok::header_name);
// Check for a header-name.
SmallVector<Token, 32> Suffix;
if (Result.is(tok::header_name)) {
if (ImportingHeader) {
// Enter the header-name token into the token stream; a Lex action cannot
// both return a token and cache tokens (doing so would corrupt the token
// cache if the call to Lex comes from CachingLex / PeekAhead).
Expand Down Expand Up @@ -1244,8 +1270,8 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
// We expected to see an identifier here, and we did; continue handling
// identifiers.
NamedModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
Result.getLocation()));
NamedModuleImportPath.push_back(
std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
ModuleImportExpectsIdentifier = false;
CurLexerKind = CLK_LexAfterModuleImport;
return true;
Expand Down Expand Up @@ -1285,7 +1311,8 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
std::string FlatModuleName;
if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
for (auto &Piece : NamedModuleImportPath) {
if (!FlatModuleName.empty())
// If the FlatModuleName ends with colon, it implies it is a partition.
if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
FlatModuleName += ".";
FlatModuleName += Piece.first->getName();
}
Expand All @@ -1296,14 +1323,16 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
}

Module *Imported = nullptr;
if (getLangOpts().Modules) {
// We don't/shouldn't load the standard c++20 modules when preprocessing.
if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
Imported = TheModuleLoader.loadModule(ModuleImportLoc,
NamedModuleImportPath,
Module::Hidden,
/*IsInclusionDirective=*/false);
if (Imported)
makeModuleVisible(Imported, SemiLoc);
}

if (Callbacks)
Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);

Expand Down
2 changes: 2 additions & 0 deletions clang/unittests/Lex/CMakeLists.txt
Expand Up @@ -7,6 +7,7 @@ add_clang_unittest(LexTests
HeaderMapTest.cpp
HeaderSearchTest.cpp
LexerTest.cpp
ModuleDeclStateTest.cpp
PPCallbacksTest.cpp
PPConditionalDirectiveRecordTest.cpp
PPDependencyDirectivesTest.cpp
Expand All @@ -17,6 +18,7 @@ clang_target_link_libraries(LexTests
PRIVATE
clangAST
clangBasic
clangFrontend
clangLex
clangParse
clangSema
Expand Down

0 comments on commit 6470706

Please sign in to comment.