Skip to content

Commit 2eada93

Browse files
committed
[cxxmodules] Implement global module indexing to improve performance.
The global module index represents an efficient on-disk hash table which stores an identifier->module mapping. Every time clang finds an unknown identifier we are informed and can load the corresponding module on demand. This way we can provide a minimal set of loaded modules. Currently, we see that for hsimple.C only half of the modules are loaded. This can be further improved because we currently load all modules which contain an identifier; that is, when looking for (for example) TPad we will load all modules which have the identifier TPad, including modules which contain only a forward declaration of it. Kudos Arpitha Raghunandan (arpi-r)!
1 parent 740d3dd commit 2eada93

File tree

7 files changed

+184
-28
lines changed

7 files changed

+184
-28
lines changed

core/metacling/src/TCling.cxx

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,11 @@ clang/LLVM technology.
9595
#include "clang/Lex/HeaderSearch.h"
9696
#include "clang/Lex/Preprocessor.h"
9797
#include "clang/Lex/PreprocessorOptions.h"
98+
#include "clang/Parse/Parser.h"
9899
#include "clang/Sema/Lookup.h"
99100
#include "clang/Sema/Sema.h"
100-
#include "clang/Parse/Parser.h"
101+
#include "clang/Serialization/ASTReader.h"
102+
#include "clang/Serialization/GlobalModuleIndex.h"
101103

102104
#include "cling/Interpreter/ClangInternalState.h"
103105
#include "cling/Interpreter/DynamicLibraryManager.h"
@@ -1194,6 +1196,51 @@ static void RegisterPreIncludedHeaders(cling::Interpreter &clingInterp)
11941196
clingInterp.declare(PreIncludes);
11951197
}
11961198

1199+
static bool HaveFullGlobalModuleIndex = false;
1200+
static GlobalModuleIndex *loadGlobalModuleIndex(cling::Interpreter &interp, SourceLocation TriggerLoc)
1201+
{
1202+
CompilerInstance &CI = *interp.getCI();
1203+
Preprocessor &PP = CI.getPreprocessor();
1204+
auto ModuleManager = CI.getModuleManager();
1205+
assert(ModuleManager);
1206+
// StringRef ModuleIndexPath = HSI.getModuleCachePath();
1207+
// HeaderSearch& HSI = PP.getHeaderSearchInfo();
1208+
// HSI.setModuleCachePath(TROOT::GetLibDir().Data());
1209+
std::string ModuleIndexPath = TROOT::GetLibDir().Data();
1210+
if (ModuleIndexPath.empty())
1211+
return nullptr;
1212+
// Get an existing global index. This loads it if not already loaded.
1213+
ModuleManager->resetForReload();
1214+
ModuleManager->loadGlobalIndex();
1215+
GlobalModuleIndex *GlobalIndex = ModuleManager->getGlobalIndex();
1216+
if (!GlobalIndex && CI.hasFileManager()) {
1217+
}
1218+
1219+
// For finding modules needing to be imported for fixit messages,
1220+
// we need to make the global index cover all modules, so we do that here.
1221+
if (!GlobalIndex && !HaveFullGlobalModuleIndex) {
1222+
ModuleMap &MMap = PP.getHeaderSearchInfo().getModuleMap();
1223+
bool RecreateIndex = false;
1224+
for (ModuleMap::module_iterator I = MMap.module_begin(), E = MMap.module_end(); I != E; ++I) {
1225+
Module *TheModule = I->second;
1226+
// We do want the index only of the prebuilt modules
1227+
std::string ModuleName = GetModuleNameAsString(TheModule, PP);
1228+
if (ModuleName.empty())
1229+
continue;
1230+
LoadModule(ModuleName, interp);
1231+
RecreateIndex = true;
1232+
}
1233+
if (RecreateIndex) {
1234+
GlobalModuleIndex::writeIndex(CI.getFileManager(), CI.getPCHContainerReader(), ModuleIndexPath);
1235+
ModuleManager->resetForReload();
1236+
ModuleManager->loadGlobalIndex();
1237+
GlobalIndex = ModuleManager->getGlobalIndex();
1238+
}
1239+
HaveFullGlobalModuleIndex = true;
1240+
}
1241+
return GlobalIndex;
1242+
}
1243+
11971244
////////////////////////////////////////////////////////////////////////////////
11981245
/// Initialize the cling interpreter interface.
11991246
/// \param argv - array of arguments passed to the cling::Interpreter constructor
@@ -1325,6 +1372,7 @@ TCling::TCling(const char *name, const char *title, const char* const argv[])
13251372

13261373
clingArgsStorage.push_back("-fmodule-map-file=" + ModuleMapLoc);
13271374
}
1375+
clingArgsStorage.push_back("-fmodules-cache-path=" + std::string(TROOT::GetLibDir()));
13281376
}
13291377

13301378
std::vector<const char*> interpArgs;
@@ -1401,7 +1449,34 @@ TCling::TCling(const char *name, const char *title, const char* const argv[])
14011449
static llvm::raw_fd_ostream fMPOuts (STDOUT_FILENO, /*ShouldClose*/false);
14021450
fMetaProcessor = llvm::make_unique<cling::MetaProcessor>(*fInterpreter, fMPOuts);
14031451

1404-
RegisterCxxModules(*fInterpreter);
1452+
if (fInterpreter->getCI()->getLangOpts().Modules) {
1453+
// Setup core C++ modules if we have any to setup.
1454+
1455+
// Load libc and stl first.
1456+
#ifdef R__MACOSX
1457+
LoadModules({"Darwin", "std"}, *fInterpreter);
1458+
#else
1459+
LoadModules({"libc", "stl"}, *fInterpreter);
1460+
#endif
1461+
1462+
if (!fromRootCling)
1463+
loadGlobalModuleIndex(*fInterpreter, SourceLocation());
1464+
1465+
// C99 decided that it's a very good idea to name a macro `I` (the letter I).
1466+
// This seems to screw up nearly all the template code out there as `I` is
1467+
// common template parameter name and iterator variable name.
1468+
// Let's follow the GCC recommendation and undefine `I` in case any of the
1469+
// core modules have defined it:
1470+
// https://www.gnu.org/software/libc/manual/html_node/Complex-Numbers.html
1471+
fInterpreter->declare("#ifdef I\n #undef I\n #endif\n");
1472+
1473+
// These macros are from loading R related modules, which conflict with
1474+
// user's code.
1475+
fInterpreter->declare("#ifdef PI\n #undef PI\n #endif\n");
1476+
fInterpreter->declare("#ifdef ERROR\n #undef ERROR\n #endif\n");
1477+
}
1478+
1479+
// RegisterCxxModules(*fInterpreter);
14051480
RegisterPreIncludedHeaders(*fInterpreter);
14061481

14071482
// We are now ready (enough is loaded) to init the list of opaque typedefs.

core/metacling/src/TClingCallbacks.cxx

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include "clang/Parse/Parser.h"
3030
#include "clang/Sema/Lookup.h"
3131
#include "clang/Sema/Scope.h"
32+
#include "clang/Serialization/ASTReader.h"
33+
#include "clang/Serialization/GlobalModuleIndex.h"
3234

3335
#include "llvm/Support/FileSystem.h"
3436
#include "llvm/Support/Path.h"
@@ -285,6 +287,28 @@ bool TClingCallbacks::LookupObject(LookupResult &R, Scope *S) {
285287
return tryResolveAtRuntimeInternal(R, S);
286288
}
287289

290+
static bool findInGlobalIndex(cling::Interpreter &Interp, DeclarationName Name, bool loadFirstMatchOnly = true)
291+
{
292+
GlobalModuleIndex *Index = Interp.getCI()->getModuleManager()->getGlobalIndex();
293+
if (!Index)
294+
return false;
295+
296+
GlobalModuleIndex::FileNameHitSet FoundModules;
297+
298+
// Find the modules that reference the identifier.
299+
// Note that this only finds top-level modules.
300+
if (Index->lookupIdentifier(Name.getAsString(), FoundModules)) {
301+
for (auto FileName : FoundModules) {
302+
StringRef ModuleName = llvm::sys::path::stem(*FileName);
303+
Interp.loadModule(ModuleName);
304+
if (loadFirstMatchOnly)
305+
break;
306+
}
307+
return true;
308+
}
309+
return false;
310+
}
311+
288312
bool TClingCallbacks::LookupObject(const DeclContext* DC, DeclarationName Name) {
289313
if (!fROOTSpecialNamespace) {
290314
// init error or rootcling
@@ -293,9 +317,21 @@ bool TClingCallbacks::LookupObject(const DeclContext* DC, DeclarationName Name)
293317

294318
if (!IsAutoLoadingEnabled() || fIsAutoLoadingRecursively) return false;
295319

296-
if (Name.getNameKind() != DeclarationName::Identifier) return false;
320+
// We are currently building a module, we should not autoload.
321+
Sema &SemaR = m_Interpreter->getSema();
322+
const LangOptions &LangOpts = SemaR.getPreprocessor().getLangOpts();
323+
if (LangOpts.Modules) {
324+
if (LangOpts.isCompilingModule())
325+
return false;
297326

327+
// FIXME: We should load only the first available and rely on other callbacks
328+
// such as RequireCompleteType and LookupUnqualified to load all.
329+
if (findInGlobalIndex(*m_Interpreter, Name, /*loadFirstMatchOnly*/ false))
330+
return true;
331+
}
298332

333+
if (Name.getNameKind() != DeclarationName::Identifier)
334+
return false;
299335

300336
// Get the 'lookup' decl context.
301337
// We need to cast away the constness because we will lookup items of this
@@ -311,7 +347,6 @@ bool TClingCallbacks::LookupObject(const DeclContext* DC, DeclarationName Name)
311347
if (primaryDC != DC)
312348
return false;
313349

314-
Sema &SemaR = m_Interpreter->getSema();
315350
LookupResult R(SemaR, Name, SourceLocation(), Sema::LookupOrdinaryName);
316351
R.suppressDiagnostics();
317352
// We need the qualified name for TCling to find the right library.

interpreter/cling/lib/Interpreter/CIFactory.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -547,17 +547,15 @@ namespace {
547547
for (StringRef ModulePath : Paths) {
548548
// FIXME: If we have a prebuilt module path that is equal to our module
549549
// cache we fail to compile the clang builtin modules for some reason.
550-
// This can't be reproduced in clang, so I assume we have some strange
551-
// error in our interpreter setup where this is causing errors (or maybe
552-
// clang is doing the same check in some hidden place).
553-
// The error looks like this:
554-
// .../include/stddef.h error: unknown type name '__PTRDIFF_TYPE__'
555-
// typedef __PTRDIFF_TYPE__ ptrdiff_t;
556-
// <similar follow up errors>
550+
// This makes clang to think it failed to build a dependency module, i.e.
551+
// if we are building module C, clang goes off and builds B and A first.
552+
// If the module cache points to the same location as the prebuilt module
553+
// path, clang errors out on building module A, however, it builds it.
554+
// Next time we run, it will build module B and issue diagnostics.
555+
// If we run third time, it'd build successfully C and continue.
557556
// For now it is fixed by just checking those two paths are not identical.
558-
if (normalizePath(ModulePath) != normalizePath(Opts.ModuleCachePath)) {
557+
if (normalizePath(ModulePath) != normalizePath(Opts.ModuleCachePath))
559558
Opts.AddPrebuiltModulePath(ModulePath);
560-
}
561559
}
562560
}
563561

interpreter/cling/lib/Interpreter/Interpreter.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -873,10 +873,32 @@ namespace cling {
873873
if (getSema().isModuleVisible(M))
874874
return true;
875875

876+
// We cannot use #pragma clang module import because the on-demand modules
877+
// may load a module in the middle of a function body for example. In this
878+
// case this triggers an error:
879+
// fatal error: import of module '...' appears within function '...'
880+
//
881+
// if (declare("#pragma clang module import \"" + M->Name + "\"") ==
882+
// kSuccess)
883+
// return true;
884+
876885
// FIXME: What about importing submodules such as std.blah. This disables
877886
// this functionality.
878-
if (declare("#pragma clang module import \"" + M->Name + "\"") == kSuccess)
879-
return true;
887+
Preprocessor& PP = getCI()->getPreprocessor();
888+
IdentifierInfo* II = PP.getIdentifierInfo(M->Name);
889+
SourceLocation ValidLoc = M->DefinitionLoc;
890+
Interpreter::PushTransactionRAII RAII(this);
891+
bool success = !getCI()
892+
->getSema()
893+
.ActOnModuleImport(ValidLoc, ValidLoc,
894+
std::make_pair(II, ValidLoc))
895+
.isInvalid();
896+
897+
if (success) {
898+
// Also make the module visible in the preprocessor to export its macros.
899+
PP.makeModuleVisible(M, ValidLoc);
900+
return success;
901+
}
880902

881903
if (complain) {
882904
if (M->IsSystem)

interpreter/llvm/src/tools/clang/include/clang/Serialization/GlobalModuleIndex.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ class GlobalModuleIndex {
179179
/// \returns true if the identifier is known to the index, false otherwise.
180180
bool lookupIdentifier(StringRef Name, HitSet &Hits);
181181

182+
typedef llvm::SmallPtrSet<std::string *, 4> FileNameHitSet;
183+
bool lookupIdentifier(StringRef Name, FileNameHitSet &Hits);
184+
182185
/// \brief Note that the given module file has been loaded.
183186
///
184187
/// \returns false if the global module index has information about this

interpreter/llvm/src/tools/clang/lib/Sema/SemaDecl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16196,7 +16196,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
1619616196

1619716197
VisibleModules.setVisible(Mod, ImportLoc);
1619816198

16199-
checkModuleImportContext(*this, Mod, ImportLoc, CurContext);
16199+
// checkModuleImportContext(*this, Mod, ImportLoc, CurContext);
1620016200

1620116201
// FIXME: we should support importing a submodule within a different submodule
1620216202
// of the same top-level module. Until we do, make it an error rather than

interpreter/llvm/src/tools/clang/lib/Serialization/GlobalModuleIndex.cpp

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,32 @@ bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
313313
return true;
314314
}
315315

316+
bool GlobalModuleIndex::lookupIdentifier(StringRef Name, FileNameHitSet &Hits) {
317+
Hits.clear();
318+
319+
// If there's no identifier index, there is nothing we can do.
320+
if (!IdentifierIndex)
321+
return false;
322+
323+
// Look into the identifier index.
324+
++NumIdentifierLookups;
325+
IdentifierIndexTable &Table =
326+
*static_cast<IdentifierIndexTable *>(IdentifierIndex);
327+
IdentifierIndexTable::iterator Known = Table.find(Name);
328+
if (Known == Table.end()) {
329+
return true;
330+
}
331+
332+
SmallVector<unsigned, 2> ModuleIDs = *Known;
333+
for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
334+
assert(!Modules[ModuleIDs[I]].FileName.empty());
335+
Hits.insert(&Modules[ModuleIDs[I]].FileName);
336+
}
337+
338+
++NumIdentifierLookupHits;
339+
return true;
340+
}
341+
316342
bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) {
317343
// Look for the module in the global module index based on the module name.
318344
StringRef Name = File->ModuleName;
@@ -660,10 +686,7 @@ bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
660686
DEnd = Table->data_end();
661687
D != DEnd; ++D) {
662688
std::pair<StringRef, bool> Ident = *D;
663-
if (Ident.second)
664-
InterestingIdentifiers[Ident.first].push_back(ID);
665-
else
666-
(void)InterestingIdentifiers[Ident.first];
689+
InterestingIdentifiers[Ident.first].push_back(ID);
667690
}
668691
}
669692

@@ -725,14 +748,14 @@ bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
725748
for (auto MapEntry : ImportedModuleFiles) {
726749
auto *File = MapEntry.first;
727750
ImportedModuleFileInfo &Info = MapEntry.second;
728-
if (getModuleFileInfo(File).Signature) {
729-
if (getModuleFileInfo(File).Signature != Info.StoredSignature)
730-
// Verify Signature.
731-
return true;
732-
} else if (Info.StoredSize != File->getSize() ||
733-
Info.StoredModTime != File->getModificationTime())
734-
// Verify Size and ModTime.
735-
return true;
751+
// if (getModuleFileInfo(File).Signature) {
752+
// if (getModuleFileInfo(File).Signature != Info.StoredSignature)
753+
// // Verify Signature.
754+
// return true;
755+
// } else if (Info.StoredSize != File->getSize() ||
756+
// Info.StoredModTime != File->getModificationTime())
757+
// // Verify Size and ModTime.
758+
// return true;
736759
}
737760

738761
using namespace llvm;

0 commit comments

Comments
 (0)