Skip to content

Commit

Permalink
[include-mapping] Implement language separation in stdlib recognizer …
Browse files Browse the repository at this point in the history
…library

Differential Revision: https://reviews.llvm.org/D142992
  • Loading branch information
VitaNuo authored and hokein committed Feb 3, 2023
1 parent 6827c4f commit 1285172
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 68 deletions.
32 changes: 21 additions & 11 deletions clang/include/clang/Tooling/Inclusions/StandardLibrary.h
Expand Up @@ -30,25 +30,29 @@ namespace tooling {
namespace stdlib {

class Symbol;
// Languages that have their own standard library symbol/header mapping.
// Enumerators are consecutive starting at 0; LastValue names the final one.
enum class Lang {
  C = 0,
  CXX,
  LastValue = CXX,
};

// A standard library header, such as <iostream>
// Lightweight class, in fact just an index into a table.
// C++ and C Library compatibility headers are considered different: e.g.
// "<cstdio>" and "<stdio.h>" (and their symbols) are treated differently.
class Header {
public:
static std::vector<Header> all();
static std::vector<Header> all(Lang L = Lang::CXX);
// Name should contain the angle brackets, e.g. "<vector>".
static std::optional<Header> named(llvm::StringRef Name);
static std::optional<Header> named(llvm::StringRef Name,
Lang Language = Lang::CXX);

friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
return OS << H.name();
}
llvm::StringRef name() const;

private:
Header(unsigned ID) : ID(ID) {}
Header(unsigned ID, Lang Language) : ID(ID), Language(Language) {}
unsigned ID;
Lang Language;

friend Symbol;
friend llvm::DenseMapInfo<Header>;
friend bool operator==(const Header &L, const Header &R) {
Expand All @@ -64,11 +68,11 @@ class Header {
// for them.
class Symbol {
public:
static std::vector<Symbol> all();
static std::vector<Symbol> all(Lang L = Lang::CXX);
/// \p Scope should have the trailing "::", for example:
/// named("std::chrono::", "system_clock")
static std::optional<Symbol> named(llvm::StringRef Scope,
llvm::StringRef Name);
static std::optional<Symbol>
named(llvm::StringRef Scope, llvm::StringRef Name, Lang Language = Lang::CXX);

friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
return OS << S.qualified_name();
Expand All @@ -82,8 +86,10 @@ class Symbol {
llvm::SmallVector<Header> headers() const;

private:
Symbol(unsigned ID) : ID(ID) {}
Symbol(unsigned ID, Lang Language) : ID(ID), Language(Language) {}
unsigned ID;
Lang Language;

friend class Recognizer;
friend llvm::DenseMapInfo<Symbol>;
friend bool operator==(const Symbol &L, const Symbol &R) {
Expand Down Expand Up @@ -114,10 +120,12 @@ namespace llvm {

template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {
static inline clang::tooling::stdlib::Header getEmptyKey() {
return clang::tooling::stdlib::Header(-1);
return clang::tooling::stdlib::Header(-1,
clang::tooling::stdlib::Lang::CXX);
}
static inline clang::tooling::stdlib::Header getTombstoneKey() {
return clang::tooling::stdlib::Header(-2);
return clang::tooling::stdlib::Header(-2,
clang::tooling::stdlib::Lang::CXX);
}
static unsigned getHashValue(const clang::tooling::stdlib::Header &H) {
return hash_value(H.ID);
Expand All @@ -130,10 +138,12 @@ template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {

template <> struct DenseMapInfo<clang::tooling::stdlib::Symbol> {
static inline clang::tooling::stdlib::Symbol getEmptyKey() {
return clang::tooling::stdlib::Symbol(-1);
return clang::tooling::stdlib::Symbol(-1,
clang::tooling::stdlib::Lang::CXX);
}
static inline clang::tooling::stdlib::Symbol getTombstoneKey() {
return clang::tooling::stdlib::Symbol(-2);
return clang::tooling::stdlib::Symbol(-2,
clang::tooling::stdlib::Lang::CXX);
}
static unsigned getHashValue(const clang::tooling::stdlib::Symbol &S) {
return hash_value(S.ID);
Expand Down
172 changes: 115 additions & 57 deletions clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
Expand Up @@ -8,53 +8,82 @@

#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"

namespace clang {
namespace tooling {
namespace stdlib {

// Header::ID => header name
static llvm::StringRef *HeaderNames;
// Header name => Header::ID
static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;

static unsigned SymbolCount = 0;
// Symbol::ID => symbol qualified_name/name/scope
static struct SymbolName {
const char *Data; // std::vector
unsigned ScopeLen; // ~~~~~
unsigned NameLen; // ~~~~~~
} *SymbolNames;
namespace {
// Symbol name -> Symbol::ID, within a namespace.
using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
// Symbol::ID => Header::ID
static unsigned *SymbolHeaderIDs;

static int initialize() {
SymbolCount = 0;
#define SYMBOL(Name, NS, Header) ++SymbolCount;
// A Mapping per language.
// Bundles every lookup table built for one language's standard library.
// All tables are held through raw pointers that start out null.
struct SymbolHeaderMapping {
  // Header::ID => header name
  llvm::StringRef *HeaderNames = nullptr;
  // Header name => Header::ID
  // Initialized to null like every other table pointer in this struct, so a
  // default-initialized mapping never carries an indeterminate pointer.
  llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs = nullptr;

  unsigned SymbolCount = 0;
  // Symbol::ID => symbol qualified_name/name/scope
  struct SymbolName {
    const char *Data;  // Start of the qualified name, e.g. "std::vector".
    unsigned ScopeLen; // Length of the leading scope part, e.g. "std::".
    unsigned NameLen;  // Length of the unqualified name, e.g. "vector".
  } *SymbolNames = nullptr;
  // Scope (e.g. "std::") => map of symbol name -> Symbol::ID within that
  // scope.
  llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols = nullptr;
  // Symbol::ID => Header::ID
  unsigned *SymbolHeaderIDs = nullptr;
};
} // namespace
// One mapping pointer per language, indexed by the numeric value of the Lang
// enumerator (hence LastValue + 1 slots). Slots are filled in by
// initialize().
static SymbolHeaderMapping
*LanguageMappings[static_cast<unsigned>(Lang::LastValue) + 1];
// Returns the mapping stored in the LanguageMappings slot for language L.
static const SymbolHeaderMapping *getMappingPerLang(Lang L) {
  const auto Index = static_cast<unsigned>(L);
  return LanguageMappings[Index];
}

// Counts the SYMBOL entries of the symbol map that belongs to Language.
// NOTE(review): SymCount is unsigned but the function returns int - confirm
// the implicit conversion is intended.
static int countSymbols(Lang Language) {
unsigned SymCount = 0;
// While this macro is defined, each SYMBOL(...) use in the included .inc file
// (presumably one per symbol - verify against the .inc files) bumps the
// counter by one.
#define SYMBOL(Name, NS, Header) ++SymCount;
switch (Language) {
case Lang::C:
#include "clang/Tooling/Inclusions/CSymbolMap.inc"
break;
case Lang::CXX:
#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
break;
}
#undef SYMBOL
SymbolNames =
new std::remove_reference_t<decltype(*SymbolNames)>[SymbolCount];
SymbolHeaderIDs =
new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymbolCount];
NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
return SymCount;
}

static int initialize(Lang Language) {
SymbolHeaderMapping *Mapping = new SymbolHeaderMapping();
LanguageMappings[static_cast<unsigned>(Language)] = Mapping;

unsigned SymCount = countSymbols(Language);
Mapping->SymbolCount = SymCount;
Mapping->SymbolNames =
new std::remove_reference_t<decltype(*Mapping->SymbolNames)>[SymCount];
Mapping->SymbolHeaderIDs = new std::remove_reference_t<
decltype(*Mapping->SymbolHeaderIDs)>[SymCount];
Mapping->NamespaceSymbols =
new std::remove_reference_t<decltype(*Mapping->NamespaceSymbols)>;
Mapping->HeaderIDs =
new std::remove_reference_t<decltype(*Mapping->HeaderIDs)>;
auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
auto R = NamespaceSymbols->try_emplace(NS, nullptr);
auto R = Mapping->NamespaceSymbols->try_emplace(NS, nullptr);
if (R.second)
R.first->second = new NSSymbolMap();
return *R.first->second;
};

auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
return Mapping->HeaderIDs->try_emplace(Header, Mapping->HeaderIDs->size())
.first->second;
};

auto Add = [&, SymIndex(0)](llvm::StringRef QName, unsigned NSLen,
Expand All @@ -66,101 +95,130 @@ static int initialize() {
NSLen = 0;
}

SymbolNames[SymIndex] = {QName.data(), NSLen,
static_cast<unsigned int>(QName.size() - NSLen)};
SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
Mapping->SymbolNames[SymIndex] = {
QName.data(), NSLen, static_cast<unsigned int>(QName.size() - NSLen)};
Mapping->SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);

NSSymbolMap &NSSymbols = AddNS(QName.take_front(NSLen));
NSSymbols.try_emplace(QName.drop_front(NSLen), SymIndex);

++SymIndex;
};
#define SYMBOL(Name, NS, Header) Add(#NS #Name, strlen(#NS), #Header);
switch (Language) {
case Lang::C:
#include "clang/Tooling/Inclusions/CSymbolMap.inc"
break;
case Lang::CXX:
#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
break;
}
#undef SYMBOL

HeaderNames = new llvm::StringRef[HeaderIDs->size()];
for (const auto &E : *HeaderIDs)
HeaderNames[E.second] = E.first;
Mapping->HeaderNames = new llvm::StringRef[Mapping->HeaderIDs->size()];
for (const auto &E : *Mapping->HeaderIDs)
Mapping->HeaderNames[E.second] = E.first;

return 0;
}

// Builds the mapping tables the first time it is called. The work hangs off
// the initializer of a function-local static, so later calls do not repeat
// it.
static void ensureInitialized() {
static int Dummy = initialize();
static int Dummy = []() {
// Visit every Lang enumerator from 0 through LastValue inclusive.
for (unsigned L = 0; L <= static_cast<unsigned>(Lang::LastValue); ++L)
initialize(static_cast<Lang>(L));
return 0;
}();
// Dummy exists only to trigger the initializer; the cast marks it as used.
(void)Dummy;
}

// Returns one Header handle per header known for language L, in increasing
// Header::ID order.
std::vector<Header> Header::all() {
std::vector<Header> Header::all(Lang L) {
ensureInitialized();
std::vector<Header> Result;
Result.reserve(HeaderIDs->size());
for (unsigned I = 0, E = HeaderIDs->size(); I < E; ++I)
Result.push_back(Header(I));
const auto *Mapping = getMappingPerLang(L);
Result.reserve(Mapping->HeaderIDs->size());
// Every index below the table size is handed out as a Header::ID.
for (unsigned I = 0, E = Mapping->HeaderIDs->size(); I < E; ++I)
Result.push_back(Header(I, L));
return Result;
}
// Looks up a header of language L by name. Name should contain the angle
// brackets, e.g. "<vector>". Returns std::nullopt when the name is not in the
// table.
std::optional<Header> Header::named(llvm::StringRef Name) {
std::optional<Header> Header::named(llvm::StringRef Name, Lang L) {
ensureInitialized();
auto It = HeaderIDs->find(Name);
if (It == HeaderIDs->end())
const auto *Mapping = getMappingPerLang(L);
auto It = Mapping->HeaderIDs->find(Name);
if (It == Mapping->HeaderIDs->end())
return std::nullopt;
return Header(It->second);
// The mapped value is the Header::ID; pair it with the language it came from.
return Header(It->second, L);
}
// Returns this header's name, read from the name table of its own language.
llvm::StringRef Header::name() const {
  const SymbolHeaderMapping *Mapping = getMappingPerLang(Language);
  return Mapping->HeaderNames[ID];
}
llvm::StringRef Header::name() const { return HeaderNames[ID]; }

// Returns one Symbol handle per symbol known for language L, in increasing
// Symbol::ID order.
std::vector<Symbol> Symbol::all() {
std::vector<Symbol> Symbol::all(Lang L) {
ensureInitialized();
std::vector<Symbol> Result;
Result.reserve(SymbolCount);
for (unsigned I = 0, E = SymbolCount; I < E; ++I)
Result.push_back(Symbol(I));
const auto *Mapping = getMappingPerLang(L);
Result.reserve(Mapping->SymbolCount);
// Every index below SymbolCount is handed out as a Symbol::ID.
for (unsigned I = 0, E = Mapping->SymbolCount; I < E; ++I)
Result.push_back(Symbol(I, L));
return Result;
}
// Returns the scope part of the symbol: the first ScopeLen characters of the
// stored qualified name.
llvm::StringRef Symbol::scope() const {
SymbolName &S = SymbolNames[ID];
auto &S = getMappingPerLang(Language)->SymbolNames[ID];
return StringRef(S.Data, S.ScopeLen);
}
// Returns the unqualified name: the NameLen characters that follow the scope
// part in the stored qualified name.
llvm::StringRef Symbol::name() const {
SymbolName &S = SymbolNames[ID];
auto &S = getMappingPerLang(Language)->SymbolNames[ID];
return StringRef(S.Data + S.ScopeLen, S.NameLen);
}
// Returns the full qualified name: the scope part immediately followed by the
// unqualified name.
llvm::StringRef Symbol::qualified_name() const {
SymbolName &S = SymbolNames[ID];
auto &S = getMappingPerLang(Language)->SymbolNames[ID];
return StringRef(S.Data, S.ScopeLen + S.NameLen);
}
// Looks up a symbol of language L by scope and unqualified name. Returns
// std::nullopt when either the scope or the name within it is unknown.
std::optional<Symbol> Symbol::named(llvm::StringRef Scope,
llvm::StringRef Name) {
std::optional<Symbol> Symbol::named(llvm::StringRef Scope, llvm::StringRef Name,
Lang L) {
ensureInitialized();
if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {

// First find the per-scope table, then search it for the name. The pointer is
// null for an unknown scope, which skips the inner lookup.
if (NSSymbolMap *NSSymbols =
getMappingPerLang(L)->NamespaceSymbols->lookup(Scope)) {
auto It = NSSymbols->find(Name);
if (It != NSSymbols->end())
return Symbol(It->second);
return Symbol(It->second, L);
}
return std::nullopt;
}
Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
// Returns the header recorded for this symbol in its language's mapping. The
// resulting Header carries the same language as the symbol.
Header Symbol::header() const {
  const SymbolHeaderMapping *Mapping = getMappingPerLang(Language);
  const unsigned HeaderID = Mapping->SymbolHeaderIDs[ID];
  return Header(HeaderID, Language);
}
// Returns the headers for this symbol; currently always exactly the one from
// header().
llvm::SmallVector<Header> Symbol::headers() const {
  // FIXME: multiple in case of ambiguity
  llvm::SmallVector<Header> Result;
  Result.push_back(header());
  return Result;
}

// Constructing a Recognizer makes sure the mapping tables have been built.
Recognizer::Recognizer() {
  ensureInitialized();
}

NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
if (!D)
return nullptr;
Lang Language;
if (D->getLangOpts().CPlusPlus)
Language = Lang::CXX;
else if (D->getLangOpts().C11)
Language = Lang::C;
else
return nullptr;

auto It = NamespaceCache.find(D);
if (It != NamespaceCache.end())
return It->second;

NSSymbolMap *Result = [&]() -> NSSymbolMap * {
if (D && D->isAnonymousNamespace())
if (D->isAnonymousNamespace())
return nullptr;
// Print the namespace and its parents omitting inline scopes.
std::string Scope;
for (const auto *ND = D; ND;
ND = llvm::dyn_cast_or_null<NamespaceDecl>(ND->getParent()))
if (!ND->isInlineNamespace() && !ND->isAnonymousNamespace())
Scope = ND->getName().str() + "::" + Scope;
return NamespaceSymbols->lookup(Scope);
return getMappingPerLang(Language)->NamespaceSymbols->lookup(Scope);
}();
NamespaceCache.try_emplace(D, Result);
return Result;
Expand Down Expand Up @@ -200,7 +258,7 @@ std::optional<Symbol> Recognizer::operator()(const Decl *D) {
auto It = Symbols->find(Name);
if (It == Symbols->end())
return std::nullopt;
return Symbol(It->second);
return Symbol(It->second, D->getLangOpts().CPlusPlus? Lang::CXX : Lang::C);
}

} // namespace stdlib
Expand Down

0 comments on commit 1285172

Please sign in to comment.