Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[clangd] Introduce a "Symbol" class.
Summary: * The "Symbol" class represents a C++ symbol in the codebase, containing all the information of a C++ symbol needed by clangd. clangd will use it in clangd's AST/dynamic index and global/static index (code completion and code navigation). * The SymbolCollector (another IndexAction) will be used to recollect the symbols when the source file is changed (for ASTIndex), or to generate all C++ symbols for the whole project. In the long term (when index-while-building is ready), clangd should share the same "Symbol" structure and IndexAction with index-while-building, but for now we want to have some stuff working in clangd. Reviewers: ioeric, sammccall, ilya-biryukov, malaperle Reviewed By: sammccall Subscribers: malaperle, klimek, mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D40897 llvm-svn: 320486
- Loading branch information
Showing
7 changed files
with
443 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
//===--- Index.cpp -----------------------------------------------*- C++-*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "Index.h" | ||
|
||
#include "llvm/Support/SHA1.h" | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
namespace { | ||
// Views the characters of S as a sequence of uint8_t. The returned ArrayRef
// refers to the same memory as S; nothing is copied.
ArrayRef<uint8_t> toArrayRef(StringRef S) {
  const auto *Bytes = reinterpret_cast<const uint8_t *>(S.data());
  return ArrayRef<uint8_t>(Bytes, S.size());
}
} // namespace | ||
|
||
// Derives the ID from the SHA1 digest of the bytes of USR.
SymbolID::SymbolID(llvm::StringRef USR) {
  HashValue = llvm::SHA1::hash(toArrayRef(USR));
}
|
||
// Returns the begin iterator of the underlying symbol map.
SymbolSlab::const_iterator SymbolSlab::begin() const { return Symbols.begin(); }
|
||
// Returns the past-the-end iterator of the underlying symbol map.
SymbolSlab::const_iterator SymbolSlab::end() const { return Symbols.end(); }
|
||
// Looks up SymID in the underlying symbol map and returns whatever iterator
// the map's find() yields for it.
SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &SymID) const {
  auto Position = Symbols.find(SymID);
  return Position;
}
|
||
// Marks the slab as frozen. insert() asserts that the slab has not been
// frozen, and nothing in this class ever clears the flag again.
void SymbolSlab::freeze() { Frozen = true; }
|
||
// Stores S in the slab under S.ID, overwriting whatever was stored for that
// ID before. Must not be called once the slab has been frozen.
void SymbolSlab::insert(Symbol S) {
  assert(!Frozen &&
         "Can't insert a symbol after the slab has been frozen!");
  // Fetch (or create) the slot for this ID first, then move the symbol in.
  Symbol &Slot = Symbols[S.ID];
  Slot = std::move(S);
}
|
||
} // namespace clangd | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
//===--- Index.h -------------------------------------------------*- C++-*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===---------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H | ||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H | ||
|
||
#include "clang/Index/IndexSymbol.h" | ||
#include "llvm/ADT/DenseMap.h" | ||
#include "llvm/ADT/StringExtras.h" | ||
|
||
#include <array> | ||
#include <string> | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
// Describes where a symbol occurs: a file plus a range of offsets within it.
//
// This is a plain aggregate; it is brace-initialized by its users.
struct SymbolLocation {
  // Absolute path of the source file in which the symbol occurs.
  std::string FilePath;
  // 0-based offset, counted from the beginning of the source file, of the
  // first character of the symbol.
  unsigned StartOffset;
  // 0-based offset, counted from the beginning of the source file, of the
  // last character of the symbol.
  unsigned EndOffset;
};
|
||
// The class identifies a particular C++ symbol (class, function, method, etc). | ||
// | ||
// As USRs (Unified Symbol Resolution) could be large, especially for functions | ||
// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to | ||
// guarantee the uniqueness of symbols while using a relatively small amount of | ||
// memory (vs storing USRs directly). | ||
// | ||
// SymbolID can be used as key in the symbol indexes to lookup the symbol. | ||
class SymbolID { | ||
public: | ||
SymbolID() = default; | ||
SymbolID(llvm::StringRef USR); | ||
|
||
bool operator==(const SymbolID& Sym) const { | ||
return HashValue == Sym.HashValue; | ||
} | ||
|
||
private: | ||
friend class llvm::DenseMapInfo<clang::clangd::SymbolID>; | ||
|
||
std::array<uint8_t, 20> HashValue; | ||
}; | ||
|
||
// The class presents a C++ symbol, e.g. class, function. | ||
// | ||
// FIXME: instead of having own copy fields for each symbol, we can share | ||
// storage from SymbolSlab. | ||
struct Symbol { | ||
// The ID of the symbol. | ||
SymbolID ID; | ||
// The qualified name of the symbol, e.g. Foo::bar. | ||
std::string QualifiedName; | ||
// The symbol information, like symbol kind. | ||
index::SymbolInfo SymInfo; | ||
// The location of the canonical declaration of the symbol. | ||
// | ||
// A C++ symbol could have multiple declarations and one definition (e.g. | ||
// a function is declared in ".h" file, and is defined in ".cc" file). | ||
// * For classes, the canonical declaration is usually definition. | ||
// * For non-inline functions, the canonical declaration is a declaration | ||
// (not a definition), which is usually declared in ".h" file. | ||
SymbolLocation CanonicalDeclaration; | ||
|
||
// FIXME: add definition location of the symbol. | ||
// FIXME: add all occurrences support. | ||
// FIXME: add extra fields for index scoring signals. | ||
// FIXME: add code completion information. | ||
}; | ||
|
||
// A symbol container that stores a set of symbols. The container will maintain | ||
// the lifetime of the symbols. | ||
// | ||
// FIXME: Use a space-efficient implementation, a lot of Symbol fields could | ||
// share the same storage. | ||
class SymbolSlab { | ||
public: | ||
using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator; | ||
|
||
SymbolSlab() = default; | ||
|
||
const_iterator begin() const; | ||
const_iterator end() const; | ||
const_iterator find(const SymbolID& SymID) const; | ||
|
||
// Once called, no more symbols would be added to the SymbolSlab. This | ||
// operation is irreversible. | ||
void freeze(); | ||
|
||
void insert(Symbol S); | ||
|
||
private: | ||
bool Frozen = false; | ||
|
||
llvm::DenseMap<SymbolID, Symbol> Symbols; | ||
}; | ||
|
||
} // namespace clangd | ||
} // namespace clang | ||
|
||
namespace llvm { | ||
|
||
template <> struct DenseMapInfo<clang::clangd::SymbolID> { | ||
static inline clang::clangd::SymbolID getEmptyKey() { | ||
static clang::clangd::SymbolID EmptyKey("EMPTYKEY"); | ||
return EmptyKey; | ||
} | ||
static inline clang::clangd::SymbolID getTombstoneKey() { | ||
static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY"); | ||
return TombstoneKey; | ||
} | ||
static unsigned getHashValue(const clang::clangd::SymbolID &Sym) { | ||
return hash_value( | ||
ArrayRef<uint8_t>(Sym.HashValue.data(), Sym.HashValue.size())); | ||
} | ||
static bool isEqual(const clang::clangd::SymbolID &LHS, | ||
const clang::clangd::SymbolID &RHS) { | ||
return LHS == RHS; | ||
} | ||
}; | ||
|
||
} // namespace llvm | ||
|
||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "SymbolCollector.h" | ||
|
||
#include "clang/AST/ASTContext.h" | ||
#include "clang/AST/Decl.h" | ||
#include "clang/AST/DeclCXX.h" | ||
#include "clang/Basic/SourceManager.h" | ||
#include "clang/Index/IndexSymbol.h" | ||
#include "clang/Index/USRGeneration.h" | ||
#include "llvm/Support/MemoryBuffer.h" | ||
#include "llvm/Support/Path.h" | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
namespace { | ||
// Make the Path absolute using the current working directory of the given | ||
// SourceManager if the Path is not an absolute path. | ||
// | ||
// The Path can be a path relative to the build directory, or retrieved from | ||
// the SourceManager. | ||
std::string makeAbsolutePath(const SourceManager &SM, StringRef Path) { | ||
llvm::SmallString<128> AbsolutePath(Path); | ||
if (std::error_code EC = | ||
SM.getFileManager().getVirtualFileSystem()->makeAbsolute( | ||
AbsolutePath)) | ||
llvm::errs() << "Warning: could not make absolute file: '" << EC.message() | ||
<< '\n'; | ||
// Handle the symbolic link path case where the current working directory | ||
// (getCurrentWorkingDirectory) is a symlink./ We always want to the real | ||
// file path (instead of the symlink path) for the C++ symbols. | ||
// | ||
// Consider the following example: | ||
// | ||
// src dir: /project/src/foo.h | ||
// current working directory (symlink): /tmp/build -> /project/src/ | ||
// | ||
// The file path of Symbol is "/project/src/foo.h" instead of | ||
// "/tmp/build/foo.h" | ||
const DirectoryEntry *Dir = SM.getFileManager().getDirectory( | ||
llvm::sys::path::parent_path(AbsolutePath.str())); | ||
if (Dir) { | ||
StringRef DirName = SM.getFileManager().getCanonicalName(Dir); | ||
SmallVector<char, 128> AbsoluteFilename; | ||
llvm::sys::path::append(AbsoluteFilename, DirName, | ||
llvm::sys::path::filename(AbsolutePath.str())); | ||
return llvm::StringRef(AbsoluteFilename.data(), AbsoluteFilename.size()) | ||
.str(); | ||
} | ||
return AbsolutePath.str(); | ||
} | ||
} // namespace | ||
|
||
// Always return true to continue indexing. | ||
bool SymbolCollector::handleDeclOccurence( | ||
const Decl *D, index::SymbolRoleSet Roles, | ||
ArrayRef<index::SymbolRelation> Relations, FileID FID, unsigned Offset, | ||
index::IndexDataConsumer::ASTNodeInfo ASTNode) { | ||
// FIXME: collect all symbol references. | ||
if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) || | ||
Roles & static_cast<unsigned>(index::SymbolRole::Definition))) | ||
return true; | ||
|
||
if (const NamedDecl *ND = llvm::dyn_cast<NamedDecl>(D)) { | ||
// FIXME: Should we include the internal linkage symbols? | ||
if (!ND->hasExternalFormalLinkage() || ND->isInAnonymousNamespace()) | ||
return true; | ||
|
||
llvm::SmallVector<char, 128> Buff; | ||
if (index::generateUSRForDecl(ND, Buff)) | ||
return true; | ||
|
||
std::string USR(Buff.data(), Buff.size()); | ||
auto ID = SymbolID(USR); | ||
if (Symbols.find(ID) != Symbols.end()) | ||
return true; | ||
|
||
auto &SM = ND->getASTContext().getSourceManager(); | ||
SymbolLocation Location = { | ||
makeAbsolutePath(SM, SM.getFilename(D->getLocation())), | ||
SM.getFileOffset(D->getLocStart()), SM.getFileOffset(D->getLocEnd())}; | ||
Symbols.insert({std::move(ID), ND->getQualifiedNameAsString(), | ||
index::getSymbolInfo(D), std::move(Location)}); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
// Freezes the collected symbols so that no further symbol can be inserted.
void SymbolCollector::finish() { Symbols.freeze(); }
|
||
} // namespace clangd | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
//===--- SymbolCollector.h ---------------------------------------*- C++-*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "Index.h" | ||
|
||
#include "clang/Index/IndexDataConsumer.h" | ||
#include "clang/Index/IndexSymbol.h" | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
// Collect all symbols from an AST. | ||
// | ||
// Clients (e.g. clangd) can use SymbolCollector together with | ||
// index::indexTopLevelDecls to retrieve all symbols when the source file is | ||
// changed. | ||
class SymbolCollector : public index::IndexDataConsumer { | ||
public: | ||
SymbolCollector() = default; | ||
|
||
bool | ||
handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles, | ||
ArrayRef<index::SymbolRelation> Relations, FileID FID, | ||
unsigned Offset, | ||
index::IndexDataConsumer::ASTNodeInfo ASTNode) override; | ||
|
||
void finish() override; | ||
|
||
SymbolSlab takeSymbols() const { return std::move(Symbols); } | ||
|
||
private: | ||
// All Symbols collected from the AST. | ||
SymbolSlab Symbols; | ||
}; | ||
|
||
} // namespace clangd | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.