Skip to content

Commit

Permalink
[clangd][NFC] Move SymbolID to a separate file
Browse files Browse the repository at this point in the history
Prerequisite for textDocument/SymbolInfo

Differential Revision: https://reviews.llvm.org/D54799

llvm-svn: 347674
  • Loading branch information
jkorous-apple committed Nov 27, 2018
1 parent 6b2f3e0 commit 6089b61
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 77 deletions.
1 change: 1 addition & 0 deletions clang-tools-extra/clangd/CMakeLists.txt
Expand Up @@ -46,6 +46,7 @@ add_clang_library(clangDaemon
index/IndexAction.cpp
index/MemIndex.cpp
index/Merge.cpp
index/SymbolID.cpp
index/Serialization.cpp
index/SymbolCollector.cpp
index/YAMLSerialization.cpp
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clangd/Protocol.h
Expand Up @@ -25,6 +25,7 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROTOCOL_H

#include "URI.h"
#include "index/SymbolID.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/JSON.h"
#include <bitset>
Expand Down
29 changes: 0 additions & 29 deletions clang-tools-extra/clangd/index/Index.cpp
Expand Up @@ -12,7 +12,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
Expand Down Expand Up @@ -43,34 +42,6 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) {
<< "-" << L.End.line() << ":" << L.End.column() << ")";
}

// Builds the ID for a symbol from its USR: the USR is hashed with SHA1 and
// only the leading RawSize bytes of the digest are kept.
SymbolID::SymbolID(StringRef USR) {
  const auto Digest = SHA1::hash(arrayRefFromStringRef(USR));
  // The truncation below is only valid if the digest has enough bytes.
  static_assert(sizeof(Digest) >= RawSize, "RawSize larger than SHA1");
  memcpy(HashValue.data(), Digest.data(), RawSize);
}

// Streams the hex encoding of the ID's raw bytes.
raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
  OS << toHex(ID.raw());
  return OS;
}

// Reconstructs an ID from exactly RawSize raw bytes (the inverse of raw()).
// The length precondition is only checked in assert-enabled builds.
SymbolID SymbolID::fromRaw(StringRef Raw) {
  assert(Raw.size() == RawSize);
  SymbolID Result;
  memcpy(Result.HashValue.data(), Raw.data(), RawSize);
  return Result;
}

// Returns the hex encoding of the raw hash bytes.
std::string SymbolID::str() const {
  return toHex(raw());
}

// Parses the hex form produced by str(). The input must consist of exactly
// 2 * RawSize hex digits; anything else yields an error instead of an ID.
Expected<SymbolID> SymbolID::fromStr(StringRef Str) {
  const size_t ExpectedLen = RawSize * 2;
  if (Str.size() != ExpectedLen)
    return createStringError(inconvertibleErrorCode(), "Bad ID length");
  // Validate every character before decoding.
  for (size_t I = 0; I != ExpectedLen; ++I) {
    if (isHexDigit(Str[I]))
      continue;
    return createStringError(inconvertibleErrorCode(), "Bad hex ID");
  }
  return fromRaw(fromHex(Str));
}

raw_ostream &operator<<(raw_ostream &OS, SymbolOrigin O) {
if (O == SymbolOrigin::Unknown)
return OS << "unknown";
Expand Down
49 changes: 1 addition & 48 deletions clang-tools-extra/clangd/index/Index.h
Expand Up @@ -11,11 +11,11 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H

#include "ExpectedTypes.h"
#include "SymbolID.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
Expand Down Expand Up @@ -95,53 +95,6 @@ inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) {
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);

// Identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID stores a truncated SHA1(USR) value. This
// makes collisions between distinct symbols very unlikely while using a
// relatively small amount of memory (vs storing USRs directly).
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
  // Creates an ID whose hash bytes are all zero, so that comparing or hashing
  // a default-constructed ID never reads uninitialized memory.
  SymbolID() = default;
  // Creates the ID of the symbol with the given USR.
  explicit SymbolID(llvm::StringRef USR);

  bool operator==(const SymbolID &Sym) const {
    return HashValue == Sym.HashValue;
  }
  // Orders IDs lexicographically by their hash bytes.
  bool operator<(const SymbolID &Sym) const {
    return HashValue < Sym.HashValue;
  }

  // The stored hash is truncated to RawSize bytes.
  // This trades off memory against the number of symbols we can handle.
  constexpr static size_t RawSize = 8;
  // Returns a view of the RawSize stored bytes. The view points into this
  // object and must not outlive it.
  llvm::StringRef raw() const {
    return llvm::StringRef(reinterpret_cast<const char *>(HashValue.data()),
                           RawSize);
  }
  // Rebuilds an ID from bytes previously obtained from raw().
  static SymbolID fromRaw(llvm::StringRef);

  // Returns a hex encoded string.
  std::string str() const;
  // Parses a hex encoded string; returns an error on malformed input.
  static llvm::Expected<SymbolID> fromStr(llvm::StringRef);

private:
  // Value-initialized so the default constructor yields a well-defined ID.
  std::array<uint8_t, RawSize> HashValue{};
};

// Computes an llvm::hash_code for a SymbolID by reusing the leading
// sizeof(size_t) bytes of its stored hash.
inline llvm::hash_code hash_value(const SymbolID &ID) {
  // We already have a good hash, just return the first bytes.
  // Both sizes are compile-time constants, so check that the memcpy below
  // stays in bounds at compile time rather than only in assert-enabled builds.
  static_assert(sizeof(size_t) <= SymbolID::RawSize,
                "size_t longer than SHA1!");
  size_t Result;
  memcpy(&Result, ID.raw().data(), sizeof(size_t));
  return llvm::hash_code(Result);
}

// Write SymbolID into the given stream. SymbolID is encoded as ID.str().
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);

} // namespace clangd
} // namespace clang
namespace llvm {
Expand Down
58 changes: 58 additions & 0 deletions clang-tools-extra/clangd/index/SymbolID.cpp
@@ -0,0 +1,58 @@
//===--- SymbolID.cpp --------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SymbolID.h"
#include "llvm/Support/SHA1.h"

using namespace llvm;
namespace clang {
namespace clangd {

// Builds the ID for a symbol from its USR: the USR is hashed with SHA1 and
// only the leading RawSize bytes of the digest are kept.
SymbolID::SymbolID(StringRef USR) {
  const auto Digest = llvm::SHA1::hash(arrayRefFromStringRef(USR));
  // The truncation below is only valid if the digest has enough bytes.
  static_assert(sizeof(Digest) >= RawSize, "RawSize larger than SHA1");
  memcpy(HashValue.data(), Digest.data(), RawSize);
}

// Returns a view of the RawSize stored hash bytes. The view points into this
// object's storage, so it must not outlive the SymbolID.
llvm::StringRef SymbolID::raw() const {
  const char *Bytes = reinterpret_cast<const char *>(HashValue.data());
  return StringRef(Bytes, RawSize);
}

// Reconstructs an ID from exactly RawSize raw bytes (the inverse of raw()).
// The length precondition is only checked in assert-enabled builds.
SymbolID SymbolID::fromRaw(StringRef Raw) {
  assert(Raw.size() == RawSize);
  SymbolID Result;
  memcpy(Result.HashValue.data(), Raw.data(), RawSize);
  return Result;
}

// Returns the hex encoding of the raw hash bytes.
std::string SymbolID::str() const {
  return toHex(raw());
}

// Parses the hex form produced by str(). The input must consist of exactly
// 2 * RawSize hex digits; anything else yields an error instead of an ID.
Expected<SymbolID> SymbolID::fromStr(StringRef Str) {
  const size_t ExpectedLen = RawSize * 2;
  if (Str.size() != ExpectedLen)
    return createStringError(inconvertibleErrorCode(), "Bad ID length");
  // Validate every character before decoding.
  for (size_t I = 0; I != ExpectedLen; ++I) {
    if (isHexDigit(Str[I]))
      continue;
    return createStringError(inconvertibleErrorCode(), "Bad hex ID");
  }
  return fromRaw(fromHex(Str));
}

// Streams the hex encoding of the ID's raw bytes.
raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
  OS << toHex(ID.raw());
  return OS;
}

// Computes an llvm::hash_code for a SymbolID by reusing the leading
// sizeof(size_t) bytes of its stored hash.
llvm::hash_code hash_value(const SymbolID &ID) {
  // We already have a good hash, just return the first bytes.
  // Both sizes are compile-time constants, so check that the memcpy below
  // stays in bounds at compile time rather than only in assert-enabled builds.
  static_assert(sizeof(size_t) <= SymbolID::RawSize,
                "size_t longer than SHA1!");
  size_t Result;
  memcpy(&Result, ID.raw().data(), sizeof(size_t));
  return llvm::hash_code(Result);
}

} // namespace clangd
} // namespace clang
65 changes: 65 additions & 0 deletions clang-tools-extra/clangd/index/SymbolID.h
@@ -0,0 +1,65 @@
//===--- SymbolID.h ----------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H

#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <array>
#include <string>

namespace clang {
namespace clangd {

// Identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID stores a truncated SHA1(USR) value. This
// makes collisions between distinct symbols very unlikely while using a
// relatively small amount of memory (vs storing USRs directly).
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
  // Creates an ID whose hash bytes are all zero, so that comparing or hashing
  // a default-constructed ID never reads uninitialized memory.
  SymbolID() = default;
  // Creates the ID of the symbol with the given USR.
  explicit SymbolID(llvm::StringRef USR);

  bool operator==(const SymbolID &Sym) const {
    return HashValue == Sym.HashValue;
  }
  // Orders IDs lexicographically by their hash bytes.
  bool operator<(const SymbolID &Sym) const {
    return HashValue < Sym.HashValue;
  }

  // The stored hash is truncated to RawSize bytes.
  // This trades off memory against the number of symbols we can handle.
  constexpr static size_t RawSize = 8;
  // Returns a view of the RawSize stored bytes. The view points into this
  // object and must not outlive it.
  llvm::StringRef raw() const;
  // Rebuilds an ID from bytes previously obtained from raw().
  static SymbolID fromRaw(llvm::StringRef);

  // Returns a hex encoded string.
  std::string str() const;
  // Parses a hex encoded string; returns an error on malformed input.
  static llvm::Expected<SymbolID> fromStr(llvm::StringRef);

private:
  // Value-initialized so the default constructor yields a well-defined ID.
  std::array<uint8_t, RawSize> HashValue{};
};

// Computes an llvm::hash_code for a SymbolID. Defined in SymbolID.cpp, where
// it reuses the leading bytes of the ID's stored hash instead of rehashing.
llvm::hash_code hash_value(const SymbolID &ID);

// Write SymbolID into the given stream. SymbolID is encoded as ID.str().
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);

} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H

0 comments on commit 6089b61

Please sign in to comment.