From 671812462ae1b24c514e88b0116ac9f3c9cc594b Mon Sep 17 00:00:00 2001 From: Julie Hockett Date: Fri, 9 Mar 2018 03:16:39 +0000 Subject: [PATCH] [clang-doc] Setup clang-doc frontend framework Setting up the mapper part of the frontend framework for a clang-doc tool. It creates a series of relevant matchers for declarations, and uses the ToolExecutor to traverse the AST and extract the matching declarations and comments. The mapper serializes the extracted information to individual records for reducing and eventually doc generation. For a more detailed overview of the tool, see the design document on the mailing list: http://lists.llvm.org/pipermail/cfe-dev/2017-December/056203.html Differential Revision: https://reviews.llvm.org/D41102 llvm-svn: 327102 --- clang-tools-extra/CMakeLists.txt | 1 + clang-tools-extra/clang-doc/BitcodeWriter.cpp | 517 ++++++++++++++++++ clang-tools-extra/clang-doc/BitcodeWriter.h | 201 +++++++ clang-tools-extra/clang-doc/CMakeLists.txt | 23 + clang-tools-extra/clang-doc/ClangDoc.cpp | 61 +++ clang-tools-extra/clang-doc/ClangDoc.h | 33 ++ clang-tools-extra/clang-doc/Mapper.cpp | 86 +++ clang-tools-extra/clang-doc/Mapper.h | 57 ++ clang-tools-extra/clang-doc/Representation.h | 184 +++++++ clang-tools-extra/clang-doc/Serialize.cpp | 336 ++++++++++++ clang-tools-extra/clang-doc/Serialize.h | 53 ++ .../clang-doc/tool/CMakeLists.txt | 17 + .../clang-doc/tool/ClangDocMain.cpp | 114 ++++ clang-tools-extra/docs/clang-doc.rst | 62 +++ clang-tools-extra/test/CMakeLists.txt | 1 + .../test/clang-doc/mapper-class-in-class.cpp | 35 ++ .../clang-doc/mapper-class-in-function.cpp | 38 ++ .../test/clang-doc/mapper-class.cpp | 19 + .../test/clang-doc/mapper-comments.cpp | 172 ++++++ .../test/clang-doc/mapper-enum.cpp | 36 ++ .../test/clang-doc/mapper-function.cpp | 25 + .../test/clang-doc/mapper-method.cpp | 43 ++ .../test/clang-doc/mapper-namespace.cpp | 17 + .../test/clang-doc/mapper-struct.cpp | 23 + .../test/clang-doc/mapper-union.cpp | 29 + 25 files 
changed, 2183 insertions(+) create mode 100644 clang-tools-extra/clang-doc/BitcodeWriter.cpp create mode 100644 clang-tools-extra/clang-doc/BitcodeWriter.h create mode 100644 clang-tools-extra/clang-doc/CMakeLists.txt create mode 100644 clang-tools-extra/clang-doc/ClangDoc.cpp create mode 100644 clang-tools-extra/clang-doc/ClangDoc.h create mode 100644 clang-tools-extra/clang-doc/Mapper.cpp create mode 100644 clang-tools-extra/clang-doc/Mapper.h create mode 100644 clang-tools-extra/clang-doc/Representation.h create mode 100644 clang-tools-extra/clang-doc/Serialize.cpp create mode 100644 clang-tools-extra/clang-doc/Serialize.h create mode 100644 clang-tools-extra/clang-doc/tool/CMakeLists.txt create mode 100644 clang-tools-extra/clang-doc/tool/ClangDocMain.cpp create mode 100644 clang-tools-extra/docs/clang-doc.rst create mode 100644 clang-tools-extra/test/clang-doc/mapper-class-in-class.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-class-in-function.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-class.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-comments.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-enum.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-function.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-method.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-namespace.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-struct.cpp create mode 100644 clang-tools-extra/test/clang-doc/mapper-union.cpp diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt index 760340a633798..c434682cf67ca 100644 --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(clang-tidy-vs) endif() add_subdirectory(change-namespace) +add_subdirectory(clang-doc) add_subdirectory(clang-query) add_subdirectory(clang-move) add_subdirectory(clangd) diff --git 
a/clang-tools-extra/clang-doc/BitcodeWriter.cpp b/clang-tools-extra/clang-doc/BitcodeWriter.cpp new file mode 100644 index 0000000000000..90fdc8e7dcfcd --- /dev/null +++ b/clang-tools-extra/clang-doc/BitcodeWriter.cpp @@ -0,0 +1,517 @@ +//===-- BitcodeWriter.cpp - ClangDoc Bitcode Writer ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BitcodeWriter.h" +#include "llvm/ADT/IndexedMap.h" + +namespace clang { +namespace doc { + +// Since id enums are not zero-indexed, we need to transform the given id into +// its associated index. +struct BlockIdToIndexFunctor { + using argument_type = unsigned; + unsigned operator()(unsigned ID) const { return ID - BI_FIRST; } +}; + +struct RecordIdToIndexFunctor { + using argument_type = unsigned; + unsigned operator()(unsigned ID) const { return ID - RI_FIRST; } +}; + +using AbbrevDsc = void (*)(std::shared_ptr &Abbrev); + +static void AbbrevGen(std::shared_ptr &Abbrev, + const std::initializer_list Ops) { + for (const auto &Op : Ops) + Abbrev->Add(Op); +} + +static void BoolAbbrev(std::shared_ptr &Abbrev) { + AbbrevGen(Abbrev, + {// 0. Boolean + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::BoolSize)}); +} + +static void IntAbbrev(std::shared_ptr &Abbrev) { + AbbrevGen(Abbrev, + {// 0. Fixed-size integer + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::IntSize)}); +} + +static void SymbolIDAbbrev(std::shared_ptr &Abbrev) { + AbbrevGen(Abbrev, + {// 0. Fixed-size integer (length of the sha1'd USR) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::USRLengthSize), + // 1. 
Fixed-size array of Char6 (USR) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array), + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::USRBitLengthSize)}); +} + +static void StringAbbrev(std::shared_ptr &Abbrev) { + AbbrevGen(Abbrev, + {// 0. Fixed-size integer (length of the following string) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::StringLengthSize), + // 1. The string blob + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)}); +} + +// Assumes that the file will not have more than 65535 lines. +static void LocationAbbrev(std::shared_ptr &Abbrev) { + AbbrevGen( + Abbrev, + {// 0. Fixed-size integer (line number) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::LineNumberSize), + // 1. Fixed-size integer (length of the following string (filename)) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::StringLengthSize), + // 2. The string blob + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)}); +} + +static void ReferenceAbbrev(std::shared_ptr &Abbrev) { + AbbrevGen(Abbrev, + {// 0. Fixed-size integer (ref type) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::ReferenceTypeSize), + // 1. Fixed-size integer (length of the USR or UnresolvedName) + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::StringLengthSize), + // 2. The string blob + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)}); +} + +struct RecordIdDsc { + llvm::StringRef Name; + AbbrevDsc Abbrev = nullptr; + + RecordIdDsc() = default; + RecordIdDsc(llvm::StringRef Name, AbbrevDsc Abbrev) + : Name(Name), Abbrev(Abbrev) {} + + // Is this 'description' valid? 
+ operator bool() const { + return Abbrev != nullptr && Name.data() != nullptr && !Name.empty(); + } +}; + +static const llvm::IndexedMap + BlockIdNameMap = []() { + llvm::IndexedMap BlockIdNameMap; + BlockIdNameMap.resize(BlockIdCount); + + // There is no init-list constructor for the IndexedMap, so have to + // improvise + static constexpr std::initializer_list< + std::pair> + Inits = {{BI_VERSION_BLOCK_ID, "VersionBlock"}, + {BI_NAMESPACE_BLOCK_ID, "NamespaceBlock"}, + {BI_ENUM_BLOCK_ID, "EnumBlock"}, + {BI_TYPE_BLOCK_ID, "TypeBlock"}, + {BI_FIELD_TYPE_BLOCK_ID, "FieldTypeBlock"}, + {BI_MEMBER_TYPE_BLOCK_ID, "MemberTypeBlock"}, + {BI_RECORD_BLOCK_ID, "RecordBlock"}, + {BI_FUNCTION_BLOCK_ID, "FunctionBlock"}, + {BI_COMMENT_BLOCK_ID, "CommentBlock"}}; + static_assert(Inits.size() == BlockIdCount, + "unexpected count of initializers"); + for (const auto &Init : Inits) + BlockIdNameMap[Init.first] = Init.second; + assert(BlockIdNameMap.size() == BlockIdCount); + return BlockIdNameMap; + }(); + +static const llvm::IndexedMap + RecordIdNameMap = []() { + llvm::IndexedMap RecordIdNameMap; + RecordIdNameMap.resize(RecordIdCount); + + // There is no init-list constructor for the IndexedMap, so have to + // improvise + static std::initializer_list> Inits = { + {VERSION, {"Version", &IntAbbrev}}, + {COMMENT_KIND, {"Kind", &StringAbbrev}}, + {COMMENT_TEXT, {"Text", &StringAbbrev}}, + {COMMENT_NAME, {"Name", &StringAbbrev}}, + {COMMENT_DIRECTION, {"Direction", &StringAbbrev}}, + {COMMENT_PARAMNAME, {"ParamName", &StringAbbrev}}, + {COMMENT_CLOSENAME, {"CloseName", &StringAbbrev}}, + {COMMENT_SELFCLOSING, {"SelfClosing", &BoolAbbrev}}, + {COMMENT_EXPLICIT, {"Explicit", &BoolAbbrev}}, + {COMMENT_ATTRKEY, {"AttrKey", &StringAbbrev}}, + {COMMENT_ATTRVAL, {"AttrVal", &StringAbbrev}}, + {COMMENT_ARG, {"Arg", &StringAbbrev}}, + {TYPE_REF, {"Type", &ReferenceAbbrev}}, + {FIELD_TYPE_REF, {"Type", &ReferenceAbbrev}}, + {FIELD_TYPE_NAME, {"Name", &StringAbbrev}}, + {MEMBER_TYPE_REF, 
{"Type", &ReferenceAbbrev}}, + {MEMBER_TYPE_NAME, {"Name", &StringAbbrev}}, + {MEMBER_TYPE_ACCESS, {"Access", &IntAbbrev}}, + {NAMESPACE_USR, {"USR", &SymbolIDAbbrev}}, + {NAMESPACE_NAME, {"Name", &StringAbbrev}}, + {NAMESPACE_NAMESPACE, {"Namespace", &ReferenceAbbrev}}, + {ENUM_USR, {"USR", &SymbolIDAbbrev}}, + {ENUM_NAME, {"Name", &StringAbbrev}}, + {ENUM_NAMESPACE, {"Namespace", &ReferenceAbbrev}}, + {ENUM_DEFLOCATION, {"DefLocation", &LocationAbbrev}}, + {ENUM_LOCATION, {"Location", &LocationAbbrev}}, + {ENUM_MEMBER, {"Member", &StringAbbrev}}, + {ENUM_SCOPED, {"Scoped", &BoolAbbrev}}, + {RECORD_USR, {"USR", &SymbolIDAbbrev}}, + {RECORD_NAME, {"Name", &StringAbbrev}}, + {RECORD_NAMESPACE, {"Namespace", &ReferenceAbbrev}}, + {RECORD_DEFLOCATION, {"DefLocation", &LocationAbbrev}}, + {RECORD_LOCATION, {"Location", &LocationAbbrev}}, + {RECORD_TAG_TYPE, {"TagType", &IntAbbrev}}, + {RECORD_PARENT, {"Parent", &ReferenceAbbrev}}, + {RECORD_VPARENT, {"VParent", &ReferenceAbbrev}}, + {FUNCTION_USR, {"USR", &SymbolIDAbbrev}}, + {FUNCTION_NAME, {"Name", &StringAbbrev}}, + {FUNCTION_NAMESPACE, {"Namespace", &ReferenceAbbrev}}, + {FUNCTION_DEFLOCATION, {"DefLocation", &LocationAbbrev}}, + {FUNCTION_LOCATION, {"Location", &LocationAbbrev}}, + {FUNCTION_PARENT, {"Parent", &ReferenceAbbrev}}, + {FUNCTION_ACCESS, {"Access", &IntAbbrev}}, + {FUNCTION_IS_METHOD, {"IsMethod", &BoolAbbrev}}}; + // assert(Inits.size() == RecordIdCount); + for (const auto &Init : Inits) { + RecordIdNameMap[Init.first] = Init.second; + assert((Init.second.Name.size() + 1) <= BitCodeConstants::RecordSize); + } + // assert(RecordIdNameMap.size() == RecordIdCount); + return RecordIdNameMap; + }(); + +static const std::initializer_list< + std::pair>> + RecordsByBlock{ + // Version Block + {BI_VERSION_BLOCK_ID, {VERSION}}, + // Comment Block + {BI_COMMENT_BLOCK_ID, + {COMMENT_KIND, COMMENT_TEXT, COMMENT_NAME, COMMENT_DIRECTION, + COMMENT_PARAMNAME, COMMENT_CLOSENAME, COMMENT_SELFCLOSING, + 
COMMENT_EXPLICIT, COMMENT_ATTRKEY, COMMENT_ATTRVAL, COMMENT_ARG}}, + // Type Block + {BI_TYPE_BLOCK_ID, {TYPE_REF}}, + // FieldType Block + {BI_FIELD_TYPE_BLOCK_ID, {FIELD_TYPE_REF, FIELD_TYPE_NAME}}, + // MemberType Block + {BI_MEMBER_TYPE_BLOCK_ID, + {MEMBER_TYPE_REF, MEMBER_TYPE_NAME, MEMBER_TYPE_ACCESS}}, + // Enum Block + {BI_ENUM_BLOCK_ID, + {ENUM_USR, ENUM_NAME, ENUM_NAMESPACE, ENUM_DEFLOCATION, ENUM_LOCATION, + ENUM_MEMBER, ENUM_SCOPED}}, + // Namespace Block + {BI_NAMESPACE_BLOCK_ID, + {NAMESPACE_USR, NAMESPACE_NAME, NAMESPACE_NAMESPACE}}, + // Record Block + {BI_RECORD_BLOCK_ID, + {RECORD_USR, RECORD_NAME, RECORD_NAMESPACE, RECORD_DEFLOCATION, + RECORD_LOCATION, RECORD_TAG_TYPE, RECORD_PARENT, RECORD_VPARENT}}, + // Function Block + {BI_FUNCTION_BLOCK_ID, + {FUNCTION_USR, FUNCTION_NAME, FUNCTION_NAMESPACE, FUNCTION_DEFLOCATION, + FUNCTION_LOCATION, FUNCTION_PARENT, FUNCTION_ACCESS, + FUNCTION_IS_METHOD}}}; + +// AbbreviationMap + +void ClangDocBitcodeWriter::AbbreviationMap::add(RecordId RID, + unsigned AbbrevID) { + assert(RecordIdNameMap[RID] && "Unknown RecordId."); + assert(Abbrevs.find(RID) == Abbrevs.end() && "Abbreviation already added."); + Abbrevs[RID] = AbbrevID; +} + +unsigned ClangDocBitcodeWriter::AbbreviationMap::get(RecordId RID) const { + assert(RecordIdNameMap[RID] && "Unknown RecordId."); + assert(Abbrevs.find(RID) != Abbrevs.end() && "Unknown abbreviation."); + return Abbrevs.lookup(RID); +} + +// Validation and Overview Blocks + +/// \brief Emits the magic number header to check that it's the right format, +/// in this case, 'DOCS'. +void ClangDocBitcodeWriter::emitHeader() { + for (char C : llvm::StringRef("DOCS")) + Stream.Emit((unsigned)C, BitCodeConstants::SignatureBitSize); +} + +void ClangDocBitcodeWriter::emitVersionBlock() { + StreamSubBlockGuard Block(Stream, BI_VERSION_BLOCK_ID); + emitRecord(VersionNumber, VERSION); +} + +/// \brief Emits a block ID and the block name to the BLOCKINFO block. 
+void ClangDocBitcodeWriter::emitBlockID(BlockId BID) { + const auto &BlockIdName = BlockIdNameMap[BID]; + assert(BlockIdName.data() && BlockIdName.size() && "Unknown BlockId."); + + Record.clear(); + Record.push_back(BID); + Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record); + Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, + ArrayRef(BlockIdNameMap[BID].bytes_begin(), + BlockIdNameMap[BID].bytes_end())); +} + +/// \brief Emits a record name to the BLOCKINFO block. +void ClangDocBitcodeWriter::emitRecordID(RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + prepRecordData(ID); + Record.append(RecordIdNameMap[ID].Name.begin(), + RecordIdNameMap[ID].Name.end()); + Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record); +} + +// Abbreviations + +void ClangDocBitcodeWriter::emitAbbrev(RecordId ID, BlockId Block) { + assert(RecordIdNameMap[ID] && "Unknown abbreviation."); + auto Abbrev = std::make_shared(); + Abbrev->Add(llvm::BitCodeAbbrevOp(ID)); + RecordIdNameMap[ID].Abbrev(Abbrev); + Abbrevs.add(ID, Stream.EmitBlockInfoAbbrev(Block, std::move(Abbrev))); +} + +// Records + +void ClangDocBitcodeWriter::emitRecord(const SymbolID &Sym, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &SymbolIDAbbrev && + "Abbrev type mismatch."); + if (!prepRecordData(ID, !Sym.empty())) + return; + assert(Sym.size() == 20); + // std::string Out = llvm::toHex(llvm::toStringRef(Str)); + Record.push_back(Sym.size()); + // for (unsigned I = 0, E = Sym.size(); I != E; ++I) { + // assert(llvm::BitCodeAbbrevOp::isFixed(Sym[I])); + // Record.push_back(Sym[I]); + // } + Record.append(Sym.begin(), Sym.end()); + Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record); +} + +void ClangDocBitcodeWriter::emitRecord(llvm::StringRef Str, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &StringAbbrev && + "Abbrev type mismatch."); + if 
(!prepRecordData(ID, !Str.empty())) + return; + assert(Str.size() < (1U << BitCodeConstants::StringLengthSize)); + Record.push_back(Str.size()); + Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, Str); +} + +void ClangDocBitcodeWriter::emitRecord(const Location &Loc, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &LocationAbbrev && + "Abbrev type mismatch."); + if (!prepRecordData(ID, true)) + return; + // FIXME: Assert that the line number is of the appropriate size. + Record.push_back(Loc.LineNumber); + assert(Loc.Filename.size() < (1U << BitCodeConstants::StringLengthSize)); + // Record.push_back(Loc.Filename.size()); + // Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, Loc.Filename); + Record.push_back(4); + Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, "test"); +} + +void ClangDocBitcodeWriter::emitRecord(const Reference &Ref, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &ReferenceAbbrev && + "Abbrev type mismatch."); + SmallString<40> StringUSR; + StringRef OutString; + if (Ref.RefType == InfoType::IT_default) + OutString = Ref.UnresolvedName; + else { + StringUSR = llvm::toHex(llvm::toStringRef(Ref.USR)); + OutString = StringUSR; + } + if (!prepRecordData(ID, !OutString.empty())) + return; + assert(OutString.size() < (1U << BitCodeConstants::StringLengthSize)); + Record.push_back((int)Ref.RefType); + Record.push_back(OutString.size()); + Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, OutString); +} + +void ClangDocBitcodeWriter::emitRecord(bool Val, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &BoolAbbrev && "Abbrev type mismatch."); + if (!prepRecordData(ID, Val)) + return; + Record.push_back(Val); + Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record); +} + +void ClangDocBitcodeWriter::emitRecord(int Val, RecordId ID) { + assert(RecordIdNameMap[ID] && 
"Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &IntAbbrev && "Abbrev type mismatch."); + if (!prepRecordData(ID, Val)) + return; + // FIXME: Assert that the integer is of the appropriate size. + Record.push_back(Val); + Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record); +} + +void ClangDocBitcodeWriter::emitRecord(unsigned Val, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + assert(RecordIdNameMap[ID].Abbrev == &IntAbbrev && "Abbrev type mismatch."); + if (!prepRecordData(ID, Val)) + return; + assert(Val < (1U << BitCodeConstants::IntSize)); + Record.push_back(Val); + Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record); +} + +bool ClangDocBitcodeWriter::prepRecordData(RecordId ID, bool ShouldEmit) { + assert(RecordIdNameMap[ID] && "Unknown RecordId."); + if (!ShouldEmit) + return false; + Record.clear(); + Record.push_back(ID); + return true; +} + +// BlockInfo Block + +void ClangDocBitcodeWriter::emitBlockInfoBlock() { + Stream.EnterBlockInfoBlock(); + for (const auto &Block : RecordsByBlock) { + assert(Block.second.size() < (1U << BitCodeConstants::SubblockIDSize)); + emitBlockInfo(Block.first, Block.second); + } + Stream.ExitBlock(); +} + +void ClangDocBitcodeWriter::emitBlockInfo( + BlockId BID, const std::initializer_list &RIDs) { + assert(RIDs.size() < (1U << BitCodeConstants::SubblockIDSize)); + emitBlockID(BID); + for (RecordId RID : RIDs) { + emitRecordID(RID); + emitAbbrev(RID, BID); + } +} + +// Block emission + +void ClangDocBitcodeWriter::emitBlock(const TypeInfo &T) { + StreamSubBlockGuard Block(Stream, BI_TYPE_BLOCK_ID); + emitRecord(T.Type, TYPE_REF); +} + +void ClangDocBitcodeWriter::emitBlock(const FieldTypeInfo &T) { + StreamSubBlockGuard Block(Stream, BI_FIELD_TYPE_BLOCK_ID); + emitRecord(T.Type, FIELD_TYPE_REF); + emitRecord(T.Name, FIELD_TYPE_NAME); +} + +void ClangDocBitcodeWriter::emitBlock(const MemberTypeInfo &T) { + StreamSubBlockGuard Block(Stream, BI_MEMBER_TYPE_BLOCK_ID); + emitRecord(T.Type, 
MEMBER_TYPE_REF); + emitRecord(T.Name, MEMBER_TYPE_NAME); + emitRecord(T.Access, MEMBER_TYPE_ACCESS); +} + +void ClangDocBitcodeWriter::emitBlock(const CommentInfo &I) { + StreamSubBlockGuard Block(Stream, BI_COMMENT_BLOCK_ID); + for (const auto &L : + std::initializer_list>{ + {I.Kind, COMMENT_KIND}, + {I.Text, COMMENT_TEXT}, + {I.Name, COMMENT_NAME}, + {I.Direction, COMMENT_DIRECTION}, + {I.ParamName, COMMENT_PARAMNAME}, + {I.CloseName, COMMENT_CLOSENAME}}) + emitRecord(L.first, L.second); + emitRecord(I.SelfClosing, COMMENT_SELFCLOSING); + emitRecord(I.Explicit, COMMENT_EXPLICIT); + for (const auto &A : I.AttrKeys) + emitRecord(A, COMMENT_ATTRKEY); + for (const auto &A : I.AttrValues) + emitRecord(A, COMMENT_ATTRVAL); + for (const auto &A : I.Args) + emitRecord(A, COMMENT_ARG); + for (const auto &C : I.Children) + emitBlock(*C); +} + +#define EMITINFO(X) \ + emitRecord(I.USR, X##_USR); \ + emitRecord(I.Name, X##_NAME); \ + for (const auto &N : I.Namespace) \ + emitRecord(N, X##_NAMESPACE); \ + for (const auto &CI : I.Description) \ + emitBlock(CI); + +void ClangDocBitcodeWriter::emitBlock(const NamespaceInfo &I) { + StreamSubBlockGuard Block(Stream, BI_NAMESPACE_BLOCK_ID); + EMITINFO(NAMESPACE) +} + +void ClangDocBitcodeWriter::emitBlock(const EnumInfo &I) { + StreamSubBlockGuard Block(Stream, BI_ENUM_BLOCK_ID); + EMITINFO(ENUM) + if (I.DefLoc) + emitRecord(I.DefLoc.getValue(), ENUM_DEFLOCATION); + for (const auto &L : I.Loc) + emitRecord(L, ENUM_LOCATION); + emitRecord(I.Scoped, ENUM_SCOPED); + for (const auto &N : I.Members) + emitRecord(N, ENUM_MEMBER); +} + +void ClangDocBitcodeWriter::emitBlock(const RecordInfo &I) { + StreamSubBlockGuard Block(Stream, BI_RECORD_BLOCK_ID); + EMITINFO(RECORD) + if (I.DefLoc) + emitRecord(I.DefLoc.getValue(), RECORD_DEFLOCATION); + for (const auto &L : I.Loc) + emitRecord(L, RECORD_LOCATION); + emitRecord(I.TagType, RECORD_TAG_TYPE); + for (const auto &N : I.Members) + emitBlock(N); + for (const auto &P : I.Parents) + 
emitRecord(P, RECORD_PARENT); + for (const auto &P : I.VirtualParents) + emitRecord(P, RECORD_VPARENT); +} + +void ClangDocBitcodeWriter::emitBlock(const FunctionInfo &I) { + StreamSubBlockGuard Block(Stream, BI_FUNCTION_BLOCK_ID); + EMITINFO(FUNCTION) + emitRecord(I.IsMethod, FUNCTION_IS_METHOD); + if (I.DefLoc) + emitRecord(I.DefLoc.getValue(), FUNCTION_DEFLOCATION); + for (const auto &L : I.Loc) + emitRecord(L, FUNCTION_LOCATION); + emitRecord(I.Parent, FUNCTION_PARENT); + emitBlock(I.ReturnType); + for (const auto &N : I.Params) + emitBlock(N); +} + +#undef EMITINFO + +} // namespace doc +} // namespace clang diff --git a/clang-tools-extra/clang-doc/BitcodeWriter.h b/clang-tools-extra/clang-doc/BitcodeWriter.h new file mode 100644 index 0000000000000..092bfc6338a0f --- /dev/null +++ b/clang-tools-extra/clang-doc/BitcodeWriter.h @@ -0,0 +1,201 @@ +//===-- BitcodeWriter.h - ClangDoc Bitcode Writer --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a writer for serializing the clang-doc internal +// representation to LLVM bitcode. The writer takes in a stream and emits the +// generated bitcode to that stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEWRITER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEWRITER_H + +#include "Representation.h" +#include "clang/AST/AST.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitcode/BitstreamWriter.h" +#include +#include + +namespace clang { +namespace doc { + +// Current version number of clang-doc bitcode. 
+// Should be bumped when removing or changing BlockIds, RecordIds, or +// BitCodeConstants, though they can be added without breaking it. +static const unsigned VersionNumber = 1; + +struct BitCodeConstants { + static constexpr unsigned RecordSize = 16U; + static constexpr unsigned SignatureBitSize = 8U; + static constexpr unsigned SubblockIDSize = 4U; + static constexpr unsigned BoolSize = 1U; + static constexpr unsigned IntSize = 16U; + static constexpr unsigned StringLengthSize = 16U; + static constexpr unsigned FilenameLengthSize = 16U; + static constexpr unsigned LineNumberSize = 16U; + static constexpr unsigned ReferenceTypeSize = 8U; + static constexpr unsigned USRLengthSize = 6U; + static constexpr unsigned USRBitLengthSize = 8U; +}; + +// New Ids need to be added to both the enum here and the relevant IdNameMap in +// the implementation file. +enum BlockId { + BI_VERSION_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID, + BI_NAMESPACE_BLOCK_ID, + BI_ENUM_BLOCK_ID, + BI_TYPE_BLOCK_ID, + BI_FIELD_TYPE_BLOCK_ID, + BI_MEMBER_TYPE_BLOCK_ID, + BI_RECORD_BLOCK_ID, + BI_FUNCTION_BLOCK_ID, + BI_COMMENT_BLOCK_ID, + BI_FIRST = BI_VERSION_BLOCK_ID, + BI_LAST = BI_COMMENT_BLOCK_ID +}; + +// New Ids need to be added to the enum here, and to the relevant IdNameMap and +// initialization list in the implementation file. 
+#define INFORECORDS(X) X##_USR, X##_NAME, X##_NAMESPACE + +enum RecordId { + VERSION = 1, + INFORECORDS(FUNCTION), + FUNCTION_DEFLOCATION, + FUNCTION_LOCATION, + FUNCTION_PARENT, + FUNCTION_ACCESS, + FUNCTION_IS_METHOD, + COMMENT_KIND, + COMMENT_TEXT, + COMMENT_NAME, + COMMENT_DIRECTION, + COMMENT_PARAMNAME, + COMMENT_CLOSENAME, + COMMENT_SELFCLOSING, + COMMENT_EXPLICIT, + COMMENT_ATTRKEY, + COMMENT_ATTRVAL, + COMMENT_ARG, + TYPE_REF, + FIELD_TYPE_REF, + FIELD_TYPE_NAME, + MEMBER_TYPE_REF, + MEMBER_TYPE_NAME, + MEMBER_TYPE_ACCESS, + INFORECORDS(NAMESPACE), + INFORECORDS(ENUM), + ENUM_DEFLOCATION, + ENUM_LOCATION, + ENUM_MEMBER, + ENUM_SCOPED, + INFORECORDS(RECORD), + RECORD_DEFLOCATION, + RECORD_LOCATION, + RECORD_TAG_TYPE, + RECORD_PARENT, + RECORD_VPARENT, + RI_FIRST = VERSION, + RI_LAST = RECORD_VPARENT +}; + +static constexpr unsigned BlockIdCount = BI_LAST - BI_FIRST + 1; +static constexpr unsigned RecordIdCount = RI_LAST - RI_FIRST + 1; + +#undef INFORECORDS + +class ClangDocBitcodeWriter { +public: + ClangDocBitcodeWriter(llvm::BitstreamWriter &Stream) : Stream(Stream) { + emitHeader(); + emitBlockInfoBlock(); + emitVersionBlock(); + } + +#ifndef NDEBUG // Don't want explicit dtor unless needed. + ~ClangDocBitcodeWriter() { + // Check that the static size is large-enough. + assert(Record.capacity() > BitCodeConstants::RecordSize); + } +#endif + + // Block emission of different info types. 
+ void emitBlock(const NamespaceInfo &I); + void emitBlock(const RecordInfo &I); + void emitBlock(const FunctionInfo &I); + void emitBlock(const EnumInfo &I); + void emitBlock(const TypeInfo &B); + void emitBlock(const FieldTypeInfo &B); + void emitBlock(const MemberTypeInfo &B); + void emitBlock(const CommentInfo &B); + +private: + class AbbreviationMap { + llvm::DenseMap Abbrevs; + + public: + AbbreviationMap() : Abbrevs(RecordIdCount) {} + + void add(RecordId RID, unsigned AbbrevID); + unsigned get(RecordId RID) const; + }; + + class StreamSubBlockGuard { + llvm::BitstreamWriter &Stream; + + public: + StreamSubBlockGuard(llvm::BitstreamWriter &Stream_, BlockId ID) + : Stream(Stream_) { + // NOTE: SubBlockIDSize could theoretically be calculated on the fly, + // based on the initialization list of records in each block. + Stream.EnterSubblock(ID, BitCodeConstants::SubblockIDSize); + } + + StreamSubBlockGuard() = delete; // A guard always wraps a live stream. + StreamSubBlockGuard(const StreamSubBlockGuard &) = delete; + StreamSubBlockGuard &operator=(const StreamSubBlockGuard &) = delete; + + ~StreamSubBlockGuard() { Stream.ExitBlock(); } + }; + + // Emission of validation and overview blocks. + void emitHeader(); + void emitVersionBlock(); + void emitRecordID(RecordId ID); + void emitBlockID(BlockId ID); + void emitBlockInfoBlock(); + void emitBlockInfo(BlockId BID, const std::initializer_list &RIDs); + + // Emission of individual record types. + void emitRecord(StringRef Str, RecordId ID); + void emitRecord(const SymbolID &Str, RecordId ID); + void emitRecord(const Location &Loc, RecordId ID); + void emitRecord(const Reference &Ref, RecordId ID); + void emitRecord(bool Value, RecordId ID); + void emitRecord(int Value, RecordId ID); + void emitRecord(unsigned Value, RecordId ID); + bool prepRecordData(RecordId ID, bool ShouldEmit = true); + + // Emission of appropriate abbreviation type. 
+ void emitAbbrev(RecordId ID, BlockId Block); + + // Static size is the maximum length of the block/record names we're pushing + // to this + 1. Longest is currently `MemberTypeBlock` at 15 chars. + SmallVector Record; + llvm::BitstreamWriter &Stream; + AbbreviationMap Abbrevs; +}; + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEWRITER_H diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt b/clang-tools-extra/clang-doc/CMakeLists.txt new file mode 100644 index 0000000000000..1852baa6ba1d8 --- /dev/null +++ b/clang-tools-extra/clang-doc/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_LINK_COMPONENTS + support + ) + +add_clang_library(clangDoc + BitcodeWriter.cpp + ClangDoc.cpp + Mapper.cpp + Serialize.cpp + + LINK_LIBS + clangAnalysis + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangIndex + clangLex + clangTooling + clangToolingCore + ) + +add_subdirectory(tool) diff --git a/clang-tools-extra/clang-doc/ClangDoc.cpp b/clang-tools-extra/clang-doc/ClangDoc.cpp new file mode 100644 index 0000000000000..cd73723423ee9 --- /dev/null +++ b/clang-tools-extra/clang-doc/ClangDoc.cpp @@ -0,0 +1,61 @@ +//===-- ClangDoc.cpp - ClangDoc ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the main entry point for the clang-doc tool. It runs +// the clang-doc mapper on a given set of source code files using a +// FrontendActionFactory. 
+// +//===----------------------------------------------------------------------===// + +#include "ClangDoc.h" +#include "Mapper.h" +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" + +namespace clang { +namespace doc { + +class MapperActionFactory : public tooling::FrontendActionFactory { +public: + MapperActionFactory(tooling::ExecutionContext *ECtx) : ECtx(ECtx) {} + clang::FrontendAction *create() override; + +private: + tooling::ExecutionContext *ECtx; +}; + +clang::FrontendAction *MapperActionFactory::create() { + class ClangDocAction : public clang::ASTFrontendAction { + public: + ClangDocAction(ExecutionContext *ECtx) : ECtx(ECtx) {} + + std::unique_ptr + CreateASTConsumer(clang::CompilerInstance &Compiler, + llvm::StringRef InFile) override { + return llvm::make_unique(&Compiler.getASTContext(), ECtx); + } + + private: + ExecutionContext *ECtx; + }; + return new ClangDocAction(ECtx); +} + +std::unique_ptr +newMapperActionFactory(tooling::ExecutionContext *ECtx) { + return llvm::make_unique(ECtx); +} + +} // namespace doc +} // namespace clang diff --git a/clang-tools-extra/clang-doc/ClangDoc.h b/clang-tools-extra/clang-doc/ClangDoc.h new file mode 100644 index 0000000000000..9a9817c76f834 --- /dev/null +++ b/clang-tools-extra/clang-doc/ClangDoc.h @@ -0,0 +1,33 @@ +//===-- ClangDoc.h - ClangDoc -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes a method to create the FrontendActionFactory for the +// clang-doc tool. 
The factory runs the clang-doc mapper on a given set of +// source code files, storing the results as key-value pairs in its +// ExecutionContext. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H + +#include "clang/Tooling/Execution.h" +#include "clang/Tooling/StandaloneExecution.h" +#include "clang/Tooling/Tooling.h" + +namespace clang { +namespace doc { + +std::unique_ptr +newMapperActionFactory(tooling::ExecutionContext *ECtx); + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp new file mode 100644 index 0000000000000..f3ef99e621716 --- /dev/null +++ b/clang-tools-extra/clang-doc/Mapper.cpp @@ -0,0 +1,86 @@ +//===-- Mapper.cpp - ClangDoc Mapper ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Mapper.h" +#include "BitcodeWriter.h" +#include "Serialize.h" +#include "clang/AST/Comment.h" +#include "clang/Index/USRGeneration.h" +#include "llvm/ADT/StringExtras.h" + +using clang::comments::FullComment; + +namespace clang { +namespace doc { + +void MapASTVisitor::HandleTranslationUnit(ASTContext &Context) { + TraverseDecl(Context.getTranslationUnitDecl()); +} + +template bool MapASTVisitor::mapDecl(const T *D) { + // If we're looking at a decl not in user files, skip this decl. + if (D->getASTContext().getSourceManager().isInSystemHeader(D->getLocation())) + return true; + + llvm::SmallString<128> USR; + // If there is an error generating a USR for the decl, skip this decl. 
+ if (index::generateUSRForDecl(D, USR)) + return true; + + ECtx->reportResult(llvm::toHex(llvm::toStringRef(serialize::hashUSR(USR))), + serialize::emitInfo(D, getComment(D, D->getASTContext()), + getLine(D, D->getASTContext()), + getFile(D, D->getASTContext()))); + return true; +} + +bool MapASTVisitor::VisitNamespaceDecl(const NamespaceDecl *D) { + return mapDecl(D); +} + +bool MapASTVisitor::VisitRecordDecl(const RecordDecl *D) { return mapDecl(D); } + +bool MapASTVisitor::VisitEnumDecl(const EnumDecl *D) { return mapDecl(D); } + +bool MapASTVisitor::VisitCXXMethodDecl(const CXXMethodDecl *D) { + return mapDecl(D); +} + +bool MapASTVisitor::VisitFunctionDecl(const FunctionDecl *D) { + // Don't visit CXXMethodDecls twice + if (dyn_cast(D)) + return true; + return mapDecl(D); +} + +comments::FullComment * +MapASTVisitor::getComment(const NamedDecl *D, const ASTContext &Context) const { + RawComment *Comment = Context.getRawCommentForDeclNoCache(D); + // FIXME: Move setAttached to the initial comment parsing. + if (Comment) { + Comment->setAttached(); + return Comment->parse(Context, nullptr, D); + } + return nullptr; +} + +int MapASTVisitor::getLine(const NamedDecl *D, + const ASTContext &Context) const { + return Context.getSourceManager().getPresumedLoc(D->getLocStart()).getLine(); +} + +llvm::StringRef MapASTVisitor::getFile(const NamedDecl *D, + const ASTContext &Context) const { + return Context.getSourceManager() + .getPresumedLoc(D->getLocStart()) + .getFilename(); +} + +} // namespace doc +} // namespace clang diff --git a/clang-tools-extra/clang-doc/Mapper.h b/clang-tools-extra/clang-doc/Mapper.h new file mode 100644 index 0000000000000..1aa3f463fd19d --- /dev/null +++ b/clang-tools-extra/clang-doc/Mapper.h @@ -0,0 +1,57 @@ +//===-- Mapper.h - ClangDoc Mapper ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Mapper piece of the clang-doc tool. It implements +// a RecursiveASTVisitor to look at each declaration and populate the info +// into the internal representation. Each seen declaration is serialized to +// bitcode and written out to the ExecutionContext as a KV pair where the +// key is the declaration's hashed USR and the value is the serialized bitcode. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MAPPER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MAPPER_H + +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Tooling/Execution.h" + +using namespace clang::comments; +using namespace clang::tooling; + +namespace clang { +namespace doc { + +class MapASTVisitor : public clang::RecursiveASTVisitor, + public ASTConsumer { +public: + explicit MapASTVisitor(ASTContext *Ctx, ExecutionContext *ECtx) + : ECtx(ECtx) {} + + void HandleTranslationUnit(ASTContext &Context) override; + bool VisitNamespaceDecl(const NamespaceDecl *D); + bool VisitRecordDecl(const RecordDecl *D); + bool VisitEnumDecl(const EnumDecl *D); + bool VisitCXXMethodDecl(const CXXMethodDecl *D); + bool VisitFunctionDecl(const FunctionDecl *D); + +private: + template bool mapDecl(const T *D); + + int getLine(const NamedDecl *D, const ASTContext &Context) const; + StringRef getFile(const NamedDecl *D, const ASTContext &Context) const; + comments::FullComment *getComment(const NamedDecl *D, + const ASTContext &Context) const; + + ExecutionContext *ECtx; +}; + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MAPPER_H diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h new file mode 100644 index 0000000000000..8b772a34cef55 --- /dev/null +++
b/clang-tools-extra/clang-doc/Representation.h @@ -0,0 +1,184 @@ +//===-- Representation.h - ClangDoc Representation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the internal representations of different declaration +// types for the clang-doc tool. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H + +#include "clang/AST/Type.h" +#include "clang/Basic/Specifiers.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include +#include + +namespace clang { +namespace doc { + +using SymbolID = std::array; + +struct Info; +enum class InfoType { + IT_namespace, + IT_record, + IT_function, + IT_enum, + IT_default +}; + +// A representation of a parsed comment. +struct CommentInfo { + CommentInfo() = default; + CommentInfo(CommentInfo &&Other) = default; // Moves every field, not only Children. + + SmallString<16> + Kind; // Kind of comment (TextComment, InlineCommandComment, + // HTMLStartTagComment, HTMLEndTagComment, BlockCommandComment, + // ParamCommandComment, TParamCommandComment, VerbatimBlockComment, + // VerbatimBlockLineComment, VerbatimLineComment). + SmallString<64> Text; // Text of the comment. + SmallString<16> Name; // Name of the comment (for Verbatim and HTML). + SmallString<8> Direction; // Parameter direction (for (T)ParamCommand). + SmallString<16> ParamName; // Parameter name (for (T)ParamCommand). + SmallString<16> CloseName; // Closing tag name (for VerbatimBlock). + bool SelfClosing = false; // Indicates if tag is self-closing (for HTML).
+ bool Explicit = false; // Indicates if the direction of a param is explicit + // (for (T)ParamCommand). + llvm::SmallVector, 4> + AttrKeys; // List of attribute keys (for HTML). + llvm::SmallVector, 4> + AttrValues; // List of attribute values for each key (for HTML). + llvm::SmallVector, 4> + Args; // List of arguments to commands (for InlineCommand). + std::vector> + Children; // List of child comments for this CommentInfo. +}; + +struct Reference { + Reference() = default; + Reference(llvm::StringRef Name) : UnresolvedName(Name) {} + Reference(SymbolID USR, InfoType IT) : USR(USR), RefType(IT) {} + + SymbolID USR = SymbolID(); // Unique identifier for referenced decl (zeroed when unresolved). + SmallString<16> UnresolvedName; // Name of unresolved type. + InfoType RefType = + InfoType::IT_default; // Indicates the type of this Reference (namespace, + // record, function, enum, default). +}; + +// A base struct for TypeInfos +struct TypeInfo { + TypeInfo() = default; + TypeInfo(SymbolID &Type, InfoType IT) : Type(Type, IT) {} + TypeInfo(llvm::StringRef RefName) : Type(RefName) {} + + Reference Type; // Referenced type in this info. +}; + +// Info for field types. +struct FieldTypeInfo : public TypeInfo { + FieldTypeInfo() = default; + FieldTypeInfo(SymbolID &Type, InfoType IT, llvm::StringRef Name) + : TypeInfo(Type, IT), Name(Name) {} + FieldTypeInfo(llvm::StringRef RefName, llvm::StringRef Name) + : TypeInfo(RefName), Name(Name) {} + + SmallString<16> Name; // Name associated with this info. +}; + +// Info for member types. +struct MemberTypeInfo : public FieldTypeInfo { + MemberTypeInfo() = default; + MemberTypeInfo(SymbolID &Type, InfoType IT, llvm::StringRef Name) + : FieldTypeInfo(Type, IT, Name) {} + MemberTypeInfo(llvm::StringRef RefName, llvm::StringRef Name) + : FieldTypeInfo(RefName, Name) {} + + AccessSpecifier Access = + clang::AccessSpecifier::AS_none; // Access level associated with this + // info (public, protected, private, + // none).
+}; + +struct Location { + Location() = default; + Location(int LineNumber, SmallString<16> Filename) + : LineNumber(LineNumber), Filename(std::move(Filename)) {} + + int LineNumber = 0; // Line number of this Location. + SmallString<32> Filename; // File for this Location. +}; + +/// A base struct for Infos. +struct Info { + Info() = default; + Info(Info &&Other) = default; // Moves every field, not only Description. + virtual ~Info() = default; + + SymbolID USR = SymbolID(); // Unique identifier for the decl described by this Info. + SmallString<16> Name; // Unqualified name of the decl. + llvm::SmallVector + Namespace; // List of parent namespaces for this decl. + std::vector Description; // Comment description of this decl. +}; + +// Info for namespaces. +struct NamespaceInfo : public Info {}; + +// Info for symbols. +struct SymbolInfo : public Info { + llvm::Optional DefLoc; // Location where this decl is defined. + llvm::SmallVector Loc; // Locations where this decl is declared. +}; + +// TODO: Expand to allow for documenting templating and default args. +// Info for functions. +struct FunctionInfo : public SymbolInfo { + bool IsMethod = false; // Indicates whether this function is a class method. + Reference Parent; // Reference to the parent class decl for this method. + TypeInfo ReturnType; // Info about the return type of this function. + llvm::SmallVector Params; // List of parameters. + AccessSpecifier Access = + AccessSpecifier::AS_none; // Access level for this method (public, + // private, protected, none). +}; + +// TODO: Expand to allow for documenting templating, inheritance access, +// friend classes +// Info for types. +struct RecordInfo : public SymbolInfo { + TagTypeKind TagType = TagTypeKind::TTK_Struct; // Type of this record (struct, + // class, union, interface). + llvm::SmallVector + Members; // List of info about record members. + llvm::SmallVector Parents; // List of base/parent records (does + // not include virtual parents).
+ llvm::SmallVector + VirtualParents; // List of virtual base/parent records. +}; + +// TODO: Expand to allow for documenting templating. +// Info for enums. +struct EnumInfo : public SymbolInfo { + bool Scoped = + false; // Indicates whether this enum is scoped (e.g. enum class). + llvm::SmallVector, 4> Members; // List of enum members. +}; + +// TODO: Add functionality to include separate markdown pages. + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp new file mode 100644 index 0000000000000..ccde579b1810e --- /dev/null +++ b/clang-tools-extra/clang-doc/Serialize.cpp @@ -0,0 +1,336 @@ +//===-- Serialize.cpp - ClangDoc Serializer ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Serialize.h" +#include "BitcodeWriter.h" +#include "clang/AST/Comment.h" +#include "clang/Index/USRGeneration.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/SHA1.h" + +using clang::comments::FullComment; + +namespace clang { +namespace doc { +namespace serialize { + +SymbolID hashUSR(llvm::StringRef USR) { + return llvm::SHA1::hash(arrayRefFromStringRef(USR)); +} + +class ClangDocCommentVisitor + : public ConstCommentVisitor { +public: + ClangDocCommentVisitor(CommentInfo &CI) : CurrentCI(CI) {} + + void parseComment(const comments::Comment *C); + + void visitTextComment(const TextComment *C); + void visitInlineCommandComment(const InlineCommandComment *C); + void visitHTMLStartTagComment(const HTMLStartTagComment *C); + void visitHTMLEndTagComment(const HTMLEndTagComment *C); + void visitBlockCommandComment(const BlockCommandComment *C); + void
visitParamCommandComment(const ParamCommandComment *C); + void visitTParamCommandComment(const TParamCommandComment *C); + void visitVerbatimBlockComment(const VerbatimBlockComment *C); + void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C); + void visitVerbatimLineComment(const VerbatimLineComment *C); + +private: + std::string getCommandName(unsigned CommandID) const; + bool isWhitespaceOnly(StringRef S) const; + + CommentInfo &CurrentCI; +}; + +void ClangDocCommentVisitor::parseComment(const comments::Comment *C) { + CurrentCI.Kind = C->getCommentKindName(); + ConstCommentVisitor::visit(C); + for (comments::Comment *Child : + llvm::make_range(C->child_begin(), C->child_end())) { + CurrentCI.Children.emplace_back(llvm::make_unique()); + ClangDocCommentVisitor Visitor(*CurrentCI.Children.back()); + Visitor.parseComment(Child); + } +} + +void ClangDocCommentVisitor::visitTextComment(const TextComment *C) { + if (!isWhitespaceOnly(C->getText())) + CurrentCI.Text = C->getText(); +} + +void ClangDocCommentVisitor::visitInlineCommandComment( + const InlineCommandComment *C) { + CurrentCI.Name = getCommandName(C->getCommandID()); + for (unsigned I = 0, E = C->getNumArgs(); I != E; ++I) + CurrentCI.Args.push_back(C->getArgText(I)); +} + +void ClangDocCommentVisitor::visitHTMLStartTagComment( + const HTMLStartTagComment *C) { + CurrentCI.Name = C->getTagName(); + CurrentCI.SelfClosing = C->isSelfClosing(); + for (unsigned I = 0, E = C->getNumAttrs(); I < E; ++I) { + const HTMLStartTagComment::Attribute &Attr = C->getAttr(I); + CurrentCI.AttrKeys.push_back(Attr.Name); + CurrentCI.AttrValues.push_back(Attr.Value); + } +} + +void ClangDocCommentVisitor::visitHTMLEndTagComment( + const HTMLEndTagComment *C) { + CurrentCI.Name = C->getTagName(); + CurrentCI.SelfClosing = true; +} + +void ClangDocCommentVisitor::visitBlockCommandComment( + const BlockCommandComment *C) { + CurrentCI.Name = getCommandName(C->getCommandID()); + for (unsigned I = 0, E =
C->getNumArgs(); I < E; ++I) + CurrentCI.Args.push_back(C->getArgText(I)); +} + +void ClangDocCommentVisitor::visitParamCommandComment( + const ParamCommandComment *C) { + CurrentCI.Direction = + ParamCommandComment::getDirectionAsString(C->getDirection()); + CurrentCI.Explicit = C->isDirectionExplicit(); + if (C->hasParamName()) + CurrentCI.ParamName = C->getParamNameAsWritten(); +} + +void ClangDocCommentVisitor::visitTParamCommandComment( + const TParamCommandComment *C) { + if (C->hasParamName()) + CurrentCI.ParamName = C->getParamNameAsWritten(); +} + +void ClangDocCommentVisitor::visitVerbatimBlockComment( + const VerbatimBlockComment *C) { + CurrentCI.Name = getCommandName(C->getCommandID()); + CurrentCI.CloseName = C->getCloseName(); +} + +void ClangDocCommentVisitor::visitVerbatimBlockLineComment( + const VerbatimBlockLineComment *C) { + if (!isWhitespaceOnly(C->getText())) + CurrentCI.Text = C->getText(); +} + +void ClangDocCommentVisitor::visitVerbatimLineComment( + const VerbatimLineComment *C) { + if (!isWhitespaceOnly(C->getText())) + CurrentCI.Text = C->getText(); +} + +bool ClangDocCommentVisitor::isWhitespaceOnly(llvm::StringRef S) const { + return S.trim().empty(); // Avoids calling isspace on negative char values (UB). +} + +std::string ClangDocCommentVisitor::getCommandName(unsigned CommandID) const { + const CommandInfo *Info = CommandTraits::getBuiltinCommandInfo(CommandID); + if (Info) + return Info->Name; + // TODO: Add parsing for \file command. + return ""; +} + +// Serializing functions.
+ +template static std::string serialize(T &I) { + SmallString<2048> Buffer; + llvm::BitstreamWriter Stream(Buffer); + ClangDocBitcodeWriter Writer(Stream); + Writer.emitBlock(I); + return Buffer.str().str(); +} + +static void parseFullComment(const FullComment *C, CommentInfo &CI) { + ClangDocCommentVisitor Visitor(CI); + Visitor.parseComment(C); +} + +static SymbolID getUSRForDecl(const Decl *D) { + llvm::SmallString<128> USR; + if (index::generateUSRForDecl(D, USR)) + return SymbolID(); + return hashUSR(USR); +} + +static RecordDecl *getDeclForType(const QualType &T) { + auto *Ty = T->getAs(); + if (!Ty) + return nullptr; + return Ty->getDecl()->getDefinition(); +} + +static void parseFields(RecordInfo &I, const RecordDecl *D) { + for (const FieldDecl *F : D->fields()) { + // FIXME: Set Access to the appropriate value (it is never assigned here). + SymbolID Type; + std::string Name; + InfoType RefType; + if (const auto *T = getDeclForType(F->getTypeSourceInfo()->getType())) { + Type = getUSRForDecl(T); + if (dyn_cast(T)) + RefType = InfoType::IT_enum; + else if (dyn_cast(T)) + RefType = InfoType::IT_record; + I.Members.emplace_back(Type, RefType, F->getQualifiedNameAsString()); + } else { + Name = F->getTypeSourceInfo()->getType().getAsString(); + I.Members.emplace_back(Name, F->getQualifiedNameAsString()); + } + } +} + +static void parseEnumerators(EnumInfo &I, const EnumDecl *D) { + for (const EnumConstantDecl *E : D->enumerators()) + I.Members.emplace_back(E->getNameAsString()); +} + +static void parseParameters(FunctionInfo &I, const FunctionDecl *D) { + for (const ParmVarDecl *P : D->parameters()) { + SymbolID Type; + std::string Name; + InfoType RefType; // Only assigned on the resolved-decl path below. + if (const auto *T = getDeclForType(P->getOriginalType())) { + Type = getUSRForDecl(T); + if (dyn_cast(T)) + RefType = InfoType::IT_enum; + else if (dyn_cast(T)) + RefType = InfoType::IT_record; + I.Params.emplace_back(Type, RefType, P->getQualifiedNameAsString()); + } else { + Name = P->getOriginalType().getAsString(); +
I.Params.emplace_back(Name, P->getQualifiedNameAsString()); + } + } +} + +static void parseBases(RecordInfo &I, const CXXRecordDecl *D) { + for (const CXXBaseSpecifier &B : D->bases()) { + if (B.isVirtual()) + continue; + if (const auto *P = getDeclForType(B.getType())) + I.Parents.emplace_back(getUSRForDecl(P), InfoType::IT_record); + else + I.Parents.emplace_back(B.getType().getAsString()); + } + for (const CXXBaseSpecifier &B : D->vbases()) { + if (const auto *P = getDeclForType(B.getType())) + I.VirtualParents.emplace_back(getUSRForDecl(P), InfoType::IT_record); + else + I.VirtualParents.emplace_back(B.getType().getAsString()); + } +} + +template +static void +populateParentNamespaces(llvm::SmallVector &Namespaces, + const T *D) { + const auto *DC = dyn_cast(D); + while ((DC = DC->getParent())) { + if (const auto *N = dyn_cast(DC)) + Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_namespace); + else if (const auto *N = dyn_cast(DC)) + Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_record); + else if (const auto *N = dyn_cast(DC)) + Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_function); + else if (const auto *N = dyn_cast(DC)) + Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_enum); + } +} + +template +static void populateInfo(Info &I, const T *D, const FullComment *C) { + I.USR = getUSRForDecl(D); + I.Name = D->getNameAsString(); + populateParentNamespaces(I.Namespace, D); + if (C) { + I.Description.emplace_back(); + parseFullComment(C, I.Description.back()); + } +} + +template +static void populateSymbolInfo(SymbolInfo &I, const T *D, const FullComment *C, + int LineNumber, StringRef Filename) { + populateInfo(I, D, C); + if (D->isThisDeclarationADefinition()) + I.DefLoc.emplace(LineNumber, Filename); + else + I.Loc.emplace_back(LineNumber, Filename); +} + +static void populateFunctionInfo(FunctionInfo &I, const FunctionDecl *D, + const FullComment *FC, int LineNumber, + StringRef Filename) { + populateSymbolInfo(I, D, FC,
LineNumber, Filename); + if (const auto *T = getDeclForType(D->getReturnType())) { + I.ReturnType.Type.USR = getUSRForDecl(T); + if (dyn_cast(T)) + I.ReturnType.Type.RefType = InfoType::IT_enum; + else if (dyn_cast(T)) + I.ReturnType.Type.RefType = InfoType::IT_record; + } else { + I.ReturnType.Type.UnresolvedName = D->getReturnType().getAsString(); + } + parseParameters(I, D); +} + +std::string emitInfo(const NamespaceDecl *D, const FullComment *FC, + int LineNumber, llvm::StringRef File) { + NamespaceInfo I; + populateInfo(I, D, FC); + return serialize(I); +} + +std::string emitInfo(const RecordDecl *D, const FullComment *FC, int LineNumber, + llvm::StringRef File) { + RecordInfo I; + populateSymbolInfo(I, D, FC, LineNumber, File); + I.TagType = D->getTagKind(); + parseFields(I, D); + if (const auto *C = dyn_cast(D)) + parseBases(I, C); + return serialize(I); +} + +std::string emitInfo(const FunctionDecl *D, const FullComment *FC, + int LineNumber, llvm::StringRef File) { + FunctionInfo I; + populateFunctionInfo(I, D, FC, LineNumber, File); + I.Access = clang::AccessSpecifier::AS_none; + return serialize(I); +} + +std::string emitInfo(const CXXMethodDecl *D, const FullComment *FC, + int LineNumber, llvm::StringRef File) { + FunctionInfo I; + populateFunctionInfo(I, D, FC, LineNumber, File); + I.IsMethod = true; + I.Parent = Reference(getUSRForDecl(D->getParent()), InfoType::IT_record); + I.Access = D->getAccess(); + return serialize(I); +} + +std::string emitInfo(const EnumDecl *D, const FullComment *FC, int LineNumber, + llvm::StringRef File) { + EnumInfo I; + populateSymbolInfo(I, D, FC, LineNumber, File); + I.Scoped = D->isScoped(); + parseEnumerators(I, D); + return serialize(I); +} + +} // namespace serialize +} // namespace doc +} // namespace clang diff --git a/clang-tools-extra/clang-doc/Serialize.h b/clang-tools-extra/clang-doc/Serialize.h new file mode 100644 index 0000000000000..5f13798bcdf57 --- /dev/null +++ b/clang-tools-extra/clang-doc/Serialize.h
@@ -0,0 +1,53 @@ +//===-- Serialize.h - ClangDoc Serializer -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the serializing functions for the clang-doc tool. Given +// a particular declaration, it collects the appropriate information and returns +// a serialized bitcode string for the declaration. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SERIALIZE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SERIALIZE_H + +#include "Representation.h" +#include "clang/AST/AST.h" +#include "clang/AST/CommentVisitor.h" +#include +#include + +using namespace clang::comments; + +namespace clang { +namespace doc { +namespace serialize { + +std::string emitInfo(const NamespaceDecl *D, const FullComment *FC, + int LineNumber, StringRef File); +std::string emitInfo(const RecordDecl *D, const FullComment *FC, int LineNumber, + StringRef File); +std::string emitInfo(const EnumDecl *D, const FullComment *FC, int LineNumber, + StringRef File); +std::string emitInfo(const FunctionDecl *D, const FullComment *FC, + int LineNumber, StringRef File); +std::string emitInfo(const CXXMethodDecl *D, const FullComment *FC, + int LineNumber, StringRef File); + +// Function to hash a given USR value for storage. +// As USRs (Unified Symbol Resolution) could be large, especially for functions +// with long type arguments, we use 160-bit SHA1(USR) values to +// guarantee the uniqueness of symbols while using a relatively small amount of +// memory (vs storing USRs directly).
+SymbolID hashUSR(llvm::StringRef USR); + +} // namespace serialize +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SERIALIZE_H diff --git a/clang-tools-extra/clang-doc/tool/CMakeLists.txt b/clang-tools-extra/clang-doc/tool/CMakeLists.txt new file mode 100644 index 0000000000000..d7f28cf681a5a --- /dev/null +++ b/clang-tools-extra/clang-doc/tool/CMakeLists.txt @@ -0,0 +1,17 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) + +add_clang_executable(clang-doc + ClangDocMain.cpp + ) + +target_link_libraries(clang-doc + PRIVATE + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangDoc + clangTooling + clangToolingCore + ) + \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp new file mode 100644 index 0000000000000..51c3aa9dfdc7f --- /dev/null +++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp @@ -0,0 +1,114 @@ +//===-- ClangDocMain.cpp - ClangDoc -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool generates C and C++ documentation from source code +// and comments. Generally, it runs a LibTooling FrontendAction on source files, +// mapping each declaration in those files to its USR and serializing relevant +// information into LLVM bitcode. It then runs a pass over the collected +// declaration information, reducing by USR. There is an option to dump this +// intermediate result to bitcode. Finally, it hands the reduced information +// off to a generator, which does the final parsing from the intermediate +// representation to the desired output format.
+// +//===----------------------------------------------------------------------===// + +#include "ClangDoc.h" +#include "clang/AST/AST.h" +#include "clang/AST/Decl.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Driver/Options.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Execution.h" +#include "clang/Tooling/StandaloneExecution.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace clang::ast_matchers; +using namespace clang::tooling; +using namespace clang; + +static llvm::cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); +static llvm::cl::OptionCategory ClangDocCategory("clang-doc options"); + +static llvm::cl::opt + OutDirectory("output", + llvm::cl::desc("Directory for outputting generated files."), + llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory)); + +static llvm::cl::opt + DumpMapperResult("dump-mapper", + llvm::cl::desc("Dump mapper results to bitcode file."), + llvm::cl::init(false), llvm::cl::cat(ClangDocCategory)); + +static llvm::cl::opt DoxygenOnly( + "doxygen", + llvm::cl::desc("Use only doxygen-style comments to generate docs."), + llvm::cl::init(false), llvm::cl::cat(ClangDocCategory)); + +int main(int argc, const char **argv) { + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + std::error_code OK; + + auto Exec = clang::tooling::createExecutorFromCommandLineArgs( + argc, argv, ClangDocCategory); + + if (!Exec) { + llvm::errs() << toString(Exec.takeError()) << "\n"; + return 1; + } + + ArgumentsAdjuster ArgAdjuster; + if (!DoxygenOnly) + ArgAdjuster = combineAdjusters( + getInsertArgumentAdjuster("-fparse-all-comments", + tooling::ArgumentInsertPosition::END), +
ArgAdjuster); + + // Mapping phase (errors are reported but do not abort the run) + llvm::outs() << "Mapping decls...\n"; + auto Err = Exec->get()->execute(doc::newMapperActionFactory( + Exec->get()->getExecutionContext()), + ArgAdjuster); + if (Err) + llvm::errs() << toString(std::move(Err)) << "\n"; + + if (DumpMapperResult) { + Exec->get()->getToolResults()->forEachResult([&](StringRef Key, + StringRef Value) { + SmallString<128> IRRootPath; + llvm::sys::path::native(OutDirectory, IRRootPath); + llvm::sys::path::append(IRRootPath, "bc"); + std::error_code DirectoryStatus = + llvm::sys::fs::create_directories(IRRootPath); + if (DirectoryStatus != OK) { + llvm::errs() << "Unable to create documentation directories.\n"; + return; + } + llvm::sys::path::append(IRRootPath, Key + ".bc"); + std::error_code OutErrorInfo; + llvm::raw_fd_ostream OS(IRRootPath, OutErrorInfo, llvm::sys::fs::F_None); + if (OutErrorInfo != OK) { + llvm::errs() << "Error opening documentation file.\n"; + return; + } + OS << Value; + OS.close(); + }); + } + + return 0; +} diff --git a/clang-tools-extra/docs/clang-doc.rst b/clang-tools-extra/docs/clang-doc.rst new file mode 100644 index 0000000000000..8b3da13297ab4 --- /dev/null +++ b/clang-tools-extra/docs/clang-doc.rst @@ -0,0 +1,62 @@ +=================== +Clang-Doc +=================== + +.. contents:: + +:program:`clang-doc` is a tool for generating C and C++ documentation from +source code and comments. + +The tool is in a very early development stage, so you might encounter bugs and +crashes. Submitting reports with information about how to reproduce the issue +to `the LLVM bugtracker <https://bugs.llvm.org/>`_ will definitely help the +project. If you have any ideas or suggestions, please put a feature request +there. + +Use +===== + +:program:`clang-doc` is a `LibTooling +<https://clang.llvm.org/docs/LibTooling.html>`_-based tool, and so requires a +compile command database for your project (for an example of how to do this +see `How To Setup Tooling For LLVM +<https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html>`_).
+ +The tool can be used on a single file or multiple files as defined in +the compile commands database: + +.. code-block:: console + + $ clang-doc /path/to/file.cpp -p /path/to/compile/commands + +This generates an intermediate representation of the declarations and their +associated information in the specified TUs, serialized to LLVM bitcode. + +As currently implemented, the tool is only able to parse TUs that can be +stored in-memory. Future additions will extend the current framework to use +map-reduce frameworks to allow for use with large codebases. + +:program:`clang-doc` offers the following options: + +.. code-block:: console + + $ clang-doc --help +USAGE: clang-doc [options] <source0> [... <sourceN>] + +OPTIONS: + +Generic Options: + + -help - Display available options (-help-hidden for more) + -help-list - Display list of available options (-help-list-hidden for more) + -version - Display the version of this program + +clang-doc options: + + -doxygen - Use only doxygen-style comments to generate docs. + -dump-mapper - Dump mapper results to bitcode file. + -extra-arg=<string> - Additional argument to append to the compiler command line + -extra-arg-before=<string> - Additional argument to prepend to the compiler command line + -omit-filenames - Omit filenames in output. + -output=<string> - Directory for outputting generated files.
+ -p= - Build path diff --git a/clang-tools-extra/test/CMakeLists.txt b/clang-tools-extra/test/CMakeLists.txt index 26d3405a28b30..28473b15beb27 100644 --- a/clang-tools-extra/test/CMakeLists.txt +++ b/clang-tools-extra/test/CMakeLists.txt @@ -41,6 +41,7 @@ set(CLANG_TOOLS_TEST_DEPS clang-apply-replacements clang-change-namespace clangd + clang-doc clang-include-fixer clang-move clang-query diff --git a/clang-tools-extra/test/clang-doc/mapper-class-in-class.cpp b/clang-tools-extra/test/clang-doc/mapper-class-in-class.cpp new file mode 100644 index 0000000000000..909f00d55cb0b --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-class-in-class.cpp @@ -0,0 +1,35 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/641AB4A3D36399954ACDE29C7A8833032BF40472.bc --dump | FileCheck %s --check-prefix CHECK-X-Y +// RUN: llvm-bcanalyzer %t/docs/bc/CA7C7935730B5EACD25F080E9C83FA087CCDC75E.bc --dump | FileCheck %s --check-prefix CHECK-X + +class X { + class Y {}; +}; + +// CHECK-X: +// CHECK-X-NEXT: + // CHECK-X-NEXT: +// CHECK-X-NEXT: +// CHECK-X-NEXT: + // CHECK-X-NEXT: + // CHECK-X-NEXT: blob data = 'X' + // CHECK-X-NEXT: blob data = '{{.*}}' + // CHECK-X-NEXT: +// CHECK-X-NEXT: + + +// CHECK-X-Y: +// CHECK-X-Y-NEXT: + // CHECK-X-Y-NEXT: +// CHECK-X-Y-NEXT: +// CHECK-X-Y-NEXT: + // CHECK-X-Y-NEXT: + // CHECK-X-Y-NEXT: blob data = 'Y' + // CHECK-X-Y-NEXT: blob data = 'CA7C7935730B5EACD25F080E9C83FA087CCDC75E' + // CHECK-X-Y-NEXT: blob data = '{{.*}}' + // CHECK-X-Y-NEXT: +// CHECK-X-Y-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-class-in-function.cpp b/clang-tools-extra/test/clang-doc/mapper-class-in-function.cpp new file mode 100644 index 0000000000000..0368f00851ed7 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-class-in-function.cpp @@ -0,0 +1,38 @@ +// RUN: rm -rf %t +// 
RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E.bc --dump | FileCheck %s --check-prefix CHECK-H +// RUN: llvm-bcanalyzer %t/docs/bc/E03E804368784360D86C757B549D14BB84A94415.bc --dump | FileCheck %s --check-prefix CHECK-H-I + +void H() { + class I {}; +} + +// CHECK-H: +// CHECK-H-NEXT: + // CHECK-H-NEXT: +// CHECK-H-NEXT: +// CHECK-H-NEXT: + // CHECK-H-NEXT: + // CHECK-H-NEXT: blob data = 'H' + // CHECK-H-NEXT: blob data = '{{.*}}' + // CHECK-H-NEXT: + // CHECK-H-NEXT: blob data = 'void' + // CHECK-H-NEXT: +// CHECK-H-NEXT: + +// CHECK-H-I: +// CHECK-H-I-NEXT: + // CHECK-H-I-NEXT: +// CHECK-H-I-NEXT: +// CHECK-H-I-NEXT: + // CHECK-H-I-NEXT: + // CHECK-H-I-NEXT: blob data = 'I' + // CHECK-H-I-NEXT: blob data = 'B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E' + // CHECK-H-I-NEXT: blob data = '{{.*}}' + // CHECK-H-I-NEXT: +// CHECK-H-I-NEXT: + + diff --git a/clang-tools-extra/test/clang-doc/mapper-class.cpp b/clang-tools-extra/test/clang-doc/mapper-class.cpp new file mode 100644 index 0000000000000..7c0965caf019c --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-class.cpp @@ -0,0 +1,19 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/289584A8E0FF4178A794622A547AA622503967A1.bc --dump | FileCheck %s + +class E {}; + +// CHECK: +// CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: +// CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'E' + // CHECK-NEXT: blob data = '{{.*}}' + // CHECK-NEXT: +// CHECK-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-comments.cpp b/clang-tools-extra/test/clang-doc/mapper-comments.cpp new file mode 100644 index 0000000000000..91620cab08c57 --- /dev/null +++ 
b/clang-tools-extra/test/clang-doc/mapper-comments.cpp @@ -0,0 +1,172 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/7574630614A535710E5A6ABCFFF98BCA2D06A4CA.bc --dump | FileCheck %s + +/// \brief Brief description. +/// +/// Extended description that +/// continues onto the next line. +/// +///
    class="test"> +///
  • Testing. +///
+/// +/// \verbatim +/// The description continues. +/// \endverbatim +/// +/// \param [out] I is a parameter. +/// \param J is a parameter. +/// \return int +int F(int I, int J); + +// CHECK: +// CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: +// CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'F' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'FullComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'BlockCommandComment' + // CHECK-NEXT: blob data = 'brief' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' Brief description.' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' Extended description that' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' continues onto the next line.' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'HTMLStartTagComment' + // CHECK-NEXT: blob data = 'ul' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' class="test">' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'HTMLStartTagComment' + // CHECK-NEXT: blob data = 'li' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' Testing.' 
+ // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'HTMLEndTagComment' + // CHECK-NEXT: blob data = 'ul' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'VerbatimBlockComment' + // CHECK-NEXT: blob data = 'verbatim' + // CHECK-NEXT: blob data = 'endverbatim' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'VerbatimBlockLineComment' + // CHECK-NEXT: blob data = ' The description continues.' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParamCommandComment' + // CHECK-NEXT: blob data = '[out]' + // CHECK-NEXT: blob data = 'I' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' is a parameter.' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParamCommandComment' + // CHECK-NEXT: blob data = '[in]' + // CHECK-NEXT: blob data = 'J' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' is a parameter.' 
+ // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'BlockCommandComment' + // CHECK-NEXT: blob data = 'return' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'ParagraphComment' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'TextComment' + // CHECK-NEXT: blob data = ' int' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = '{{.*}}' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: blob data = 'I' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: blob data = 'J' + // CHECK-NEXT: +// CHECK-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-enum.cpp b/clang-tools-extra/test/clang-doc/mapper-enum.cpp new file mode 100644 index 0000000000000..a4db09798e54c --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-enum.cpp @@ -0,0 +1,36 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/FC07BD34D5E77782C263FA944447929EA8753740.bc --dump | FileCheck %s --check-prefix CHECK-B +// RUN: llvm-bcanalyzer %t/docs/bc/020E6C32A700C3170C009FCCD41671EDDBEAF575.bc --dump | FileCheck %s --check-prefix CHECK-C + +enum B { X, Y }; + +// CHECK-B: +// CHECK-B-NEXT: + // CHECK-B-NEXT: +// CHECK-B-NEXT: +// CHECK-B-NEXT: + // CHECK-B-NEXT: + // CHECK-B-NEXT: blob data = 'B' + // CHECK-B-NEXT: blob data = '{{.*}}' + // CHECK-B-NEXT: blob data = 'X' + // CHECK-B-NEXT: blob data = 'Y' +// CHECK-B-NEXT: + +enum class C { A, B }; + +// CHECK-C: +// CHECK-C-NEXT: + // CHECK-C-NEXT: +// CHECK-C-NEXT: +// CHECK-C-NEXT: + // CHECK-C-NEXT: + // CHECK-C-NEXT: blob data = 'C' + // CHECK-C-NEXT: blob data = '{{.*}}' + // CHECK-C-NEXT: + 
// CHECK-C-NEXT: blob data = 'A' + // CHECK-C-NEXT: blob data = 'B' +// CHECK-C-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-function.cpp b/clang-tools-extra/test/clang-doc/mapper-function.cpp new file mode 100644 index 0000000000000..07a6ecf830065 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-function.cpp @@ -0,0 +1,25 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/A44B32CC3C087C9AF75DAF50DE193E85E7B2C16B.bc --dump | FileCheck %s + +int F(int param) { return param; } + +// CHECK: +// CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: +// CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'F' + // CHECK-NEXT: blob data = '{{.*}}' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: blob data = 'param' + // CHECK-NEXT: +// CHECK-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-method.cpp b/clang-tools-extra/test/clang-doc/mapper-method.cpp new file mode 100644 index 0000000000000..7d16d7c09b518 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-method.cpp @@ -0,0 +1,43 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/F0F9FC65FC90F54F690144A7AFB15DFC3D69B6E6.bc --dump | FileCheck %s --check-prefix CHECK-G-F +// RUN: llvm-bcanalyzer %t/docs/bc/4202E8BF0ECB12AE354C8499C52725B0EE30AED5.bc --dump | FileCheck %s --check-prefix CHECK-G + +class G { +public: + int Method(int param) { return param; } +}; + +// CHECK-G: +// CHECK-G-NEXT: + // CHECK-G-NEXT: +// CHECK-G-NEXT: +// CHECK-G-NEXT: + // CHECK-G-NEXT: + // CHECK-G-NEXT: blob data = 'G' + // CHECK-G-NEXT: blob data = '{{.*}}' + // CHECK-G-NEXT: 
+// CHECK-G-NEXT: + +// CHECK-G-F: +// CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: +// CHECK-G-F-NEXT: +// CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: blob data = 'Method' + // CHECK-G-F-NEXT: blob data = '4202E8BF0ECB12AE354C8499C52725B0EE30AED5' + // CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: blob data = '{{.*}}' + // CHECK-G-F-NEXT: blob data = '4202E8BF0ECB12AE354C8499C52725B0EE30AED5' + // CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: blob data = 'int' + // CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: + // CHECK-G-F-NEXT: blob data = 'int' + // CHECK-G-F-NEXT: blob data = 'param' + // CHECK-G-F-NEXT: +// CHECK-G-F-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-namespace.cpp b/clang-tools-extra/test/clang-doc/mapper-namespace.cpp new file mode 100644 index 0000000000000..e46dfdaa5c636 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-namespace.cpp @@ -0,0 +1,17 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/8D042EFFC98B373450BC6B5B90A330C25A150E9C.bc --dump | FileCheck %s + +namespace A {} + +// CHECK: +// CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: +// CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'A' +// CHECK-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-struct.cpp b/clang-tools-extra/test/clang-doc/mapper-struct.cpp new file mode 100644 index 0000000000000..f13dd60b316b5 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-struct.cpp @@ -0,0 +1,23 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/06B5F6A19BA9F6A832E127C9968282B94619B210.bc --dump | FileCheck %s + +struct C { int i; }; + +// CHECK: +// CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: +// CHECK-NEXT: + // 
CHECK-NEXT: + // CHECK-NEXT: blob data = 'C' + // CHECK-NEXT: blob data = '{{.*}}' + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: blob data = 'C::i' + // CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: diff --git a/clang-tools-extra/test/clang-doc/mapper-union.cpp b/clang-tools-extra/test/clang-doc/mapper-union.cpp new file mode 100644 index 0000000000000..33b0aa9ac1ac5 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/mapper-union.cpp @@ -0,0 +1,29 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump-mapper -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/bc/0B8A6B938B939B77C6325CCCC8AA3E938BF9E2E8.bc --dump | FileCheck %s + +union D { int X; int Y; }; + +// CHECK: +// CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: +// CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'D' + // CHECK-NEXT: blob data = '{{.*}}' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: blob data = 'D::X' + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: + // CHECK-NEXT: blob data = 'int' + // CHECK-NEXT: blob data = 'D::Y' + // CHECK-NEXT: + // CHECK-NEXT: +// CHECK-NEXT: