Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[clangd] Introduce intermediate representation of formatted text
Summary: That can render to markdown or plain text. Used for findHover requests. Reviewers: malaperle, sammccall, kadircet Reviewed By: sammccall Subscribers: mgorny, MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D58547 llvm-svn: 360151
- Loading branch information
1 parent
9306ced
commit 75e760b
Showing
5 changed files
with
388 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
//===--- FormattedString.cpp --------------------------------*- C++-*------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
#include "FormattedString.h" | ||
#include "clang/Basic/CharInfo.h" | ||
#include "llvm/ADT/StringRef.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
#include <cstddef> | ||
#include <string> | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
namespace { | ||
/// Renders \p Input as markdown plain text. Every ASCII punctuation character
/// is prefixed with a backslash so that it cannot be interpreted as the start
/// of a markdown construct; all other characters are copied through as-is.
static std::string renderText(llvm::StringRef Input) {
  // The full set of ASCII punctuation characters that get escaped.
  constexpr llvm::StringLiteral Punctuation =
      R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";

  std::string Escaped;
  for (char C : Input) {
    // Put a backslash in front of anything found in the punctuation set.
    if (Punctuation.find(C) != llvm::StringRef::npos)
      Escaped.push_back('\\');
    Escaped.push_back(C);
  }
  return Escaped;
}
|
||
/// Renders \p Input as a markdown inline code span delimited by single
/// backticks. Each backtick inside \p Input is emitted twice, and the contents
/// are padded with one space on each side when needed (see below).
static std::string renderInlineBlock(llvm::StringRef Input) {
  // Emit every backtick twice so that the contents never hold a lone backtick
  // that would close the inline block early.
  std::string Body;
  for (char C : Input) {
    if (C == '`')
      Body.push_back('`');
    Body.push_back(C);
  }

  const llvm::StringRef View(Body);
  // A backtick at either edge would merge with the delimiter; pad with spaces,
  // which markdown renderers are expected to ignore.
  const bool BacktickAtEdge = View.startswith("`") || View.endswith("`");
  // Renderers should drop one leading and one trailing space when both are
  // present, so add an extra pair to keep the ones the user intended.
  const bool SpaceAtBothEdges = View.startswith(" ") && View.endswith(" ");

  if (BacktickAtEdge || SpaceAtBothEdges)
    return "` " + std::move(Body) + " `";
  return "`" + std::move(Body) + "`";
}
/// Renders \p Input as a markdown fenced code block tagged with \p Language.
/// The fence consists of at least three backticks and is always longer than
/// any run of backticks inside \p Input. Markdown also accepts '~' fences, but
/// they are never produced here.
static std::string renderCodeBlock(llvm::StringRef Input,
                                   llvm::StringRef Language) {
  // Find the longest run of consecutive backticks in the contents; the fence
  // has to be strictly longer than that.
  unsigned LongestRun = 0;
  unsigned CurrentRun = 0;
  for (char C : Input) {
    CurrentRun = (C == '`') ? CurrentRun + 1 : 0;
    LongestRun = std::max(LongestRun, CurrentRun);
  }

  const std::string Fence(/*Repeat=*/std::max(3u, LongestRun + 1), '`');

  // Layout: <fence><language>\n<contents>\n<fence>
  std::string Out = Fence;
  Out += Language.str();
  Out += "\n";
  Out += Input.str();
  Out += "\n";
  Out += Fence;
  return Out;
}
|
||
} // namespace | ||
|
||
void FormattedString::appendText(std::string Text) { | ||
// We merge consecutive blocks of text to simplify the overall structure. | ||
if (Chunks.empty() || Chunks.back().Kind != ChunkKind::PlainText) { | ||
Chunk C; | ||
C.Kind = ChunkKind::PlainText; | ||
Chunks.push_back(C); | ||
} | ||
// FIXME: ensure there is a whitespace between the chunks. | ||
Chunks.back().Contents += Text; | ||
} | ||
|
||
void FormattedString::appendCodeBlock(std::string Code, std::string Language) { | ||
Chunk C; | ||
C.Kind = ChunkKind::CodeBlock; | ||
C.Contents = std::move(Code); | ||
C.Language = std::move(Language); | ||
Chunks.push_back(std::move(C)); | ||
} | ||
|
||
void FormattedString::appendInlineCode(std::string Code) { | ||
Chunk C; | ||
C.Kind = ChunkKind::InlineCodeBlock; | ||
C.Contents = std::move(Code); | ||
Chunks.push_back(std::move(C)); | ||
} | ||
|
||
std::string FormattedString::renderAsMarkdown() const { | ||
std::string R; | ||
for (const auto &C : Chunks) { | ||
switch (C.Kind) { | ||
case ChunkKind::PlainText: | ||
R += renderText(C.Contents); | ||
continue; | ||
case ChunkKind::InlineCodeBlock: | ||
// Make sure we don't glue two backticks together. | ||
if (llvm::StringRef(R).endswith("`")) | ||
R += " "; | ||
R += renderInlineBlock(C.Contents); | ||
continue; | ||
case ChunkKind::CodeBlock: | ||
if (!R.empty() && !llvm::StringRef(R).endswith("\n")) | ||
R += "\n"; | ||
R += renderCodeBlock(C.Contents, C.Language); | ||
R += "\n"; | ||
continue; | ||
} | ||
llvm_unreachable("unhanlded ChunkKind"); | ||
} | ||
return R; | ||
} | ||
|
||
std::string FormattedString::renderAsPlainText() const { | ||
std::string R; | ||
auto EnsureWhitespace = [&]() { | ||
if (R.empty() || isWhitespace(R.back())) | ||
return; | ||
R += " "; | ||
}; | ||
for (const auto &C : Chunks) { | ||
switch (C.Kind) { | ||
case ChunkKind::PlainText: | ||
EnsureWhitespace(); | ||
R += C.Contents; | ||
continue; | ||
case ChunkKind::InlineCodeBlock: | ||
EnsureWhitespace(); | ||
R += C.Contents; | ||
continue; | ||
case ChunkKind::CodeBlock: | ||
if (!R.empty()) | ||
R += "\n\n"; | ||
R += C.Contents; | ||
if (!llvm::StringRef(C.Contents).endswith("\n")) | ||
R += "\n"; | ||
continue; | ||
} | ||
llvm_unreachable("unhanlded ChunkKind"); | ||
} | ||
while (!R.empty() && isWhitespace(R.back())) | ||
R.pop_back(); | ||
return R; | ||
} | ||
} // namespace clangd | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
//===--- FormattedString.h ----------------------------------*- C++-*------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// A simple intermediate representation of formatted text that could be | ||
// converted to plaintext or markdown. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H | ||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
/// A structured string: an ordered list of text and code chunks that can be
/// converted to markdown or to plain text upon request.
class FormattedString {
public:
  /// Append plain text to the end of the string.
  void appendText(std::string Text);

  /// Append a block of code written in \p Language (C++ unless specified).
  /// This translates to a ``` block in markdown. In a plain text
  /// representation, the code block will be surrounded by newlines.
  void appendCodeBlock(std::string Code, std::string Language = "cpp");

  /// Append an inline block of code. This translates to the ` block in
  /// markdown.
  void appendInlineCode(std::string Code);

  /// Produce the markdown representation of the whole string.
  std::string renderAsMarkdown() const;
  /// Produce the plain text representation of the whole string.
  std::string renderAsPlainText() const;

private:
  /// The kinds of chunk a formatted string is made of.
  enum class ChunkKind {
    /// A plain text paragraph.
    PlainText,
    /// A block of code.
    CodeBlock,
    /// An inline block of code.
    InlineCodeBlock,
  };

  /// One piece of the string together with its kind.
  struct Chunk {
    ChunkKind Kind = ChunkKind::PlainText;
    std::string Contents;
    /// Language for code block chunks. Ignored for other chunks.
    std::string Language;
  };

  /// All chunks, in the order they were appended.
  std::vector<Chunk> Chunks;
};
|
||
} // namespace clangd | ||
} // namespace clang | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.