Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 73 additions & 113 deletions llvm/lib/Support/Mustache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,141 +371,101 @@ static const char *jsonKindToString(json::Value::Kind K) {
llvm_unreachable("Unknown json::Value::Kind");
}

/// Locates the first mustache tag in \p Template at or after \p StartPos.
///
/// Two kinds of tag are recognised: a triple tag delimited by the fixed
/// "{{{" / "}}}" pair, and a normal tag delimited by the caller-supplied
/// \p Open / \p Close pair. Whichever opener occurs first wins; a tie goes to
/// the triple tag.
///
/// \returns a Tag whose kind, start position, inner content and full match are
/// filled in, or a default-constructed Tag when no opener exists or the
/// chosen opener has no matching closer.
static Tag findNextTag(StringRef Template, size_t StartPos, StringRef Open,
                       StringRef Close) {
  const StringLiteral TripleOpen("{{{");
  const StringLiteral TripleClose("}}}");

  const size_t NormalPos = Template.find(Open, StartPos);
  const size_t TriplePos = Template.find(TripleOpen, StartPos);

  Tag Found;

  // Nothing that looks like a tag opener is left in the template.
  if (NormalPos == StringRef::npos && TriplePos == StringRef::npos)
    return Found;

  // A triple opener is preferred when it starts no later than a normal one.
  const bool IsTriple =
      TriplePos != StringRef::npos &&
      (NormalPos == StringRef::npos || TriplePos <= NormalPos);

  const size_t TagPos = IsTriple ? TriplePos : NormalPos;
  const StringRef OpenDelim = IsTriple ? StringRef(TripleOpen) : Open;
  const StringRef CloseDelim = IsTriple ? StringRef(TripleClose) : Close;

  // The closer is searched for only after the opener itself.
  const size_t BodyBegin = TagPos + OpenDelim.size();
  const size_t ClosePos = Template.find(CloseDelim, BodyBegin);
  if (ClosePos == StringRef::npos)
    return Found; // Unterminated tag: report "no tag".

  Found.TagKind = IsTriple ? Tag::Kind::Triple : Tag::Kind::Normal;
  Found.StartPosition = TagPos;
  Found.Content = Template.slice(BodyBegin, ClosePos);
  Found.FullMatch = Template.slice(TagPos, ClosePos + CloseDelim.size());
  return Found;
}

static std::optional<std::pair<StringRef, StringRef>>
processTag(const Tag &T, SmallVectorImpl<Token> &Tokens, MustacheContext &Ctx) {
LLVM_DEBUG(dbgs() << "[Tag] " << T.FullMatch << ", Content: " << T.Content
<< ", Kind: " << tagKindToString(T.TagKind) << "\n");
if (T.TagKind == Tag::Kind::Triple) {
Tokens.emplace_back(T.FullMatch, Ctx.Saver.save("&" + T.Content), '&', Ctx);
return std::nullopt;
}
StringRef Interpolated = T.Content;
if (!Interpolated.trim().starts_with("=")) {
char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
Tokens.emplace_back(T.FullMatch, Interpolated, Front, Ctx);
return std::nullopt;
}
Tokens.emplace_back(T.FullMatch, Interpolated, '=', Ctx);
StringRef DelimSpec = Interpolated.trim();
DelimSpec = DelimSpec.drop_front(1);
DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
DelimSpec = DelimSpec.trim();

auto [NewOpen, NewClose] = DelimSpec.split(' ');
LLVM_DEBUG(dbgs() << "[Set Delimiter] NewOpen: " << NewOpen
<< ", NewClose: " << NewClose << "\n");
return std::make_pair(NewOpen, NewClose);
}

// Simple tokenizer that splits the template into tokens.
// The mustache spec allows {{{ }}} to unescape variables;
// such a tag is tokenized as if it had been written
// {{& variable}}.
static SmallVector<Token> tokenize(StringRef Template, MustacheContext &Ctx) {
LLVM_DEBUG(dbgs() << "[Tokenize Template] \"" << Template << "\"\n");
SmallVector<Token> Tokens;
SmallString<8> Open("{{");
SmallString<8> Close("}}");
size_t Start = 0;
size_t Cursor = 0;
size_t TextStart = 0;

const StringLiteral TripleOpen("{{{");
const StringLiteral TripleClose("}}}");

while (Start < Template.size()) {
LLVM_DEBUG(dbgs() << "[Tokenize Loop] Start=" << Start << ", Open='" << Open
<< "', Close='" << Close << "'\n");
Tag T = findNextTag(Template, Start, Open, Close);
while (Cursor < Template.size()) {
StringRef TemplateSuffix = Template.substr(Cursor);
StringRef TagOpen, TagClose;
Tag::Kind Kind;

// Determine which tag we've encountered.
if (TemplateSuffix.starts_with(TripleOpen)) {
Kind = Tag::Kind::Triple;
TagOpen = TripleOpen;
TagClose = TripleClose;
} else if (TemplateSuffix.starts_with(Open)) {
Kind = Tag::Kind::Normal;
TagOpen = Open;
TagClose = Close;
} else {
// Not at a tag, continue scanning.
++Cursor;
continue;
}

if (T.TagKind == Tag::Kind::None) {
// No more tags, the rest is text.
Tokens.emplace_back(Template.substr(Start));
break;
// Found a tag, first add the preceding text.
if (Cursor > TextStart) {
Tokens.emplace_back(Template.slice(TextStart, Cursor));
}

// Add the text before the tag.
if (T.StartPosition > Start) {
StringRef Text = Template.substr(Start, T.StartPosition - Start);
Tokens.emplace_back(Text);
// Find the closing tag.
size_t EndPos = Template.find(TagClose, Cursor + TagOpen.size());
if (EndPos == StringRef::npos) {
// No closing tag, the rest is text.
Tokens.emplace_back(Template.substr(Cursor));
TextStart = Cursor = Template.size();
break;
}

if (auto NewDelims = processTag(T, Tokens, Ctx)) {
std::tie(Open, Close) = *NewDelims;
// Extract tag content and full match.
size_t ContentStart = Cursor + TagOpen.size();
StringRef Content = Template.substr(ContentStart, EndPos - ContentStart);
StringRef FullMatch =
Template.substr(Cursor, (EndPos + TagClose.size()) - Cursor);

// Process the tag (inlined logic from processTag).
LLVM_DEBUG(dbgs() << "[Tag] " << FullMatch << ", Content: " << Content
<< ", Kind: " << tagKindToString(Kind) << "\n");
if (Kind == Tag::Kind::Triple) {
Tokens.emplace_back(FullMatch, Ctx.Saver.save("&" + Content), '&', Ctx);
} else { // Normal Tag
StringRef Interpolated = Content;
if (!Interpolated.trim().starts_with("=")) {
char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
Tokens.emplace_back(FullMatch, Interpolated, Front, Ctx);
} else { // Set Delimiter
Tokens.emplace_back(FullMatch, Interpolated, '=', Ctx);
StringRef DelimSpec = Interpolated.trim();
DelimSpec = DelimSpec.drop_front(1);
DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
DelimSpec = DelimSpec.trim();

auto [NewOpen, NewClose] = DelimSpec.split(' ');
LLVM_DEBUG(dbgs() << "[Set Delimiter] NewOpen: " << NewOpen
<< ", NewClose: " << NewClose << "\n");
Open = NewOpen;
Close = NewClose;
}
}

// Move past the tag.
Start = T.StartPosition + T.FullMatch.size();
// Move past the tag for the next iteration.
Cursor += FullMatch.size();
TextStart = Cursor;
}

// Fix up white spaces for:
// - open sections
// - inverted sections
// - close sections
// - comments
//
// This loop attempts to find standalone tokens and tries to trim out
// the surrounding whitespace.
// For example:
// if you have the template string
// {{#section}} \n Example \n{{/section}}
// The output should be
// For example:
// \n Example \n
// Add any remaining text after the last tag.
if (TextStart < Template.size()) {
Tokens.emplace_back(Template.substr(TextStart));
}

// Fix up white spaces for standalone tags.
size_t LastIdx = Tokens.size() - 1;
for (size_t Idx = 0, End = Tokens.size(); Idx < End; ++Idx) {
Token &CurrentToken = Tokens[Idx];
Token::Type CurrentType = CurrentToken.getType();
// Check if token type requires cleanup.
bool RequiresCleanUp = requiresCleanUp(CurrentType);

if (!RequiresCleanUp)
if (!requiresCleanUp(CurrentType))
continue;

// We adjust the token body if there's no text behind or ahead.
// A token is considered to have no text ahead if the right of the previous
// token is a newline followed by spaces.
// A token is considered to have no text behind if the left of the next
// token is spaces followed by a newline.
// eg.
// "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3"
bool HasTextBehind = hasTextBehind(Idx, Tokens);
bool HasTextAhead = hasTextAhead(Idx, Tokens);

Expand Down
Loading