Skip to content

Commit f3487c7

Browse files
committed
[YAMLParser] Add multi-line literal folding support
Last year I was working on Swift to add support for [Localization of Compiler Diagnostic Messages](https://forums.swift.org/t/localization-of-compiler-diagnostic-messages/36412/41). We are currently using YAML as the new diagnostic format. The llvm::yaml parser (YAMLParser) did not support multi-line string literal folding, which is crucial for keeping the diagnostic messages within the 80-column limit. Therefore, I decided to add multi-line string literal folding support to the YAML parser. Patch by: @HassanElDesouky (Hassan ElDesouky). Differential Revision: https://reviews.llvm.org/D102590
1 parent de9611b commit f3487c7

File tree

3 files changed

+161
-15
lines changed

3 files changed

+161
-15
lines changed

llvm/include/llvm/Support/YAMLParser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
// See http://www.yaml.org/spec/1.2/spec.html for the full standard.
1212
//
1313
// This currently does not implement the following:
14-
// * Multi-line literal folding.
1514
// * Tag resolution.
1615
// * UTF-16.
1716
// * BOMs anywhere other than the first Unicode scalar value in the file.

llvm/lib/Support/YAMLParser.cpp

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,9 @@ class Scanner {
392392
/// Pos is whitespace or a new line
393393
bool isBlankOrBreak(StringRef::iterator Position);
394394

395+
/// Return true if the line contains only blanks and line breaks (an empty line also counts), false otherwise.
396+
bool isLineEmpty(StringRef Line);
397+
395398
/// Consume a single b-break[28] if it's present at the current position.
396399
///
397400
/// Return false if the code unit at the current position isn't a line break.
@@ -470,6 +473,18 @@ class Scanner {
470473
/// Scan a block scalar starting with | or >.
471474
bool scanBlockScalar(bool IsLiteral);
472475

476+
/// Scan a block scalar style indicator and header.
477+
///
478+
/// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
479+
/// YAML does not consider the style indicator to be a part of the header.
480+
///
481+
/// Return false if an error occurred.
482+
bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
483+
unsigned &IndentIndicator, bool &IsDone);
484+
485+
/// Scan a style indicator in a block scalar header.
486+
char scanBlockStyleIndicator();
487+
473488
/// Scan a chomping indicator in a block scalar header.
474489
char scanBlockChompingIndicator();
475490

@@ -1034,6 +1049,13 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
10341049
*Position == '\n';
10351050
}
10361051

1052+
bool Scanner::isLineEmpty(StringRef Line) {
  // A line counts as "empty" when every one of its characters is accepted by
  // isBlankOrBreak. A zero-length line has no characters to reject, so it is
  // reported as empty as well.
  StringRef::iterator Cursor = Line.begin();
  const StringRef::iterator LineEnd = Line.end();
  while (Cursor != LineEnd) {
    // The first character that is neither a blank nor a break settles it.
    if (!isBlankOrBreak(Cursor))
      return false;
    ++Cursor;
  }
  return true;
}
1058+
10371059
bool Scanner::consumeLineBreakIfPresent() {
10381060
auto Next = skip_b_break(Current);
10391061
if (Next == Current)
@@ -1516,6 +1538,25 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
15161538
return true;
15171539
}
15181540

1541+
bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
1542+
char &ChompingIndicator,
1543+
unsigned &IndentIndicator,
1544+
bool &IsDone) {
1545+
StyleIndicator = scanBlockStyleIndicator();
1546+
if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1547+
return false;
1548+
return true;
1549+
}
1550+
1551+
char Scanner::scanBlockStyleIndicator() {
1552+
char Indicator = ' ';
1553+
if (Current != End && (*Current == '>' || *Current == '|')) {
1554+
Indicator = *Current;
1555+
skip(1);
1556+
}
1557+
return Indicator;
1558+
}
1559+
15191560
char Scanner::scanBlockChompingIndicator() {
15201561
char Indicator = ' ';
15211562
if (Current != End && (*Current == '+' || *Current == '-')) {
@@ -1654,19 +1695,19 @@ bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
16541695
}
16551696

16561697
bool Scanner::scanBlockScalar(bool IsLiteral) {
1657-
// Eat '|' or '>'
16581698
assert(*Current == '|' || *Current == '>');
1659-
skip(1);
1660-
1699+
char StyleIndicator;
16611700
char ChompingIndicator;
16621701
unsigned BlockIndent;
16631702
bool IsDone = false;
1664-
if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1703+
if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1704+
IsDone))
16651705
return false;
16661706
if (IsDone)
16671707
return true;
1708+
bool IsFolded = StyleIndicator == '>';
16681709

1669-
auto Start = Current;
1710+
const auto *Start = Current;
16701711
unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
16711712
unsigned LineBreaks = 0;
16721713
if (BlockIndent == 0) {
@@ -1687,6 +1728,22 @@ bool Scanner::scanBlockScalar(bool IsLiteral) {
16871728
auto LineStart = Current;
16881729
advanceWhile(&Scanner::skip_nb_char);
16891730
if (LineStart != Current) {
1731+
if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1732+
// The folded style "folds" any single line break between content into a
1733+
// single space, except when that content is "empty" (only contains
1734+
// whitespace) in which case the line break is left as-is.
1735+
if (LineBreaks == 1) {
1736+
Str.append(LineBreaks,
1737+
isLineEmpty(StringRef(LineStart, Current - LineStart))
1738+
? '\n'
1739+
: ' ');
1740+
}
1741+
// If we saw a single line break, we are completely replacing it and so
1742+
// want `LineBreaks == 0`. Otherwise this decrement accounts for the
1743+
// fact that the first line break is "trimmed", only being used to
1744+
// signal a sequence of line breaks which should not be folded.
1745+
LineBreaks--;
1746+
}
16901747
Str.append(LineBreaks, '\n');
16911748
Str.append(StringRef(LineStart, Current - LineStart));
16921749
LineBreaks = 0;
Lines changed: 99 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,103 @@
11
# RUN: yaml-bench -canonical %s | FileCheck %s
2-
# CHECK: ? !!str "strip"
3-
# CHECK: : !!str ""
4-
# CHECK: ? !!str "clip"
5-
# CHECK: : !!str ""
6-
# CHECK: ? !!str "keep"
7-
# CHECK: : !!str "\n"
2+
# CHECK: ? !!str "literal_strip"
3+
# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple \n\n\nlines\n\nfoo bar"
4+
# CHECK: ? !!str "literal_clip"
5+
# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple \n\n\nlines\n\nfoo bar\n"
6+
# CHECK: ? !!str "literal_keep"
7+
# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple \n\n\nlines\n\nfoo bar\n\n\n\n"
8+
# CHECK: ? !!str "folded_strip"
9+
# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nfoo bar"
10+
# CHECK: ? !!str "folded_clip"
11+
# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nfoo bar\n"
12+
# CHECK: ? !!str "folded_keep"
13+
# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nfoo bar\n\n\n"
814

9-
strip: >-
15+
literal_strip: |-
16+
Hello
1017

11-
clip: >
1218

13-
keep: |+
19+
world
20+
on
21+
multiple
22+
23+
24+
lines
25+
26+
foo bar
27+
28+
29+
30+
literal_clip: |
31+
Hello
32+
33+
34+
world
35+
on
36+
multiple
37+
38+
39+
lines
40+
41+
foo bar
42+
43+
44+
45+
literal_keep: |+
46+
Hello
47+
48+
49+
world
50+
on
51+
multiple
52+
53+
54+
lines
55+
56+
foo bar
57+
58+
59+
60+
folded_strip: >-
61+
Hello
62+
63+
64+
world
65+
on
66+
multiple
67+
68+
69+
lines
70+
71+
foo bar
72+
73+
74+
75+
folded_clip: >
76+
Hello
77+
78+
79+
world
80+
on
81+
multiple
82+
83+
84+
lines
85+
86+
foo bar
87+
88+
89+
90+
folded_keep: >+
91+
Hello
92+
93+
94+
world
95+
on
96+
multiple
97+
98+
99+
lines
100+
101+
foo bar
102+
103+

0 commit comments

Comments
 (0)