Skip to content

Commit

Permalink
Improve grammar check for described code points, force advance in par…
Browse files Browse the repository at this point in the history
…ser recovery
  • Loading branch information
rbuckton committed Jun 19, 2023
1 parent ad77285 commit b13c44a
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@ terminals/unicodeCharacterLiteral.grammar(17,5): error GM1012: Unicode code poin
terminals/unicodeCharacterLiteral.grammar(18,5): error GM1015: Unicode code point literal value is outside of the allowed range
terminals/unicodeCharacterLiteral.grammar(21,5): warning GM1013: Unicode code point literals should use uppercase 'U+' prefix
terminals/unicodeCharacterLiteral.grammar(22,5): warning GM1014: Unicode code point literals should use uppercase hexadecimal digits
terminals/unicodeCharacterLiteral.grammar(25,5): error GM1004: Unterminated string literal
terminals/unicodeCharacterLiteral.grammar(26,5): error GM1020: Unicode character name literal must be an ASCII identifier
terminals/unicodeCharacterLiteral.grammar(38,5): error GM1004: Unterminated string literal
terminals/unicodeCharacterLiteral.grammar(39,5): error GM1020: Unicode character name literal must be an ASCII identifier
terminals/unicodeCharacterLiteral.grammar(40,5): error GM1016: Unicode character name literal may not start with 'U+' unless it is a valid code point
terminals/unicodeCharacterLiteral.grammar(41,5): error GM1011: Unicode code point literals must have at least four hexadecimal digits
terminals/unicodeCharacterLiteral.grammar(42,5): error GM1011: Unicode code point literals must have at least four hexadecimal digits
terminals/unicodeCharacterLiteral.grammar(43,5): error GM1017: Unicode character name literal that includes a code point must have a description
terminals/unicodeCharacterLiteral.grammar(44,5): error GM1012: Unicode code point literals with more than four digits may not have leading zeros
terminals/unicodeCharacterLiteral.grammar(44,5): error GM1017: Unicode character name literal that includes a code point must have a description
terminals/unicodeCharacterLiteral.grammar(45,5): error GM1012: Unicode code point literals with more than four digits may not have leading zeros
terminals/unicodeCharacterLiteral.grammar(46,5): error GM1015: Unicode code point literal value is outside of the allowed range
terminals/unicodeCharacterLiteral.grammar(47,5): error GM1019: Unicode character name literal code point description may only contain printable ASCII characters
terminals/unicodeCharacterLiteral.grammar(48,5): error GM1020: Unicode character name literal must be an ASCII identifier
terminals/unicodeCharacterLiteral.grammar(51,5): warning GM1013: Unicode code point literals should use uppercase 'U+' prefix
terminals/unicodeCharacterLiteral.grammar(52,5): warning GM1014: Unicode code point literals should use uppercase hexadecimal digits
terminals/unicodeCharacterLiteral.grammar(46,5): error GM1012: Unicode code point literals with more than four digits may not have leading zeros
terminals/unicodeCharacterLiteral.grammar(47,5): error GM1015: Unicode code point literal value is outside of the allowed range
terminals/unicodeCharacterLiteral.grammar(48,5): error GM1019: Unicode character name literal code point description may only contain printable ASCII characters
terminals/unicodeCharacterLiteral.grammar(49,5): error GM1020: Unicode character name literal must be an ASCII identifier
terminals/unicodeCharacterLiteral.grammar(52,5): warning GM1013: Unicode code point literals should use uppercase 'U+' prefix
terminals/unicodeCharacterLiteral.grammar(53,5): warning GM1014: Unicode code point literals should use uppercase hexadecimal digits

/// [terminals/unicodeCharacterLiteral.grammar] 21 errors
/// [terminals/unicodeCharacterLiteral.grammar] 22 errors
CodePoints:
U+0000
U+FFFF
Expand Down Expand Up @@ -62,12 +63,6 @@ terminals/unicodeCharacterLiteral.grammar(52,5): warning GM1014: Unicode code po
!!! warning GM1014: Unicode code point literals should use uppercase hexadecimal digits

CharacterNames:
<foo<bar>
~~~~~~~
!!! error GM1004: Unterminated string literal
<foo\<bar>
~~~~~~~~~~~~~~~~~~~
!!! error GM1020: Unicode character name literal must be an ASCII identifier
<NBSP>
<U+2212 MINUS SIGN>
<U+0000 A>
Expand All @@ -81,6 +76,12 @@ terminals/unicodeCharacterLiteral.grammar(52,5): warning GM1014: Unicode code po
<U&#x2b;0000 A>

ErrorCharacterNames:
&lt;foo&lt;bar&gt;
~~~~~~~
!!! error GM1004: Unterminated string literal
&lt;foo\&lt;bar&gt;
~~~~~~~~~~~~~~~~~~~
!!! error GM1020: Unicode character name literal must be an ASCII identifier
<U+>
~~~~
!!! error GM1016: Unicode character name literal may not start with 'U+' unless it is a valid code point
Expand All @@ -92,6 +93,9 @@ terminals/unicodeCharacterLiteral.grammar(52,5): warning GM1014: Unicode code po
!!! error GM1011: Unicode code point literals must have at least four hexadecimal digits
<U+0000>
~~~~~~~~
!!! error GM1017: Unicode character name literal that includes a code point must have a description
<U+0000 >
~~~~~~~~~
!!! error GM1017: Unicode character name literal that includes a code point must have a description
<U+00000 A>
~~~~~~~~~~~
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,6 @@
</emu-rhs>
</emu-production>
<emu-production name="CharacterNames">
<emu-rhs a="2q7mjfi-">
<emu-gprose>&lt;foo</emu-gprose>
<emu-gprose>&lt;bar&gt;</emu-gprose>
</emu-rhs>
<emu-rhs a="wl3_uthh">
<emu-gprose>&lt;foo\&lt;bar&gt;</emu-gprose>
</emu-rhs>
<emu-rhs a="qe6qukax">
<emu-gprose>&lt;NBSP&gt;</emu-gprose>
</emu-rhs>
Expand Down Expand Up @@ -98,6 +91,13 @@
</emu-rhs>
</emu-production>
<emu-production name="ErrorCharacterNames">
<emu-rhs a="2q7mjfi-">
<emu-gprose>&lt;foo</emu-gprose>
<emu-gprose>&lt;bar&gt;</emu-gprose>
</emu-rhs>
<emu-rhs a="wl3_uthh">
<emu-gprose>&lt;foo\&lt;bar&gt;</emu-gprose>
</emu-rhs>
<emu-rhs a="18sb8v4d">
<emu-gprose>&lt;U+&gt;</emu-gprose>
</emu-rhs>
Expand All @@ -110,6 +110,9 @@
<emu-rhs a="m5ox1nyr">
<emu-gprose>&lt;U+0000&gt;</emu-gprose>
</emu-rhs>
<emu-rhs a="a89hsjuj">
<emu-gprose>&lt;U+0000 &gt;</emu-gprose>
</emu-rhs>
<emu-rhs a="7rlbieqo">
<emu-gprose>&lt;U+00000 A&gt;</emu-gprose>
</emu-rhs>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ WarningCodePoints :
U+ffff

CharacterNames :
<foo <bar>
<foo\<bar>
<NBSP>
<U+2212 MINUS SIGN>
<U+0000 A>
Expand All @@ -37,10 +35,13 @@ CharacterNames :
<U+0000 A>

ErrorCharacterNames :
<foo <bar>
<foo\<bar>
<U+>
<U+A>
<U+0>
<U+0000>
<U+0000 >
<U+00000 A>
<U+000000 A>
<U+110000 A>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,6 @@
<a name="CharacterNames"></a><div class="production">
<span class="nonterminal">CharacterNames</span><span class="punctuation"> :</span>
<div class="rhs-list">
<div class="rhs-list-item">
<a name="2q7mjfi-"></a><span class="rhs"><span class="unicode-character-literal">&lt;foo</span> <span class="unicode-character-literal">&lt;bar&gt;</span></span>
</div>
<div class="rhs-list-item">
<a name="wl3_uthh"></a><span class="rhs"><span class="unicode-character-literal">&lt;foo\&lt;bar&gt;</span></span>
</div>
<div class="rhs-list-item">
<a name="qe6qukax"></a><span class="rhs"><span class="unicode-character-literal">&lt;NBSP&gt;</span></span>
</div>
Expand Down Expand Up @@ -112,6 +106,12 @@
<a name="ErrorCharacterNames"></a><div class="production">
<span class="nonterminal">ErrorCharacterNames</span><span class="punctuation"> :</span>
<div class="rhs-list">
<div class="rhs-list-item">
<a name="2q7mjfi-"></a><span class="rhs"><span class="unicode-character-literal">&lt;foo</span> <span class="unicode-character-literal">&lt;bar&gt;</span></span>
</div>
<div class="rhs-list-item">
<a name="wl3_uthh"></a><span class="rhs"><span class="unicode-character-literal">&lt;foo\&lt;bar&gt;</span></span>
</div>
<div class="rhs-list-item">
<a name="18sb8v4d"></a><span class="rhs"><span class="unicode-character-literal">&lt;U+&gt;</span></span>
</div>
Expand All @@ -124,6 +124,9 @@
<div class="rhs-list-item">
<a name="m5ox1nyr"></a><span class="rhs"><span class="unicode-character-literal">&lt;U+0000&gt;</span></span>
</div>
<div class="rhs-list-item">
<a name="a89hsjuj"></a><span class="rhs"><span class="unicode-character-literal">&lt;U+0000 &gt;</span></span>
</div>
<div class="rhs-list-item">
<a name="7rlbieqo"></a><span class="rhs"><span class="unicode-character-literal">&lt;U+00000 A&gt;</span></span>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
&emsp;&emsp;&emsp;<a name="WarningCodePoints-bvpcq1k9"></a>U+ffff

&emsp;&emsp;<a name="CharacterNames"></a>*CharacterNames* **:**
&emsp;&emsp;&emsp;<a name="CharacterNames-2q7mjfi-"></a>&lt;foo&emsp;&lt;bar&gt;
&emsp;&emsp;&emsp;<a name="CharacterNames-wl3_uthh"></a>&lt;foo\&lt;bar&gt;
&emsp;&emsp;&emsp;<a name="CharacterNames-qe6qukax"></a>&lt;NBSP&gt;
&emsp;&emsp;&emsp;<a name="CharacterNames-eolopvl2"></a>&lt;U+2212 MINUS SIGN&gt;
&emsp;&emsp;&emsp;<a name="CharacterNames-ryhkfcxi"></a>&lt;U+0000 A&gt;
Expand All @@ -37,10 +35,13 @@
&emsp;&emsp;&emsp;<a name="CharacterNames-ryhkfcxi"></a>&lt;U+0000 A&gt;

&emsp;&emsp;<a name="ErrorCharacterNames"></a>*ErrorCharacterNames* **:**
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-2q7mjfi-"></a>&lt;foo&emsp;&lt;bar&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-wl3_uthh"></a>&lt;foo\&lt;bar&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-18sb8v4d"></a>&lt;U+&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-b5acuw72"></a>&lt;U+A&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-yj4dpbbw"></a>&lt;U+0&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-m5ox1nyr"></a>&lt;U+0000&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-a89hsjuj"></a>&lt;U+0000 &gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-7rlbieqo"></a>&lt;U+00000 A&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-ldqb-gs9"></a>&lt;U+000000 A&gt;
&emsp;&emsp;&emsp;<a name="ErrorCharacterNames-oxiqj8rw"></a>&lt;U+110000 A&gt;
Expand Down
84 changes: 44 additions & 40 deletions baselines/reference/terminals/unicodeCharacterLiteral.grammar.nodes
Original file line number Diff line number Diff line change
Expand Up @@ -87,62 +87,62 @@
(25,5)SyntaxKind[RightHandSide]
(25,5)SyntaxKind[SymbolSpan]
(25,5)SyntaxKind[Terminal]
(25,5)SyntaxKind[UnicodeCharacterLiteral](text = &lt;foo)
(25,12)SyntaxKind[SymbolSpan]
(25,12)SyntaxKind[Terminal]
(25,12)SyntaxKind[UnicodeCharacterLiteral](text = &lt;bar&gt;)
(25,5)SyntaxKind[UnicodeCharacterLiteral](text = <NBSP>)
(26,5)SyntaxKind[RightHandSide]
(26,5)SyntaxKind[SymbolSpan]
(26,5)SyntaxKind[Terminal]
(26,5)SyntaxKind[UnicodeCharacterLiteral](text = &lt;foo\&lt;bar&gt;)
(26,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+2212 MINUS SIGN>)
(27,5)SyntaxKind[RightHandSide]
(27,5)SyntaxKind[SymbolSpan]
(27,5)SyntaxKind[Terminal]
(27,5)SyntaxKind[UnicodeCharacterLiteral](text = <NBSP>)
(27,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+0000 A>)
(28,5)SyntaxKind[RightHandSide]
(28,5)SyntaxKind[SymbolSpan]
(28,5)SyntaxKind[Terminal]
(28,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+2212 MINUS SIGN>)
(28,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+FFFF A>)
(29,5)SyntaxKind[RightHandSide]
(29,5)SyntaxKind[SymbolSpan]
(29,5)SyntaxKind[Terminal]
(29,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+0000 A>)
(29,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+10000 A>)
(30,5)SyntaxKind[RightHandSide]
(30,5)SyntaxKind[SymbolSpan]
(30,5)SyntaxKind[Terminal]
(30,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+FFFF A>)
(30,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+1FFFF A>)
(31,5)SyntaxKind[RightHandSide]
(31,5)SyntaxKind[SymbolSpan]
(31,5)SyntaxKind[Terminal]
(31,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+10000 A>)
(31,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+F0000 A>)
(32,5)SyntaxKind[RightHandSide]
(32,5)SyntaxKind[SymbolSpan]
(32,5)SyntaxKind[Terminal]
(32,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+1FFFF A>)
(32,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+FFFFF A>)
(33,5)SyntaxKind[RightHandSide]
(33,5)SyntaxKind[SymbolSpan]
(33,5)SyntaxKind[Terminal]
(33,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+F0000 A>)
(33,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+100000 A>)
(34,5)SyntaxKind[RightHandSide]
(34,5)SyntaxKind[SymbolSpan]
(34,5)SyntaxKind[Terminal]
(34,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+FFFFF A>)
(34,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+10FFFF A>)
(35,5)SyntaxKind[RightHandSide]
(35,5)SyntaxKind[SymbolSpan]
(35,5)SyntaxKind[Terminal]
(35,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+100000 A>)
(36,5)SyntaxKind[RightHandSide]
(36,5)SyntaxKind[SymbolSpan]
(36,5)SyntaxKind[Terminal]
(36,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+10FFFF A>)
(37,5)SyntaxKind[RightHandSide]
(37,5)SyntaxKind[SymbolSpan]
(37,5)SyntaxKind[Terminal]
(37,5)SyntaxKind[UnicodeCharacterLiteral](text = <U&#x2b;0000 A>)
(39,1)SyntaxKind[Production]
(39,1)SyntaxKind[Identifier](text = "ErrorCharacterNames")
(39,20)SyntaxKind[ColonToken]
(40,5)SyntaxKind[RightHandSideList]
(35,5)SyntaxKind[UnicodeCharacterLiteral](text = <U&#x2b;0000 A>)
(37,1)SyntaxKind[Production]
(37,1)SyntaxKind[Identifier](text = "ErrorCharacterNames")
(37,20)SyntaxKind[ColonToken]
(38,5)SyntaxKind[RightHandSideList]
(38,5)SyntaxKind[RightHandSide]
(38,5)SyntaxKind[SymbolSpan]
(38,5)SyntaxKind[Terminal]
(38,5)SyntaxKind[UnicodeCharacterLiteral](text = &lt;foo)
(38,12)SyntaxKind[SymbolSpan]
(38,12)SyntaxKind[Terminal]
(38,12)SyntaxKind[UnicodeCharacterLiteral](text = &lt;bar&gt;)
(39,5)SyntaxKind[RightHandSide]
(39,5)SyntaxKind[SymbolSpan]
(39,5)SyntaxKind[Terminal]
(39,5)SyntaxKind[UnicodeCharacterLiteral](text = &lt;foo\&lt;bar&gt;)
(40,5)SyntaxKind[RightHandSide]
(40,5)SyntaxKind[SymbolSpan]
(40,5)SyntaxKind[Terminal]
Expand All @@ -162,32 +162,36 @@
(44,5)SyntaxKind[RightHandSide]
(44,5)SyntaxKind[SymbolSpan]
(44,5)SyntaxKind[Terminal]
(44,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+00000 A>)
(44,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+0000 >)
(45,5)SyntaxKind[RightHandSide]
(45,5)SyntaxKind[SymbolSpan]
(45,5)SyntaxKind[Terminal]
(45,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+000000 A>)
(45,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+00000 A>)
(46,5)SyntaxKind[RightHandSide]
(46,5)SyntaxKind[SymbolSpan]
(46,5)SyntaxKind[Terminal]
(46,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+110000 A>)
(46,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+000000 A>)
(47,5)SyntaxKind[RightHandSide]
(47,5)SyntaxKind[SymbolSpan]
(47,5)SyntaxKind[Terminal]
(47,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+0000 &copy;>)
(47,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+110000 A>)
(48,5)SyntaxKind[RightHandSide]
(48,5)SyntaxKind[SymbolSpan]
(48,5)SyntaxKind[Terminal]
(48,5)SyntaxKind[UnicodeCharacterLiteral](text = <A B>)
(50,1)SyntaxKind[Production]
(50,1)SyntaxKind[Identifier](text = "WarningCharacterNames")
(50,22)SyntaxKind[ColonToken]
(51,5)SyntaxKind[RightHandSideList]
(51,5)SyntaxKind[RightHandSide]
(51,5)SyntaxKind[SymbolSpan]
(51,5)SyntaxKind[Terminal]
(51,5)SyntaxKind[UnicodeCharacterLiteral](text = <u+ffff A>)
(48,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+0000 &copy;>)
(49,5)SyntaxKind[RightHandSide]
(49,5)SyntaxKind[SymbolSpan]
(49,5)SyntaxKind[Terminal]
(49,5)SyntaxKind[UnicodeCharacterLiteral](text = <A B>)
(51,1)SyntaxKind[Production]
(51,1)SyntaxKind[Identifier](text = "WarningCharacterNames")
(51,22)SyntaxKind[ColonToken]
(52,5)SyntaxKind[RightHandSideList]
(52,5)SyntaxKind[RightHandSide]
(52,5)SyntaxKind[SymbolSpan]
(52,5)SyntaxKind[Terminal]
(52,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+ffff A>)
(52,5)SyntaxKind[UnicodeCharacterLiteral](text = <u+ffff A>)
(53,5)SyntaxKind[RightHandSide]
(53,5)SyntaxKind[SymbolSpan]
(53,5)SyntaxKind[Terminal]
(53,5)SyntaxKind[UnicodeCharacterLiteral](text = <U+ffff A>)

0 comments on commit b13c44a

Please sign in to comment.