From b21928fe39992527c158d35980b1f39437b0bf08 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 30 Nov 2025 20:11:00 +0000 Subject: [PATCH 01/15] Initial commit with task details for issue #142 Adding CLAUDE.md with task information for AI processing. This file will be removed when the task is complete. Issue: https://github.com/link-foundation/links-notation/issues/142 --- CLAUDE.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..90186fc --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,5 @@ +Issue to solve: https://github.com/link-foundation/links-notation/issues/142 +Your prepared branch: issue-142-23f307922307 +Your prepared working directory: /tmp/gh-issue-solver-1764533457922 + +Proceed. \ No newline at end of file From 4920a3ea6b3ac431fd43703999bb47d6de23755e Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 30 Nov 2025 20:25:42 +0000 Subject: [PATCH 02/15] Add support for backtick quotes and multi-quote strings with escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements issue #142: 1. Backtick quotes: Added support for backtick (`) as a third quote type alongside double quotes (") and single quotes (') 2. Multi-quote strings: Support for 1-5+ quote sequences where: - N quotes open/close the string - 2*N quotes inside become N quotes in the output (escaping) Examples: - `hello` -> hello (backtick quoted) - ``code with ` inside`` -> code with ` inside - '''text with '' inside''' -> text with '' inside - """json with " in it""" -> json with " in it All four language implementations updated: - JavaScript: Updated grammar.pegjs - Rust: Updated parser.rs - C#: Updated Parser.peg - Python: Updated parser.py Tests added for all implementations covering: - All three quote types (", ', `) - Quote counts from 1 to 5 - Escape sequences - Complex scenarios (mixed quotes, code blocks, JSON, etc.) 
Version bumped to 0.13.0 Closes #142 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../MultiQuoteParserTests.cs | 332 ++++ .../Link.Foundation.Links.Notation.csproj | 2 +- .../Link.Foundation.Links.Notation/Parser.peg | 65 +- js/package.json | 2 +- js/src/grammar.pegjs | 87 +- js/src/parser-generated.js | 1659 +++++++++++++++-- js/tests/MultiQuoteParser.test.js | 408 ++++ python/links_notation/parser.py | 206 +- python/pyproject.toml | 2 +- python/tests/test_multi_quote_parser.py | 226 +++ rust/Cargo.toml | 2 +- rust/src/parser.rs | 138 +- rust/tests/multi_quote_parser_tests.rs | 349 ++++ 13 files changed, 3298 insertions(+), 180 deletions(-) create mode 100644 csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs create mode 100644 js/tests/MultiQuoteParser.test.js create mode 100644 python/tests/test_multi_quote_parser.py create mode 100644 rust/tests/multi_quote_parser_tests.rs diff --git a/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs b/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs new file mode 100644 index 0000000..071191e --- /dev/null +++ b/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs @@ -0,0 +1,332 @@ +using System; +using Xunit; + +namespace Link.Foundation.Links.Notation.Tests +{ + public static class MultiQuoteParserTests + { + // Helper to extract single reference ID + private static string? GetSingleRefId(System.Collections.Generic.IList> result) + { + if (result.Count == 1 && result[0].Id == null && result[0].Values?.Count == 1) + { + return result[0].Values[0].Id; + } + return result.Count == 1 ? 
result[0].Id : null; + } + + // ============================================================================ + // Backtick Quote Tests (Single Backtick) + // ============================================================================ + + [Fact] + public static void TestBacktickQuotedReference() + { + var parser = new Parser(); + var result = parser.Parse("`backtick quoted`"); + Assert.Equal("backtick quoted", GetSingleRefId(result)); + } + + [Fact] + public static void TestBacktickQuotedWithSpaces() + { + var parser = new Parser(); + var result = parser.Parse("`text with spaces`"); + Assert.Equal("text with spaces", GetSingleRefId(result)); + } + + [Fact] + public static void TestBacktickQuotedMultiline() + { + var parser = new Parser(); + var result = parser.Parse("(`line1\nline2`)"); + Assert.Single(result); + Assert.NotNull(result[0].Values); + Assert.Single(result[0].Values); + Assert.Equal("line1\nline2", result[0].Values![0].Id); + } + + [Fact] + public static void TestBacktickQuotedWithEscapedBacktick() + { + var parser = new Parser(); + var result = parser.Parse("`text with `` escaped backtick`"); + Assert.Equal("text with ` escaped backtick", GetSingleRefId(result)); + } + + // ============================================================================ + // Single Quote Tests (with escaping) + // ============================================================================ + + [Fact] + public static void TestSingleQuoteWithEscapedSingleQuote() + { + var parser = new Parser(); + var result = parser.Parse("'text with '' escaped quote'"); + Assert.Equal("text with ' escaped quote", GetSingleRefId(result)); + } + + // ============================================================================ + // Double Quote Tests (with escaping) + // ============================================================================ + + [Fact] + public static void TestDoubleQuoteWithEscapedDoubleQuote() + { + var parser = new Parser(); + var result = parser.Parse("\"text with 
\"\" escaped quote\""); + Assert.Equal("text with \" escaped quote", GetSingleRefId(result)); + } + + // ============================================================================ + // Double Quotes (2 quote chars) Tests + // ============================================================================ + + [Fact] + public static void TestDoubleDoubleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("\"\"double double quotes\"\""); + Assert.Equal("double double quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleDoubleQuotesWithSingleQuoteInside() + { + var parser = new Parser(); + var result = parser.Parse("\"\"text with \" inside\"\""); + Assert.Equal("text with \" inside", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleDoubleQuotesWithEscape() + { + var parser = new Parser(); + var result = parser.Parse("\"\"text with \"\"\"\" escaped double\"\""); + Assert.Equal("text with \"\" escaped double", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleSingleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("''double single quotes''"); + Assert.Equal("double single quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleSingleQuotesWithSingleQuoteInside() + { + var parser = new Parser(); + var result = parser.Parse("''text with ' inside''"); + Assert.Equal("text with ' inside", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleSingleQuotesWithEscape() + { + var parser = new Parser(); + var result = parser.Parse("''text with '''' escaped single''"); + Assert.Equal("text with '' escaped single", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleBacktickQuotes() + { + var parser = new Parser(); + var result = parser.Parse("``double backtick quotes``"); + Assert.Equal("double backtick quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleBacktickQuotesWithBacktickInside() 
+ { + var parser = new Parser(); + var result = parser.Parse("``text with ` inside``"); + Assert.Equal("text with ` inside", GetSingleRefId(result)); + } + + [Fact] + public static void TestDoubleBacktickQuotesWithEscape() + { + var parser = new Parser(); + var result = parser.Parse("``text with ```` escaped backtick``"); + Assert.Equal("text with `` escaped backtick", GetSingleRefId(result)); + } + + // ============================================================================ + // Triple Quotes (3 quote chars) Tests + // ============================================================================ + + [Fact] + public static void TestTripleDoubleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("\"\"\"triple double quotes\"\"\""); + Assert.Equal("triple double quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestTripleDoubleQuotesWithDoubleQuoteInside() + { + var parser = new Parser(); + var result = parser.Parse("\"\"\"text with \"\" inside\"\"\""); + Assert.Equal("text with \"\" inside", GetSingleRefId(result)); + } + + [Fact] + public static void TestTripleDoubleQuotesWithEscape() + { + var parser = new Parser(); + var result = parser.Parse("\"\"\"text with \"\"\"\"\"\" escaped triple\"\"\""); + Assert.Equal("text with \"\"\" escaped triple", GetSingleRefId(result)); + } + + [Fact] + public static void TestTripleSingleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("'''triple single quotes'''"); + Assert.Equal("triple single quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestTripleBacktickQuotes() + { + var parser = new Parser(); + var result = parser.Parse("```triple backtick quotes```"); + Assert.Equal("triple backtick quotes", GetSingleRefId(result)); + } + + // ============================================================================ + // Quadruple Quotes (4 quote chars) Tests + // ============================================================================ + + [Fact] 
+ public static void TestQuadrupleDoubleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("\"\"\"\"quadruple double quotes\"\"\"\""); + Assert.Equal("quadruple double quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestQuadrupleSingleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("''''quadruple single quotes''''"); + Assert.Equal("quadruple single quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestQuadrupleBacktickQuotes() + { + var parser = new Parser(); + var result = parser.Parse("````quadruple backtick quotes````"); + Assert.Equal("quadruple backtick quotes", GetSingleRefId(result)); + } + + // ============================================================================ + // Quintuple Quotes (5 quote chars) Tests + // ============================================================================ + + [Fact] + public static void TestQuintupleDoubleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("\"\"\"\"\"quintuple double quotes\"\"\"\"\""); + Assert.Equal("quintuple double quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestQuintupleSingleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("'''''quintuple single quotes'''''"); + Assert.Equal("quintuple single quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestQuintupleBacktickQuotes() + { + var parser = new Parser(); + var result = parser.Parse("`````quintuple backtick quotes`````"); + Assert.Equal("quintuple backtick quotes", GetSingleRefId(result)); + } + + // ============================================================================ + // Complex Scenarios Tests + // ============================================================================ + + [Fact] + public static void TestMixedQuotesInLink() + { + var parser = new Parser(); + var result = parser.Parse("(\"double\" 'single' `backtick`)"); + Assert.Single(result); + 
Assert.NotNull(result[0].Values); + Assert.Equal(3, result[0].Values!.Count); + Assert.Equal("double", result[0].Values[0].Id); + Assert.Equal("single", result[0].Values[1].Id); + Assert.Equal("backtick", result[0].Values[2].Id); + } + + [Fact] + public static void TestBacktickAsIdInLink() + { + var parser = new Parser(); + var result = parser.Parse("(`myId`: value1 value2)"); + Assert.Single(result); + Assert.Equal("myId", result[0].Id); + Assert.NotNull(result[0].Values); + Assert.Equal(2, result[0].Values!.Count); + } + + [Fact] + public static void TestCodeBlockLikeContent() + { + var parser = new Parser(); + var result = parser.Parse("```const x = 1;```"); + Assert.Equal("const x = 1;", GetSingleRefId(result)); + } + + [Fact] + public static void TestNestedQuotesInMarkdown() + { + var parser = new Parser(); + var result = parser.Parse("``Use `code` in markdown``"); + Assert.Equal("Use `code` in markdown", GetSingleRefId(result)); + } + + [Fact] + public static void TestJsonStringWithQuotes() + { + var parser = new Parser(); + var result = parser.Parse("\"\"{ \"key\": \"value\"}\"\""); + Assert.Equal("{ \"key\": \"value\"}", GetSingleRefId(result)); + } + + // ============================================================================ + // Edge Cases + // ============================================================================ + + [Fact] + public static void TestWhitespacePreservedInQuotes() + { + var parser = new Parser(); + var result = parser.Parse("\" spaces \""); + Assert.Equal(" spaces ", GetSingleRefId(result)); + } + + [Fact] + public static void TestMultilineInDoubleDoubleQuotes() + { + var parser = new Parser(); + var result = parser.Parse("(\"\"line1\nline2\"\")"); + Assert.Single(result); + Assert.NotNull(result[0].Values); + Assert.Single(result[0].Values); + Assert.Equal("line1\nline2", result[0].Values![0].Id); + } + } +} diff --git a/csharp/Link.Foundation.Links.Notation/Link.Foundation.Links.Notation.csproj 
b/csharp/Link.Foundation.Links.Notation/Link.Foundation.Links.Notation.csproj index 82dc830..c3d7402 100644 --- a/csharp/Link.Foundation.Links.Notation/Link.Foundation.Links.Notation.csproj +++ b/csharp/Link.Foundation.Links.Notation/Link.Foundation.Links.Notation.csproj @@ -4,7 +4,7 @@ Link.Foundation's Platform.Protocols.Lino Class Library Konstantin Diachenko Link.Foundation.Links.Notation - 0.12.0 + 0.13.0 Konstantin Diachenko net8 Link.Foundation.Links.Notation diff --git a/csharp/Link.Foundation.Links.Notation/Parser.peg b/csharp/Link.Foundation.Links.Notation/Parser.peg index 6b1bcaf..7165a58 100644 --- a/csharp/Link.Foundation.Links.Notation/Parser.peg +++ b/csharp/Link.Foundation.Links.Notation/Parser.peg @@ -21,10 +21,69 @@ singleLineValueLink > = v:singleLineValues { new Link(v) } multiLineValueLink > = "(" v:multiLineValues _ ")" { new Link(v) } indentedIdLink > = id:(reference) __ ":" eol { new Link(id) } -reference = doubleQuotedReference / singleQuotedReference / simpleReference +reference = quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference + simpleReference = "" referenceSymbol+ -doubleQuotedReference = '"' r:([^"]+) '"' { string.Join("", r) } -singleQuotedReference = "'" r:([^']+) "'" { string.Join("", r) } + +// Single quotes (1 quote char) with escaping via doubling +singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 + +doubleQuote1 = '"' r:doubleQuote1Content* '"' { string.Join("", r) } +doubleQuote1Content = '""' { "\"" } / c:[^"] { c.ToString() } + +singleQuote1 = "'" r:singleQuote1Content* "'" { string.Join("", r) } +singleQuote1Content = "''" { "'" } / c:[^'] { c.ToString() } + +backtickQuote1 = '`' r:backtickQuote1Content* '`' { string.Join("", r) } +backtickQuote1Content = '``' { "`" } / c:[^`] { c.ToString() } + +// Double quotes (2 quote chars) +doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 + +doubleQuote2 
= '""' r:doubleQuote2Content* '""' { string.Join("", r) } +doubleQuote2Content = '""""' { "\"\"" } / !'""' c:. { c.ToString() } + +singleQuote2 = "''" r:singleQuote2Content* "''" { string.Join("", r) } +singleQuote2Content = "''''" { "''" } / !"''" c:. { c.ToString() } + +backtickQuote2 = '``' r:backtickQuote2Content* '``' { string.Join("", r) } +backtickQuote2Content = '````' { "``" } / !'``' c:. { c.ToString() } + +// Triple quotes (3 quote chars) +tripleQuotedReference = doubleQuote3 / singleQuote3 / backtickQuote3 + +doubleQuote3 = '"""' r:doubleQuote3Content* '"""' { string.Join("", r) } +doubleQuote3Content = '""""""' { "\"\"\"" } / !'"""' c:. { c.ToString() } + +singleQuote3 = "'''" r:singleQuote3Content* "'''" { string.Join("", r) } +singleQuote3Content = "''''''" { "'''" } / !"'''" c:. { c.ToString() } + +backtickQuote3 = '```' r:backtickQuote3Content* '```' { string.Join("", r) } +backtickQuote3Content = '``````' { "```" } / !'```' c:. { c.ToString() } + +// Quadruple quotes (4 quote chars); the escape is 2*4 = 8 quote chars and yields 4 in the output +quadrupleQuotedReference = doubleQuote4 / singleQuote4 / backtickQuote4 + +doubleQuote4 = '""""' r:doubleQuote4Content* '""""' { string.Join("", r) } +doubleQuote4Content = '""""""""' { "\"\"\"\"" } / !'""""' c:. { c.ToString() } + +singleQuote4 = "''''" r:singleQuote4Content* "''''" { string.Join("", r) } +singleQuote4Content = "''''''''" { "''''" } / !"''''" c:. { c.ToString() } + +backtickQuote4 = '````' r:backtickQuote4Content* '````' { string.Join("", r) } +backtickQuote4Content = '````````' { "````" } / !'````' c:. { c.ToString() } + +// Quintuple quotes (5 quote chars) +quintupleQuotedReference = doubleQuote5 / singleQuote5 / backtickQuote5 + +doubleQuote5 = '"""""' r:doubleQuote5Content* '"""""' { string.Join("", r) } +doubleQuote5Content = '""""""""""' { "\"\"\"\"\"" } / !'"""""' c:. { c.ToString() } + +singleQuote5 = "'''''" r:singleQuote5Content* "'''''" { string.Join("", r) } +singleQuote5Content = "''''''''''" { "'''''" } / !"'''''" c:.
{ c.ToString() } + +backtickQuote5 = '`````' r:backtickQuote5Content* '`````' { string.Join("", r) } +backtickQuote5Content = '``````````' { "`````" } / !'`````' c:. { c.ToString() } SET_BASE_INDENTATION = spaces:" "* #{ if ((int)state["BaseIndentation"] == -1) state["BaseIndentation"] = spaces.Count; } PUSH_INDENTATION = spaces:" "* #{ state["NormalizedIndent"] = spaces.Count - ((int)state["BaseIndentation"] == -1 ? 0 : (int)state["BaseIndentation"]); if ((int)state["NormalizedIndent"] < 0) state["NormalizedIndent"] = 0; } &{ (int)state["NormalizedIndent"] > (int)state["IndentationStack"].Peek() } #{ state["IndentationStack"].Push((int)state["NormalizedIndent"]); } POP_INDENTATION = #{ state["IndentationStack"].Pop(); } diff --git a/js/package.json b/js/package.json index f32327b..da563f1 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "links-notation", - "version": "0.12.0", + "version": "0.13.0", "description": "Links Notation parser for JavaScript", "main": "dist/index.js", "type": "module", diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index a48e1a2..1aa96c2 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -34,6 +34,25 @@ function getCurrentIndentation() { return indentationStack[indentationStack.length - 1]; } + + // Process escape sequences for multi-quote strings + // For N quotes: 2*N consecutive quotes become N quotes + function processEscapes(content, quoteChar, quoteCount) { + const escapeSequence = quoteChar.repeat(quoteCount * 2); + const replacement = quoteChar.repeat(quoteCount); + let result = ''; + let i = 0; + while (i < content.length) { + if (content.substr(i, escapeSequence.length) === escapeSequence) { + result += replacement; + i += escapeSequence.length; + } else { + result += content[i]; + i++; + } + } + return result; + } } document = &{ indentationStack = [0]; baseIndentation = null; return true; } skipEmptyLines links:links _ eof { return links; } @@ -79,13 +98,73 @@ 
multiLineValueLink = "(" v:multiLineValues _ ")" { return { values: v }; } indentedIdLink = id:reference __ ":" eol { return { id: id, values: [] }; } -reference = doubleQuotedReference / singleQuotedReference / simpleReference +// Reference can be quoted (with 1-5+ quotes) or simple unquoted +reference = quotedReference / simpleReference simpleReference = chars:referenceSymbol+ { return chars.join(''); } -doubleQuotedReference = '"' r:[^"]+ '"' { return r.join(''); } +// Quoted references - try longer quote sequences first (greedy matching) +quotedReference = quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference + +// Single quote (1 quote char) +singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 + +doubleQuote1 = '"' r:doubleQuote1Content* '"' { return r.join(''); } +doubleQuote1Content = '""' { return '"'; } / [^"] + +singleQuote1 = "'" r:singleQuote1Content* "'" { return r.join(''); } +singleQuote1Content = "''" { return "'"; } / [^'] + +backtickQuote1 = '`' r:backtickQuote1Content* '`' { return r.join(''); } +backtickQuote1Content = '``' { return '`'; } / [^`] + +// Double quotes (2 quote chars) +doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 + +doubleQuote2 = '""' r:doubleQuote2Content* '""' { return r.join(''); } +doubleQuote2Content = '""""' { return '""'; } / !('""') c:. { return c; } + +singleQuote2 = "''" r:singleQuote2Content* "''" { return r.join(''); } +singleQuote2Content = "''''" { return "''"; } / !("''") c:. { return c; } + +backtickQuote2 = '``' r:backtickQuote2Content* '``' { return r.join(''); } +backtickQuote2Content = '````' { return '``'; } / !('``') c:. { return c; } + +// Triple quotes (3 quote chars) +tripleQuotedReference = doubleQuote3 / singleQuote3 / backtickQuote3 + +doubleQuote3 = '"""' r:doubleQuote3Content* '"""' { return r.join(''); } +doubleQuote3Content = '""""""' { return '"""'; } / !('"""') c:. 
{ return c; } + +singleQuote3 = "'''" r:singleQuote3Content* "'''" { return r.join(''); } +singleQuote3Content = "''''''" { return "'''"; } / !("'''") c:. { return c; } + +backtickQuote3 = '```' r:backtickQuote3Content* '```' { return r.join(''); } +backtickQuote3Content = '``````' { return '```'; } / !('```') c:. { return c; } + +// Quadruple quotes (4 quote chars); the escape is 2*4 = 8 quote chars and yields 4 in the output +quadrupleQuotedReference = doubleQuote4 / singleQuote4 / backtickQuote4 + +doubleQuote4 = '""""' r:doubleQuote4Content* '""""' { return r.join(''); } +doubleQuote4Content = '""""""""' { return '""""'; } / !('""""') c:. { return c; } + +singleQuote4 = "''''" r:singleQuote4Content* "''''" { return r.join(''); } +singleQuote4Content = "''''''''" { return "''''"; } / !("''''") c:. { return c; } + +backtickQuote4 = '````' r:backtickQuote4Content* '````' { return r.join(''); } +backtickQuote4Content = '````````' { return '````'; } / !('````') c:. { return c; } + +// Quintuple quotes (5 quote chars) +quintupleQuotedReference = doubleQuote5 / singleQuote5 / backtickQuote5 + +doubleQuote5 = '"""""' r:doubleQuote5Content* '"""""' { return r.join(''); } +doubleQuote5Content = '""""""""""' { return '"""""'; } / !('"""""') c:. { return c; } + +singleQuote5 = "'''''" r:singleQuote5Content* "'''''" { return r.join(''); } +singleQuote5Content = "''''''''''" { return "'''''"; } / !("'''''") c:. { return c; } -singleQuotedReference = "'" r:[^']+ "'" { return r.join(''); } +backtickQuote5 = '`````' r:backtickQuote5Content* '`````' { return r.join(''); } +backtickQuote5Content = '``````````' { return '`````'; } / !('`````') c:.
{ return c; } SET_BASE_INDENTATION = spaces:" "* { setBaseIndentation(spaces); } @@ -103,4 +182,4 @@ _ = whiteSpaceSymbol* whiteSpaceSymbol = [ \t\n\r] -referenceSymbol = [^ \t\n\r(:)] \ No newline at end of file +referenceSymbol = [^ \t\n\r(:)] diff --git a/js/src/parser-generated.js b/js/src/parser-generated.js index 6d7c669..80dcc08 100644 --- a/js/src/parser-generated.js +++ b/js/src/parser-generated.js @@ -168,15 +168,37 @@ function peg$parse(input, options) { const peg$c1 = "("; const peg$c2 = ")"; const peg$c3 = "\""; - const peg$c4 = "'"; - const peg$c5 = " "; + const peg$c4 = "\"\""; + const peg$c5 = "'"; + const peg$c6 = "''"; + const peg$c7 = "`"; + const peg$c8 = "``"; + const peg$c9 = "\"\"\"\""; + const peg$c10 = "''''"; + const peg$c11 = "````"; + const peg$c12 = "\"\"\""; + const peg$c13 = "\"\"\"\"\"\""; + const peg$c14 = "'''"; + const peg$c15 = "''''''"; + const peg$c16 = "```"; + const peg$c17 = "``````"; + const peg$c18 = "\"\"\"\"\"\"\"\""; + const peg$c19 = "''''''''''"; + const peg$c20 = "````````"; + const peg$c21 = "\"\"\"\"\""; + const peg$c22 = "\"\"\"\"\"\"\"\"\"\""; + const peg$c23 = "'''''"; + const peg$c24 = "`````"; + const peg$c25 = "``````````"; + const peg$c26 = " "; const peg$r0 = /^[ \t]/; const peg$r1 = /^[\r\n]/; const peg$r2 = /^[^"]/; const peg$r3 = /^[^']/; - const peg$r4 = /^[ \t\n\r]/; - const peg$r5 = /^[^ \t\n\r(:)]/; + const peg$r4 = /^[^`]/; + const peg$r5 = /^[ \t\n\r]/; + const peg$r6 = /^[^ \t\n\r(:)]/; const peg$e0 = peg$classExpectation([" ", "\t"], false, false, false); const peg$e1 = peg$classExpectation(["\r", "\n"], false, false, false); @@ -184,13 +206,35 @@ function peg$parse(input, options) { const peg$e3 = peg$literalExpectation("(", false); const peg$e4 = peg$literalExpectation(")", false); const peg$e5 = peg$literalExpectation("\"", false); - const peg$e6 = peg$classExpectation(["\""], true, false, false); - const peg$e7 = peg$literalExpectation("'", false); - const peg$e8 = peg$classExpectation(["'"], 
true, false, false); - const peg$e9 = peg$literalExpectation(" ", false); - const peg$e10 = peg$anyExpectation(); - const peg$e11 = peg$classExpectation([" ", "\t", "\n", "\r"], false, false, false); - const peg$e12 = peg$classExpectation([" ", "\t", "\n", "\r", "(", ":", ")"], true, false, false); + const peg$e6 = peg$literalExpectation("\"\"", false); + const peg$e7 = peg$classExpectation(["\""], true, false, false); + const peg$e8 = peg$literalExpectation("'", false); + const peg$e9 = peg$literalExpectation("''", false); + const peg$e10 = peg$classExpectation(["'"], true, false, false); + const peg$e11 = peg$literalExpectation("`", false); + const peg$e12 = peg$literalExpectation("``", false); + const peg$e13 = peg$classExpectation(["`"], true, false, false); + const peg$e14 = peg$literalExpectation("\"\"\"\"", false); + const peg$e15 = peg$anyExpectation(); + const peg$e16 = peg$literalExpectation("''''", false); + const peg$e17 = peg$literalExpectation("````", false); + const peg$e18 = peg$literalExpectation("\"\"\"", false); + const peg$e19 = peg$literalExpectation("\"\"\"\"\"\"", false); + const peg$e20 = peg$literalExpectation("'''", false); + const peg$e21 = peg$literalExpectation("''''''", false); + const peg$e22 = peg$literalExpectation("```", false); + const peg$e23 = peg$literalExpectation("``````", false); + const peg$e24 = peg$literalExpectation("\"\"\"\"\"\"\"\"", false); + const peg$e25 = peg$literalExpectation("''''''''''", false); + const peg$e26 = peg$literalExpectation("````````", false); + const peg$e27 = peg$literalExpectation("\"\"\"\"\"", false); + const peg$e28 = peg$literalExpectation("\"\"\"\"\"\"\"\"\"\"", false); + const peg$e29 = peg$literalExpectation("'''''", false); + const peg$e30 = peg$literalExpectation("`````", false); + const peg$e31 = peg$literalExpectation("``````````", false); + const peg$e32 = peg$literalExpectation(" ", false); + const peg$e33 = peg$classExpectation([" ", "\t", "\n", "\r"], false, false, false); + const 
peg$e34 = peg$classExpectation([" ", "\t", "\n", "\r", "(", ":", ")"], true, false, false); function peg$f0() { indentationStack = [0]; baseIndentation = null; return true; } function peg$f1(links) { return links; } @@ -221,11 +265,51 @@ function peg$parse(input, options) { function peg$f24(id) { return { id: id, values: [] }; } function peg$f25(chars) { return chars.join(''); } function peg$f26(r) { return r.join(''); } - function peg$f27(r) { return r.join(''); } - function peg$f28(spaces) { setBaseIndentation(spaces); } - function peg$f29(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } - function peg$f30(spaces) { pushIndentation(spaces); } - function peg$f31(spaces) { return checkIndentation(spaces); } + function peg$f27() { return '"'; } + function peg$f28(r) { return r.join(''); } + function peg$f29() { return "'"; } + function peg$f30(r) { return r.join(''); } + function peg$f31() { return '`'; } + function peg$f32(r) { return r.join(''); } + function peg$f33() { return '""'; } + function peg$f34(c) { return c; } + function peg$f35(r) { return r.join(''); } + function peg$f36() { return "''"; } + function peg$f37(c) { return c; } + function peg$f38(r) { return r.join(''); } + function peg$f39() { return '``'; } + function peg$f40(c) { return c; } + function peg$f41(r) { return r.join(''); } + function peg$f42() { return '"""'; } + function peg$f43(c) { return c; } + function peg$f44(r) { return r.join(''); } + function peg$f45() { return "'''"; } + function peg$f46(c) { return c; } + function peg$f47(r) { return r.join(''); } + function peg$f48() { return '```'; } + function peg$f49(c) { return c; } + function peg$f50(r) { return r.join(''); } + function peg$f51() { return '""""'; } + function peg$f52(c) { return c; } + function peg$f53(r) { return r.join(''); } + function peg$f54() { return "''''"; } + function peg$f55(c) { return c; } + function peg$f56(r) { return r.join(''); } + function peg$f57() { return '````'; } + function 
peg$f58(c) { return c; } + function peg$f59(r) { return r.join(''); } + function peg$f60() { return '"""""'; } + function peg$f61(c) { return c; } + function peg$f62(r) { return r.join(''); } + function peg$f63() { return "'''''"; } + function peg$f64(c) { return c; } + function peg$f65(r) { return r.join(''); } + function peg$f66() { return '`````'; } + function peg$f67(c) { return c; } + function peg$f68(spaces) { setBaseIndentation(spaces); } + function peg$f69(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } + function peg$f70(spaces) { pushIndentation(spaces); } + function peg$f71(spaces) { return checkIndentation(spaces); } let peg$currPos = options.peg$currPos | 0; let peg$savedPos = peg$currPos; const peg$posDetailsCache = [{ line: 1, column: 1 }]; @@ -998,85 +1082,1301 @@ function peg$parse(input, options) { function peg$parsereference() { let s0; - s0 = peg$parsedoubleQuotedReference(); + s0 = peg$parsequotedReference(); if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuotedReference(); + s0 = peg$parsesimpleReference(); + } + + return s0; + } + + function peg$parsesimpleReference() { + let s0, s1, s2; + + s0 = peg$currPos; + s1 = []; + s2 = peg$parsereferenceSymbol(); + if (s2 !== peg$FAILED) { + while (s2 !== peg$FAILED) { + s1.push(s2); + s2 = peg$parsereferenceSymbol(); + } + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f25(s1); + } + s0 = s1; + + return s0; + } + + function peg$parsequotedReference() { + let s0; + + s0 = peg$parsequintupleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsequadrupleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsetripleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsedoubleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuotedReference(); + } + } + } + } + + return s0; + } + + function peg$parsesingleQuotedReference() { + let s0; + + s0 = peg$parsedoubleQuote1(); + if (s0 === peg$FAILED) { + 
s0 = peg$parsesingleQuote1(); + if (s0 === peg$FAILED) { + s0 = peg$parsebacktickQuote1(); + } + } + + return s0; + } + + function peg$parsedoubleQuote1() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 34) { + s1 = peg$c3; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e5); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsedoubleQuote1Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsedoubleQuote1Content(); + } + if (input.charCodeAt(peg$currPos) === 34) { + s3 = peg$c3; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e5); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f26(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsedoubleQuote1Content() { + let s0, s1; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 2) === peg$c4) { + s1 = peg$c4; + peg$currPos += 2; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f27(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = input.charAt(peg$currPos); + if (peg$r2.test(s0)) { + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e7); } + } + } + + return s0; + } + + function peg$parsesingleQuote1() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 39) { + s1 = peg$c5; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsesingleQuote1Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsesingleQuote1Content(); + } + if (input.charCodeAt(peg$currPos) === 39) { + s3 = peg$c5; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } 
+ } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f28(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsesingleQuote1Content() { + let s0, s1; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 2) === peg$c6) { + s1 = peg$c6; + peg$currPos += 2; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f29(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = input.charAt(peg$currPos); + if (peg$r3.test(s0)) { + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + } + + return s0; + } + + function peg$parsebacktickQuote1() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 96) { + s1 = peg$c7; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e11); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsebacktickQuote1Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsebacktickQuote1Content(); + } + if (input.charCodeAt(peg$currPos) === 96) { + s3 = peg$c7; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e11); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f30(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsebacktickQuote1Content() { + let s0, s1; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 2) === peg$c8) { + s1 = peg$c8; + peg$currPos += 2; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e12); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f31(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = input.charAt(peg$currPos); + if (peg$r4.test(s0)) { + peg$currPos++; + } else { + s0 = peg$FAILED; + if 
(peg$silentFails === 0) { peg$fail(peg$e13); } + } + } + + return s0; + } + + function peg$parsedoubleQuotedReference() { + let s0; + + s0 = peg$parsedoubleQuote2(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuote2(); + if (s0 === peg$FAILED) { + s0 = peg$parsebacktickQuote2(); + } + } + + return s0; + } + + function peg$parsedoubleQuote2() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 2) === peg$c4) { + s1 = peg$c4; + peg$currPos += 2; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsedoubleQuote2Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsedoubleQuote2Content(); + } + if (input.substr(peg$currPos, 2) === peg$c4) { + s3 = peg$c4; + peg$currPos += 2; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f32(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsedoubleQuote2Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 4) === peg$c9) { + s1 = peg$c9; + peg$currPos += 4; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e14); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f33(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 2) === peg$c4) { + s2 = peg$c4; + peg$currPos += 2; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { 
peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f34(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsesingleQuote2() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 2) === peg$c6) { + s1 = peg$c6; + peg$currPos += 2; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsesingleQuote2Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsesingleQuote2Content(); + } + if (input.substr(peg$currPos, 2) === peg$c6) { + s3 = peg$c6; + peg$currPos += 2; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f35(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsesingleQuote2Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 4) === peg$c10) { + s1 = peg$c10; + peg$currPos += 4; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e16); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f36(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 2) === peg$c6) { + s2 = peg$c6; + peg$currPos += 2; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f37(s2); + } else { 
+ peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsebacktickQuote2() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 2) === peg$c8) { + s1 = peg$c8; + peg$currPos += 2; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e12); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsebacktickQuote2Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsebacktickQuote2Content(); + } + if (input.substr(peg$currPos, 2) === peg$c8) { + s3 = peg$c8; + peg$currPos += 2; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e12); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f38(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsebacktickQuote2Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 4) === peg$c11) { + s1 = peg$c11; + peg$currPos += 4; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e17); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f39(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 2) === peg$c8) { + s2 = peg$c8; + peg$currPos += 2; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e12); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f40(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; 
+ } + } + + return s0; + } + + function peg$parsetripleQuotedReference() { + let s0; + + s0 = peg$parsedoubleQuote3(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuote3(); + if (s0 === peg$FAILED) { + s0 = peg$parsebacktickQuote3(); + } + } + + return s0; + } + + function peg$parsedoubleQuote3() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 3) === peg$c12) { + s1 = peg$c12; + peg$currPos += 3; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e18); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsedoubleQuote3Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsedoubleQuote3Content(); + } + if (input.substr(peg$currPos, 3) === peg$c12) { + s3 = peg$c12; + peg$currPos += 3; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e18); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f41(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsedoubleQuote3Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 6) === peg$c13) { + s1 = peg$c13; + peg$currPos += 6; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e19); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f42(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 3) === peg$c12) { + s2 = peg$c12; + peg$currPos += 3; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e18); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { 
+ peg$savedPos = s0; + s0 = peg$f43(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsesingleQuote3() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 3) === peg$c14) { + s1 = peg$c14; + peg$currPos += 3; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e20); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsesingleQuote3Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsesingleQuote3Content(); + } + if (input.substr(peg$currPos, 3) === peg$c14) { + s3 = peg$c14; + peg$currPos += 3; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e20); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f44(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsesingleQuote3Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 6) === peg$c15) { + s1 = peg$c15; + peg$currPos += 6; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e21); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f45(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 3) === peg$c14) { + s2 = peg$c14; + peg$currPos += 3; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e20); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f46(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } 
+ } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsebacktickQuote3() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 3) === peg$c16) { + s1 = peg$c16; + peg$currPos += 3; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e22); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsebacktickQuote3Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsebacktickQuote3Content(); + } + if (input.substr(peg$currPos, 3) === peg$c16) { + s3 = peg$c16; + peg$currPos += 3; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e22); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f47(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsebacktickQuote3Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 6) === peg$c17) { + s1 = peg$c17; + peg$currPos += 6; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e23); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f48(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 3) === peg$c16) { + s2 = peg$c16; + peg$currPos += 3; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e22); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f49(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + 
function peg$parsequadrupleQuotedReference() { + let s0; + + s0 = peg$parsedoubleQuote4(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuote4(); if (s0 === peg$FAILED) { - s0 = peg$parsesimpleReference(); + s0 = peg$parsebacktickQuote4(); + } + } + + return s0; + } + + function peg$parsedoubleQuote4() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 4) === peg$c9) { + s1 = peg$c9; + peg$currPos += 4; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e14); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsedoubleQuote4Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsedoubleQuote4Content(); + } + if (input.substr(peg$currPos, 4) === peg$c9) { + s3 = peg$c9; + peg$currPos += 4; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e14); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f50(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsedoubleQuote4Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 8) === peg$c18) { + s1 = peg$c18; + peg$currPos += 8; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e24); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f51(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 4) === peg$c9) { + s2 = peg$c9; + peg$currPos += 4; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e14); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + 
peg$savedPos = s0; + s0 = peg$f52(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsesingleQuote4() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 4) === peg$c10) { + s1 = peg$c10; + peg$currPos += 4; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e16); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsesingleQuote4Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsesingleQuote4Content(); + } + if (input.substr(peg$currPos, 4) === peg$c10) { + s3 = peg$c10; + peg$currPos += 4; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e16); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f53(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsesingleQuote4Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 10) === peg$c19) { + s1 = peg$c19; + peg$currPos += 10; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e25); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f54(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 4) === peg$c10) { + s2 = peg$c10; + peg$currPos += 4; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e16); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f55(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } 
+ } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsebacktickQuote4() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 4) === peg$c11) { + s1 = peg$c11; + peg$currPos += 4; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e17); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parsebacktickQuote4Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsebacktickQuote4Content(); + } + if (input.substr(peg$currPos, 4) === peg$c11) { + s3 = peg$c11; + peg$currPos += 4; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e17); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f56(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; } + } else { + peg$currPos = s0; + s0 = peg$FAILED; } return s0; } - function peg$parsesimpleReference() { + function peg$parsebacktickQuote4Content() { let s0, s1, s2; s0 = peg$currPos; - s1 = []; - s2 = peg$parsereferenceSymbol(); - if (s2 !== peg$FAILED) { - while (s2 !== peg$FAILED) { - s1.push(s2); - s2 = peg$parsereferenceSymbol(); - } + if (input.substr(peg$currPos, 8) === peg$c20) { + s1 = peg$c20; + peg$currPos += 8; } else { s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e26); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f25(s1); + s1 = peg$f57(); } s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 4) === peg$c11) { + s2 = peg$c11; + peg$currPos += 4; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e17); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== 
peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f58(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } return s0; } - function peg$parsedoubleQuotedReference() { + function peg$parsequintupleQuotedReference() { + let s0; + + s0 = peg$parsedoubleQuote5(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuote5(); + if (s0 === peg$FAILED) { + s0 = peg$parsebacktickQuote5(); + } + } + + return s0; + } + + function peg$parsedoubleQuote5() { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 34) { - s1 = peg$c3; - peg$currPos++; + if (input.substr(peg$currPos, 5) === peg$c21) { + s1 = peg$c21; + peg$currPos += 5; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e5); } + if (peg$silentFails === 0) { peg$fail(peg$e27); } } if (s1 !== peg$FAILED) { s2 = []; - s3 = input.charAt(peg$currPos); - if (peg$r2.test(s3)) { - peg$currPos++; + s3 = peg$parsedoubleQuote5Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsedoubleQuote5Content(); + } + if (input.substr(peg$currPos, 5) === peg$c21) { + s3 = peg$c21; + peg$currPos += 5; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } + if (peg$silentFails === 0) { peg$fail(peg$e27); } } if (s3 !== peg$FAILED) { - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = input.charAt(peg$currPos); - if (peg$r2.test(s3)) { - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } - } - } + peg$savedPos = s0; + s0 = peg$f59(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsedoubleQuote5Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 10) === peg$c22) { + s1 = peg$c22; + peg$currPos += 10; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e28); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = 
s0; + s1 = peg$f60(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 5) === peg$c21) { + s2 = peg$c21; + peg$currPos += 5; } else { s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e27); } } - if (s2 !== peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 34) { - s3 = peg$c3; + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); peg$currPos++; } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e5); } + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } } - if (s3 !== peg$FAILED) { + if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f26(s2); + s0 = peg$f61(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1085,59 +2385,96 @@ function peg$parse(input, options) { peg$currPos = s0; s0 = peg$FAILED; } - } else { - peg$currPos = s0; - s0 = peg$FAILED; } return s0; } - function peg$parsesingleQuotedReference() { + function peg$parsesingleQuote5() { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 39) { - s1 = peg$c4; - peg$currPos++; + if (input.substr(peg$currPos, 5) === peg$c23) { + s1 = peg$c23; + peg$currPos += 5; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e7); } + if (peg$silentFails === 0) { peg$fail(peg$e29); } } if (s1 !== peg$FAILED) { s2 = []; - s3 = input.charAt(peg$currPos); - if (peg$r3.test(s3)) { - peg$currPos++; + s3 = peg$parsesingleQuote5Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsesingleQuote5Content(); + } + if (input.substr(peg$currPos, 5) === peg$c23) { + s3 = peg$c23; + peg$currPos += 5; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } + if (peg$silentFails === 0) { peg$fail(peg$e29); } } if (s3 !== peg$FAILED) { - while (s3 !== 
peg$FAILED) { - s2.push(s3); - s3 = input.charAt(peg$currPos); - if (peg$r3.test(s3)) { - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - } + peg$savedPos = s0; + s0 = peg$f62(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parsesingleQuote5Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 10) === peg$c19) { + s1 = peg$c19; + peg$currPos += 10; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e25); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f63(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 5) === peg$c23) { + s2 = peg$c23; + peg$currPos += 5; } else { s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e29); } } - if (s2 !== peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 39) { - s3 = peg$c4; + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); peg$currPos++; } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e7); } + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } } - if (s3 !== peg$FAILED) { + if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f27(s2); + s0 = peg$f64(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1146,6 +2483,43 @@ function peg$parse(input, options) { peg$currPos = s0; s0 = peg$FAILED; } + } + + return s0; + } + + function peg$parsebacktickQuote5() { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 5) === peg$c24) { + s1 = peg$c24; + peg$currPos += 5; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e30); } + } + if (s1 !== peg$FAILED) { + s2 = 
[]; + s3 = peg$parsebacktickQuote5Content(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parsebacktickQuote5Content(); + } + if (input.substr(peg$currPos, 5) === peg$c24) { + s3 = peg$c24; + peg$currPos += 5; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e30); } + } + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f65(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1154,30 +2528,88 @@ function peg$parse(input, options) { return s0; } + function peg$parsebacktickQuote5Content() { + let s0, s1, s2; + + s0 = peg$currPos; + if (input.substr(peg$currPos, 10) === peg$c25) { + s1 = peg$c25; + peg$currPos += 10; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e31); } + } + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f66(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 5) === peg$c24) { + s2 = peg$c24; + peg$currPos += 5; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e30); } + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = undefined; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e15); } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f67(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + function peg$parseSET_BASE_INDENTATION() { let s0, s1, s2; s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c26; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e32); } } while (s2 !== 
peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c26; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e32); } } } peg$savedPos = s0; - s1 = peg$f28(s1); + s1 = peg$f68(s1); s0 = s1; return s0; @@ -1189,24 +2621,24 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c26; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e32); } } while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c26; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e32); } } } peg$savedPos = peg$currPos; - s2 = peg$f29(s1); + s2 = peg$f69(s1); if (s2) { s2 = undefined; } else { @@ -1214,7 +2646,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f30(s1); + s0 = peg$f70(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1229,24 +2661,24 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c26; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e32); } } while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c26; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e32); } } } peg$savedPos = peg$currPos; - s2 = peg$f31(s1); + s2 = peg$f71(s1); if (s2) { s2 = undefined; } else { @@ -1314,7 +2746,7 @@ function peg$parse(input, options) { peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) 
{ peg$fail(peg$e10); } + if (peg$silentFails === 0) { peg$fail(peg$e15); } } peg$silentFails--; if (s1 === peg$FAILED) { @@ -1369,11 +2801,11 @@ function peg$parse(input, options) { let s0; s0 = input.charAt(peg$currPos); - if (peg$r4.test(s0)) { + if (peg$r5.test(s0)) { peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e11); } + if (peg$silentFails === 0) { peg$fail(peg$e33); } } return s0; @@ -1383,11 +2815,11 @@ function peg$parse(input, options) { let s0; s0 = input.charAt(peg$currPos); - if (peg$r5.test(s0)) { + if (peg$r6.test(s0)) { peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } + if (peg$silentFails === 0) { peg$fail(peg$e34); } } return s0; @@ -1430,6 +2862,25 @@ function peg$parse(input, options) { return indentationStack[indentationStack.length - 1]; } + // Process escape sequences for multi-quote strings + // For N quotes: 2*N consecutive quotes become N quotes + function processEscapes(content, quoteChar, quoteCount) { + const escapeSequence = quoteChar.repeat(quoteCount * 2); + const replacement = quoteChar.repeat(quoteCount); + let result = ''; + let i = 0; + while (i < content.length) { + if (content.substr(i, escapeSequence.length) === escapeSequence) { + result += replacement; + i += escapeSequence.length; + } else { + result += content[i]; + i++; + } + } + return result; + } + peg$result = peg$startRuleFunction(); const peg$success = (peg$result !== peg$FAILED && peg$currPos === input.length); diff --git a/js/tests/MultiQuoteParser.test.js b/js/tests/MultiQuoteParser.test.js new file mode 100644 index 0000000..e11e42a --- /dev/null +++ b/js/tests/MultiQuoteParser.test.js @@ -0,0 +1,408 @@ +import { test, expect } from "bun:test"; +import { Parser } from "../src/Parser.js"; + +const parser = new Parser(); + +// Helper to extract the reference ID from a single-value link +function getSingleRefId(result) { + // Single reference parses as: { id: null, values: [{ id: 
"the-id", values: [] }] } + if (result.length === 1 && result[0].id === null && result[0].values.length === 1) { + return result[0].values[0].id; + } + return result[0]?.id; +} + +// ============================================================================ +// Backtick Quote Tests (Single Backtick) +// ============================================================================ + +test("TestBacktickQuotedReference", () => { + const input = "`backtick quoted`"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("backtick quoted"); +}); + +test("TestBacktickQuotedWithSpaces", () => { + const input = "`text with spaces`"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with spaces"); +}); + +test("TestBacktickQuotedMultiline", () => { + const input = "(`line1\nline2`)"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(result[0].values[0].id).toBe("line1\nline2"); +}); + +test("TestBacktickQuotedWithEscapedBacktick", () => { + const input = "`text with `` escaped backtick`"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with ` escaped backtick"); +}); + +// ============================================================================ +// Single Quote Tests (with escaping) +// ============================================================================ + +test("TestSingleQuoteWithEscapedSingleQuote", () => { + const input = "'text with '' escaped quote'"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with ' escaped quote"); +}); + +// ============================================================================ +// Double Quote Tests (with escaping) +// ============================================================================ + 
+test("TestDoubleQuoteWithEscapedDoubleQuote", () => { + const input = '"text with "" escaped quote"'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with " escaped quote'); +}); + +// ============================================================================ +// Double Quotes (2 quote chars) Tests +// ============================================================================ + +test("TestDoubleDoubleQuotes", () => { + const input = '""double double quotes""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("double double quotes"); +}); + +test("TestDoubleDoubleQuotesWithSingleQuoteInside", () => { + const input = '""text with " inside""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with " inside'); +}); + +test("TestDoubleDoubleQuotesWithEscape", () => { + const input = '""text with """" escaped double""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with "" escaped double'); +}); + +test("TestDoubleSingleQuotes", () => { + const input = "''double single quotes''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("double single quotes"); +}); + +test("TestDoubleSingleQuotesWithSingleQuoteInside", () => { + const input = "''text with ' inside''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with ' inside"); +}); + +test("TestDoubleSingleQuotesWithEscape", () => { + const input = "''text with '''' escaped single''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with '' escaped single"); +}); + +test("TestDoubleBacktickQuotes", () => { + const input = "``double backtick quotes``"; + const 
result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("double backtick quotes"); +}); + +test("TestDoubleBacktickQuotesWithBacktickInside", () => { + const input = "``text with ` inside``"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with ` inside"); +}); + +test("TestDoubleBacktickQuotesWithEscape", () => { + const input = "``text with ```` escaped backtick``"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with `` escaped backtick"); +}); + +// ============================================================================ +// Triple Quotes (3 quote chars) Tests +// ============================================================================ + +test("TestTripleDoubleQuotes", () => { + const input = '"""triple double quotes"""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("triple double quotes"); +}); + +test("TestTripleDoubleQuotesWithDoubleQuoteInside", () => { + const input = '"""text with "" inside"""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with "" inside'); +}); + +test("TestTripleDoubleQuotesWithEscape", () => { + const input = '"""text with """""" escaped triple"""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with """ escaped triple'); +}); + +test("TestTripleSingleQuotes", () => { + const input = "'''triple single quotes'''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("triple single quotes"); +}); + +test("TestTripleSingleQuotesWithDoubleQuoteInside", () => { + const input = "'''text with '' inside'''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + 
expect(getSingleRefId(result)).toBe("text with '' inside"); +}); + +test("TestTripleSingleQuotesWithEscape", () => { + const input = "'''text with '''''' escaped triple'''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with ''' escaped triple"); +}); + +test("TestTripleBacktickQuotes", () => { + const input = "```triple backtick quotes```"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("triple backtick quotes"); +}); + +test("TestTripleBacktickQuotesWithDoubleBacktickInside", () => { + const input = "```text with `` inside```"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with `` inside"); +}); + +test("TestTripleBacktickQuotesWithEscape", () => { + const input = "```text with `````` escaped triple```"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("text with ``` escaped triple"); +}); + +// ============================================================================ +// Quadruple Quotes (4 quote chars) Tests +// ============================================================================ + +test("TestQuadrupleDoubleQuotes", () => { + const input = '""""quadruple double quotes""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("quadruple double quotes"); +}); + +test("TestQuadrupleDoubleQuotesWithTripleQuoteInside", () => { + const input = '""""text with """ inside""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with """ inside'); +}); + +test("TestQuadrupleDoubleQuotesWithEscape", () => { + const input = '""""text with """""""" escaped quad""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + 
expect(getSingleRefId(result)).toBe('text with """" escaped quad'); +}); + +test("TestQuadrupleSingleQuotes", () => { + const input = "''''quadruple single quotes''''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("quadruple single quotes"); +}); + +test("TestQuadrupleBacktickQuotes", () => { + const input = "````quadruple backtick quotes````"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("quadruple backtick quotes"); +}); + +// ============================================================================ +// Quintuple Quotes (5 quote chars) Tests +// ============================================================================ + +test("TestQuintupleDoubleQuotes", () => { + const input = '"""""quintuple double quotes"""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("quintuple double quotes"); +}); + +test("TestQuintupleDoubleQuotesWithQuadQuoteInside", () => { + const input = '"""""text with """" inside"""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with """" inside'); +}); + +test("TestQuintupleDoubleQuotesWithEscape", () => { + const input = '"""""text with """""""""" escaped quint"""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('text with """"" escaped quint'); +}); + +test("TestQuintupleSingleQuotes", () => { + const input = "'''''quintuple single quotes'''''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("quintuple single quotes"); +}); + +test("TestQuintupleBacktickQuotes", () => { + const input = "`````quintuple backtick quotes`````"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("quintuple 
backtick quotes"); +}); + +// ============================================================================ +// Complex Scenarios Tests +// ============================================================================ + +test("TestMixedQuotesInLink", () => { + const input = '("double" \'single\' `backtick`)'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(result[0].values.length).toBe(3); + expect(result[0].values[0].id).toBe("double"); + expect(result[0].values[1].id).toBe("single"); + expect(result[0].values[2].id).toBe("backtick"); +}); + +test("TestBacktickAsIdInLink", () => { + const input = "(`myId`: value1 value2)"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(result[0].id).toBe("myId"); + expect(result[0].values.length).toBe(2); +}); + +test("TestCodeBlockLikeContent", () => { + // This demonstrates using triple backticks for code-like content + const input = "```const x = 1;```"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("const x = 1;"); +}); + +test("TestNestedQuotesInMarkdown", () => { + // Using double backticks to include single backtick + const input = "``Use `code` in markdown``"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("Use `code` in markdown"); +}); + +test("TestSQLWithQuotes", () => { + // Using double single quotes to include single quote in SQL-like string + // Inside '', to get a single quote, we need '''' (4 single quotes = escaped pair) + const input = "''SELECT * FROM users WHERE name = ''''John''''''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe("SELECT * FROM users WHERE name = ''John''"); +}); + +test("TestJSONStringWithQuotes", () => { + // Using double double quotes to include double quote in JSON-like string + const input = '""{"key": "value"}""'; + const result = 
parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('{"key": "value"}'); +}); + +// ============================================================================ +// Edge Cases +// ============================================================================ + +test("TestEmptySingleQuotedReference", () => { + const input = "''"; + // Empty quoted reference should not match - becomes simple reference or fails + // Let's verify what happens + try { + const result = parser.parse(input); + // If it parses, check what we get + expect(result.length).toBeGreaterThanOrEqual(0); + } catch (e) { + // Expected for empty quotes + expect(e).toBeTruthy(); + } +}); + +test("TestWhitespacePreservedInQuotes", () => { + const input = '" spaces "'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe(" spaces "); +}); + +test("TestMultilineInDoubleDoubleQuotes", () => { + const input = '(""line1\nline2"")'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(result[0].values[0].id).toBe("line1\nline2"); +}); diff --git a/python/links_notation/parser.py b/python/links_notation/parser.py index c4c2ad9..ada5ce2 100644 --- a/python/links_notation/parser.py +++ b/python/links_notation/parser.py @@ -95,6 +95,7 @@ def _split_lines_respecting_quotes(self, text: str) -> List[str]: current_line = "" in_single = False in_double = False + in_backtick = False paren_depth = 0 i = 0 @@ -102,20 +103,23 @@ def _split_lines_respecting_quotes(self, text: str) -> List[str]: char = text[i] # Handle quote toggling - if char == '"' and not in_single: + if char == '"' and not in_single and not in_backtick: in_double = not in_double current_line += char - elif char == "'" and not in_double: + elif char == "'" and not in_double and not in_backtick: in_single = not in_single current_line += char - elif char == '(' and not in_single and not in_double: + elif char == "`" and not 
in_single and not in_double: + in_backtick = not in_backtick + current_line += char + elif char == '(' and not in_single and not in_double and not in_backtick: paren_depth += 1 current_line += char - elif char == ')' and not in_single and not in_double: + elif char == ')' and not in_single and not in_double and not in_backtick: paren_depth -= 1 current_line += char elif char == '\n': - if in_single or in_double or paren_depth > 0: + if in_single or in_double or in_backtick or paren_depth > 0: # Inside quotes or unclosed parens: preserve the newline current_line += char else: @@ -253,18 +257,21 @@ def _find_colon_outside_quotes(self, text: str) -> int: """ in_single = False in_double = False + in_backtick = False paren_depth = 0 for i, char in enumerate(text): - if char == "'" and not in_double: + if char == "'" and not in_double and not in_backtick: in_single = not in_single - elif char == '"' and not in_single: + elif char == '"' and not in_single and not in_backtick: in_double = not in_double - elif char == '(' and not in_single and not in_double: + elif char == '`' and not in_single and not in_double: + in_backtick = not in_backtick + elif char == '(' and not in_single and not in_double and not in_backtick: paren_depth += 1 - elif char == ')' and not in_single and not in_double: + elif char == ')' and not in_single and not in_double and not in_backtick: paren_depth -= 1 - elif char == ':' and not in_single and not in_double and paren_depth == 0: + elif char == ':' and not in_single and not in_double and not in_backtick and paren_depth == 0: # Only return colon if it's outside quotes AND at parenthesis depth 0 return i @@ -276,42 +283,106 @@ def _parse_values(self, text: str) -> List[Dict]: return [] values = [] - current = "" + i = 0 + + while i < len(text): + # Skip all whitespace (space, tab, newline, carriage return) + while i < len(text) and text[i] in ' \t\n\r': + i += 1 + if i >= len(text): + break + + # Try to extract the next value + value_end, 
value_text = self._extract_next_value(text, i) + if value_text and value_text.strip(): + values.append(self._parse_value(value_text)) + if value_end == i: + # No progress made - skip this character to avoid infinite loop + i += 1 + else: + i = value_end + + return values + + def _extract_next_value(self, text: str, start: int) -> tuple: + """ + Extract the next value from text starting at start position. + Returns (end_position, value_text). + """ + if start >= len(text): + return (start, "") + + # Check if this starts with a multi-quote string + for quote_count in range(5, 0, -1): + for quote_char in ['"', "'", '`']: + quote_seq = quote_char * quote_count + if text[start:].startswith(quote_seq): + # Parse this multi-quote string + remaining = text[start:] + open_close = quote_seq + escape_seq = quote_char * (quote_count * 2) + + pos = len(open_close) + while pos < len(remaining): + # Check for escape sequence (2*N quotes) + if remaining[pos:].startswith(escape_seq): + pos += len(escape_seq) + continue + # Check for closing quotes + if remaining[pos:].startswith(open_close): + after_close_pos = pos + len(open_close) + # Make sure this is exactly N quotes (not more) + if after_close_pos >= len(remaining) or remaining[after_close_pos] != quote_char: + # Found the end + return (start + after_close_pos, remaining[:after_close_pos]) + pos += 1 + + # No closing found, treat as regular text + break + + # Check if this starts with a parenthesized expression + if text[start] == '(': + paren_depth = 1 + in_single = False + in_double = False + in_backtick = False + i = start + 1 + + while i < len(text) and paren_depth > 0: + char = text[i] + if char == "'" and not in_double and not in_backtick: + in_single = not in_single + elif char == '"' and not in_single and not in_backtick: + in_double = not in_double + elif char == '`' and not in_single and not in_double: + in_backtick = not in_backtick + elif char == '(' and not in_single and not in_double and not in_backtick: + 
paren_depth += 1 + elif char == ')' and not in_single and not in_double and not in_backtick: + paren_depth -= 1 + i += 1 + + return (i, text[start:i]) + + # Regular value - read until space or end in_single = False in_double = False - paren_depth = 0 + in_backtick = False + i = start - i = 0 while i < len(text): char = text[i] - - if char == "'" and not in_double: + if char == "'" and not in_double and not in_backtick: in_single = not in_single - current += char - elif char == '"' and not in_single: + elif char == '"' and not in_single and not in_backtick: in_double = not in_double - current += char - elif char == '(' and not in_single and not in_double: - paren_depth += 1 - current += char - elif char == ')' and not in_single and not in_double: - paren_depth -= 1 - current += char - elif char == ' ' and not in_single and not in_double and paren_depth == 0: - # End of current value - if current.strip(): - values.append(self._parse_value(current.strip())) - current = "" - else: - current += char - + elif char == '`' and not in_single and not in_double: + in_backtick = not in_backtick + elif char == ' ' and not in_single and not in_double and not in_backtick: + break i += 1 - # Add last value - if current.strip(): - values.append(self._parse_value(current.strip())) - - return values + return (i, text[start:i]) def _parse_value(self, value: str) -> Dict: """Parse a single value (could be a reference or nested link).""" @@ -325,20 +396,65 @@ def _parse_value(self, value: str) -> Dict: return {'id': ref} def _extract_reference(self, text: str) -> str: - """Extract reference, handling quoted strings.""" + """Extract reference, handling quoted strings with escaping support.""" text = text.strip() - # Double quoted - if text.startswith('"') and text.endswith('"'): - return text[1:-1] - - # Single quoted - if text.startswith("'") and text.endswith("'"): - return text[1:-1] + # Try multi-quote strings (check longer sequences first: 5, 4, 3, 2, 1) + for quote_count in 
range(5, 0, -1): + for quote_char in ['"', "'", '`']: + quote_seq = quote_char * quote_count + if text.startswith(quote_seq) and len(text) > len(quote_seq): + # Try to parse this multi-quote string + result = self._parse_multi_quote_string(text, quote_char, quote_count) + if result is not None: + return result # Unquoted return text + def _parse_multi_quote_string(self, text: str, quote_char: str, quote_count: int) -> Optional[str]: + """ + Parse a multi-quote string. + + For N quotes: opening = N quotes, closing = N quotes, escape = 2*N quotes -> N quotes + """ + open_close = quote_char * quote_count + escape_seq = quote_char * (quote_count * 2) + escape_val = quote_char * quote_count + + # Check for opening quotes + if not text.startswith(open_close): + return None + + remaining = text[len(open_close):] + content = "" + + while remaining: + # Check for escape sequence (2*N quotes) + if remaining.startswith(escape_seq): + content += escape_val + remaining = remaining[len(escape_seq):] + continue + + # Check for closing quotes (N quotes not followed by more quotes) + if remaining.startswith(open_close): + after_close = remaining[len(open_close):] + # Make sure this is exactly N quotes (not more) + if not after_close or not after_close.startswith(quote_char): + # Closing found - but only if we consumed the entire text + if not after_close.strip(): + return content + else: + # There's more text after closing, may not be valid + return content + + # Take the next character + content += remaining[0] + remaining = remaining[1:] + + # No closing quotes found + return None + def _transform_result(self, raw_result: List[Dict]) -> List[Link]: """Transform raw parse result into Link objects.""" links = [] diff --git a/python/pyproject.toml b/python/pyproject.toml index dd01090..6bc3669 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "links-notation" -version = "0.12.0" +version = "0.13.0" 
description = "Python implementation of the Links Notation parser" readme = "README.md" license = {text = "Unlicense"} diff --git a/python/tests/test_multi_quote_parser.py b/python/tests/test_multi_quote_parser.py new file mode 100644 index 0000000..02060f0 --- /dev/null +++ b/python/tests/test_multi_quote_parser.py @@ -0,0 +1,226 @@ +"""Tests for multi-quote string support in parser.""" + +import pytest +from links_notation.parser import Parser + + +def get_single_ref_id(result): + """Extract the single reference ID from a parsed result.""" + if len(result) == 1 and result[0].id is None and result[0].values and len(result[0].values) == 1: + return result[0].values[0].id + return result[0].id if len(result) == 1 else None + + +class TestBacktickQuotes: + """Tests for backtick quote support.""" + + def test_backtick_quoted_reference(self): + parser = Parser() + result = parser.parse("`backtick quoted`") + assert get_single_ref_id(result) == "backtick quoted" + + def test_backtick_quoted_with_spaces(self): + parser = Parser() + result = parser.parse("`text with spaces`") + assert get_single_ref_id(result) == "text with spaces" + + def test_backtick_quoted_multiline(self): + parser = Parser() + result = parser.parse("(`line1\nline2`)") + assert len(result) == 1 + assert result[0].values is not None + assert len(result[0].values) == 1 + assert result[0].values[0].id == "line1\nline2" + + def test_backtick_quoted_with_escaped_backtick(self): + parser = Parser() + result = parser.parse("`text with `` escaped backtick`") + assert get_single_ref_id(result) == "text with ` escaped backtick" + + +class TestSingleQuoteEscaping: + """Tests for single quote escaping.""" + + def test_single_quote_with_escaped_single_quote(self): + parser = Parser() + result = parser.parse("'text with '' escaped quote'") + assert get_single_ref_id(result) == "text with ' escaped quote" + + +class TestDoubleQuoteEscaping: + """Tests for double quote escaping.""" + + def 
test_double_quote_with_escaped_double_quote(self): + parser = Parser() + result = parser.parse('"text with "" escaped quote"') + assert get_single_ref_id(result) == 'text with " escaped quote' + + +class TestDoubleDoubleQuotes: + """Tests for double-double quotes (2 quote chars).""" + + def test_double_double_quotes(self): + parser = Parser() + result = parser.parse('""double double quotes""') + assert get_single_ref_id(result) == "double double quotes" + + def test_double_double_quotes_with_single_quote_inside(self): + parser = Parser() + result = parser.parse('""text with " inside""') + assert get_single_ref_id(result) == 'text with " inside' + + def test_double_double_quotes_with_escape(self): + parser = Parser() + result = parser.parse('""text with """" escaped double""') + assert get_single_ref_id(result) == 'text with "" escaped double' + + def test_double_single_quotes(self): + parser = Parser() + result = parser.parse("''double single quotes''") + assert get_single_ref_id(result) == "double single quotes" + + def test_double_single_quotes_with_single_quote_inside(self): + parser = Parser() + result = parser.parse("''text with ' inside''") + assert get_single_ref_id(result) == "text with ' inside" + + def test_double_single_quotes_with_escape(self): + parser = Parser() + result = parser.parse("''text with '''' escaped single''") + assert get_single_ref_id(result) == "text with '' escaped single" + + def test_double_backtick_quotes(self): + parser = Parser() + result = parser.parse("``double backtick quotes``") + assert get_single_ref_id(result) == "double backtick quotes" + + def test_double_backtick_quotes_with_backtick_inside(self): + parser = Parser() + result = parser.parse("``text with ` inside``") + assert get_single_ref_id(result) == "text with ` inside" + + def test_double_backtick_quotes_with_escape(self): + parser = Parser() + result = parser.parse("``text with ```` escaped backtick``") + assert get_single_ref_id(result) == "text with `` escaped 
backtick" + + +class TestTripleQuotes: + """Tests for triple quotes (3 quote chars).""" + + def test_triple_double_quotes(self): + parser = Parser() + result = parser.parse('"""triple double quotes"""') + assert get_single_ref_id(result) == "triple double quotes" + + def test_triple_double_quotes_with_double_quote_inside(self): + parser = Parser() + result = parser.parse('"""text with "" inside"""') + assert get_single_ref_id(result) == 'text with "" inside' + + def test_triple_double_quotes_with_escape(self): + parser = Parser() + result = parser.parse('"""text with """""" escaped triple"""') + assert get_single_ref_id(result) == 'text with """ escaped triple' + + def test_triple_single_quotes(self): + parser = Parser() + result = parser.parse("'''triple single quotes'''") + assert get_single_ref_id(result) == "triple single quotes" + + def test_triple_backtick_quotes(self): + parser = Parser() + result = parser.parse("```triple backtick quotes```") + assert get_single_ref_id(result) == "triple backtick quotes" + + +class TestQuadrupleQuotes: + """Tests for quadruple quotes (4 quote chars).""" + + def test_quadruple_double_quotes(self): + parser = Parser() + result = parser.parse('""""quadruple double quotes""""') + assert get_single_ref_id(result) == "quadruple double quotes" + + def test_quadruple_single_quotes(self): + parser = Parser() + result = parser.parse("''''quadruple single quotes''''") + assert get_single_ref_id(result) == "quadruple single quotes" + + def test_quadruple_backtick_quotes(self): + parser = Parser() + result = parser.parse("````quadruple backtick quotes````") + assert get_single_ref_id(result) == "quadruple backtick quotes" + + +class TestQuintupleQuotes: + """Tests for quintuple quotes (5 quote chars).""" + + def test_quintuple_double_quotes(self): + parser = Parser() + result = parser.parse('"""""quintuple double quotes"""""') + assert get_single_ref_id(result) == "quintuple double quotes" + + def test_quintuple_single_quotes(self): + 
parser = Parser() + result = parser.parse("'''''quintuple single quotes'''''") + assert get_single_ref_id(result) == "quintuple single quotes" + + def test_quintuple_backtick_quotes(self): + parser = Parser() + result = parser.parse("`````quintuple backtick quotes`````") + assert get_single_ref_id(result) == "quintuple backtick quotes" + + +class TestComplexScenarios: + """Tests for complex quote scenarios.""" + + def test_mixed_quotes_in_link(self): + parser = Parser() + result = parser.parse('("double" \'single\' `backtick`)') + assert len(result) == 1 + assert result[0].values is not None + assert len(result[0].values) == 3 + assert result[0].values[0].id == "double" + assert result[0].values[1].id == "single" + assert result[0].values[2].id == "backtick" + + def test_backtick_as_id_in_link(self): + parser = Parser() + result = parser.parse("(`myId`: value1 value2)") + assert len(result) == 1 + assert result[0].id == "myId" + assert result[0].values is not None + assert len(result[0].values) == 2 + + def test_code_block_like_content(self): + parser = Parser() + result = parser.parse("```const x = 1;```") + assert get_single_ref_id(result) == "const x = 1;" + + def test_nested_quotes_in_markdown(self): + parser = Parser() + result = parser.parse("``Use `code` in markdown``") + assert get_single_ref_id(result) == "Use `code` in markdown" + + def test_json_string_with_quotes(self): + parser = Parser() + result = parser.parse('""{ "key": "value"}""') + assert get_single_ref_id(result) == '{ "key": "value"}' + + +class TestEdgeCases: + """Edge case tests.""" + + def test_whitespace_preserved_in_quotes(self): + parser = Parser() + result = parser.parse('" spaces "') + assert get_single_ref_id(result) == " spaces " + + def test_multiline_in_double_double_quotes(self): + parser = Parser() + result = parser.parse('(""line1\nline2"")') + assert len(result) == 1 + assert result[0].values is not None + assert len(result[0].values) == 1 + assert result[0].values[0].id == 
"line1\nline2" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 9a2794a..85b2f92 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "links-notation" -version = "0.12.0" +version = "0.13.0" edition = "2021" description = "Rust implementation of the Links Notation parser" license = "Unlicense" diff --git a/rust/src/parser.rs b/rust/src/parser.rs index 3fe04e2..47e21dc 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -1,11 +1,11 @@ use nom::{ IResult, branch::alt, - bytes::complete::{take_while, take_while1, is_not}, + bytes::complete::{take_while, take_while1}, character::complete::{char, line_ending}, combinator::eof, multi::{many0, many1}, - sequence::{preceded, terminated, delimited}, + sequence::{preceded, terminated}, Parser, }; use std::cell::RefCell; @@ -140,30 +140,128 @@ fn simple_reference(input: &str) -> IResult<&str, String> { .parse(input) } -fn double_quoted_reference(input: &str) -> IResult<&str, String> { - delimited( - char('"'), - is_not("\""), - char('"') - ) - .map(|s: &str| s.to_string()) - .parse(input) +/// Parse a multi-quote string with a given quote character and count. 
+/// For N quotes: opening = N quotes, closing = N quotes, escape = 2*N quotes -> N quotes +fn parse_multi_quote_string(input: &str, quote_char: char, quote_count: usize) -> IResult<&str, String> { + let open_close = quote_char.to_string().repeat(quote_count); + let escape_seq = quote_char.to_string().repeat(quote_count * 2); + let escape_val = quote_char.to_string().repeat(quote_count); + + // Check for opening quotes + if !input.starts_with(&open_close) { + return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))); + } + + let mut remaining = &input[open_close.len()..]; + let mut content = String::new(); + + loop { + if remaining.is_empty() { + return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))); + } + + // Check for escape sequence (2*N quotes) + if remaining.starts_with(&escape_seq) { + content.push_str(&escape_val); + remaining = &remaining[escape_seq.len()..]; + continue; + } + + // Check for closing quotes (N quotes not followed by more quotes) + if remaining.starts_with(&open_close) { + let after_close = &remaining[open_close.len()..]; + // Make sure this is exactly N quotes (not more) + if after_close.is_empty() || !after_close.starts_with(quote_char) { + return Ok((after_close, content)); + } + } + + // Take the next character + let c = remaining.chars().next().unwrap(); + content.push(c); + remaining = &remaining[c.len_utf8()..]; + } } -fn single_quoted_reference(input: &str) -> IResult<&str, String> { - delimited( - char('\''), - is_not("'"), - char('\'') - ) - .map(|s: &str| s.to_string()) - .parse(input) +// Single quote char (1 quote) +fn double_quote_1(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '"', 1) +} + +fn single_quote_1(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '\'', 1) +} + +fn backtick_quote_1(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '`', 1) +} + +// Double quote chars (2 quotes) +fn 
double_quote_2(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '"', 2) +} + +fn single_quote_2(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '\'', 2) +} + +fn backtick_quote_2(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '`', 2) +} + +// Triple quote chars (3 quotes) +fn double_quote_3(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '"', 3) +} + +fn single_quote_3(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '\'', 3) +} + +fn backtick_quote_3(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '`', 3) +} + +// Quadruple quote chars (4 quotes) +fn double_quote_4(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '"', 4) +} + +fn single_quote_4(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '\'', 4) +} + +fn backtick_quote_4(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '`', 4) +} + +// Quintuple quote chars (5 quotes) +fn double_quote_5(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '"', 5) +} + +fn single_quote_5(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '\'', 5) +} + +fn backtick_quote_5(input: &str) -> IResult<&str, String> { + parse_multi_quote_string(input, '`', 5) } fn reference(input: &str) -> IResult<&str, String> { + // Try longer quote sequences first (greedy matching) alt(( - double_quoted_reference, - single_quoted_reference, + // 5 quotes + double_quote_5, single_quote_5, backtick_quote_5, + // 4 quotes + double_quote_4, single_quote_4, backtick_quote_4, + // 3 quotes + double_quote_3, single_quote_3, backtick_quote_3, + // 2 quotes + double_quote_2, single_quote_2, backtick_quote_2, + // 1 quote + double_quote_1, single_quote_1, backtick_quote_1, + // Simple unquoted simple_reference, )).parse(input) } diff --git a/rust/tests/multi_quote_parser_tests.rs 
b/rust/tests/multi_quote_parser_tests.rs new file mode 100644 index 0000000..8c8b9b8 --- /dev/null +++ b/rust/tests/multi_quote_parser_tests.rs @@ -0,0 +1,349 @@ +use links_notation::{parse_lino, LiNo}; + +// Helper to extract the single reference from a parsed result +fn get_single_ref_id(lino: &LiNo) -> Option<&String> { + match lino { + LiNo::Ref(id) => Some(id), + LiNo::Link { id: None, values } if values.len() == 1 => { + if let LiNo::Ref(id) = &values[0] { + Some(id) + } else if let LiNo::Link { id: Some(ref_id), values: inner_values } = &values[0] { + if inner_values.is_empty() { + Some(ref_id) + } else { + None + } + } else { + None + } + } + LiNo::Link { id: Some(ref_id), values } if values.is_empty() => Some(ref_id), + _ => None, + } +} + +// Helper to get values from a link +fn get_values(lino: &LiNo) -> Option<&Vec>> { + match lino { + LiNo::Link { values, .. } => { + // If it's a wrapper link (outer link) + if values.len() == 1 { + if let LiNo::Link { values: inner_values, .. } = &values[0] { + return Some(inner_values); + } + } + Some(values) + } + _ => None, + } +} + +// ============================================================================ +// Backtick Quote Tests (Single Backtick) +// ============================================================================ + +#[test] +fn test_backtick_quoted_reference() { + let result = parse_lino("`backtick quoted`").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"backtick quoted".to_string())); +} + +#[test] +fn test_backtick_quoted_with_spaces() { + let result = parse_lino("`text with spaces`").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with spaces".to_string())); +} + +#[test] +fn test_backtick_quoted_multiline() { + let result = parse_lino("(`line1\nline2`)").unwrap(); + if let LiNo::Link { values, .. } = &result { + if let Some(inner) = values.first() { + if let LiNo::Link { values: inner_vals, .. 
} = inner { + if let Some(LiNo::Ref(id)) = inner_vals.first() { + assert_eq!(id, "line1\nline2"); + return; + } + } + if let LiNo::Ref(id) = inner { + assert_eq!(id, "line1\nline2"); + return; + } + } + } + panic!("Expected multiline backtick content"); +} + +#[test] +fn test_backtick_quoted_with_escaped_backtick() { + let result = parse_lino("`text with `` escaped backtick`").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with ` escaped backtick".to_string())); +} + +// ============================================================================ +// Single Quote Tests (with escaping) +// ============================================================================ + +#[test] +fn test_single_quote_with_escaped_single_quote() { + let result = parse_lino("'text with '' escaped quote'").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with ' escaped quote".to_string())); +} + +// ============================================================================ +// Double Quote Tests (with escaping) +// ============================================================================ + +#[test] +fn test_double_quote_with_escaped_double_quote() { + let result = parse_lino("\"text with \"\" escaped quote\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \" escaped quote".to_string())); +} + +// ============================================================================ +// Double Quotes (2 quote chars) Tests +// ============================================================================ + +#[test] +fn test_double_double_quotes() { + let result = parse_lino("\"\"double double quotes\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"double double quotes".to_string())); +} + +#[test] +fn test_double_double_quotes_with_single_quote_inside() { + let result = parse_lino("\"\"text with \" inside\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \" inside".to_string())); +} + +#[test] +fn 
test_double_double_quotes_with_escape() { + let result = parse_lino("\"\"text with \"\"\"\" escaped double\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \"\" escaped double".to_string())); +} + +#[test] +fn test_double_single_quotes() { + let result = parse_lino("''double single quotes''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"double single quotes".to_string())); +} + +#[test] +fn test_double_single_quotes_with_single_quote_inside() { + let result = parse_lino("''text with ' inside''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with ' inside".to_string())); +} + +#[test] +fn test_double_single_quotes_with_escape() { + let result = parse_lino("''text with '''' escaped single''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with '' escaped single".to_string())); +} + +#[test] +fn test_double_backtick_quotes() { + let result = parse_lino("``double backtick quotes``").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"double backtick quotes".to_string())); +} + +#[test] +fn test_double_backtick_quotes_with_backtick_inside() { + let result = parse_lino("``text with ` inside``").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with ` inside".to_string())); +} + +#[test] +fn test_double_backtick_quotes_with_escape() { + let result = parse_lino("``text with ```` escaped backtick``").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with `` escaped backtick".to_string())); +} + +// ============================================================================ +// Triple Quotes (3 quote chars) Tests +// ============================================================================ + +#[test] +fn test_triple_double_quotes() { + let result = parse_lino("\"\"\"triple double quotes\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"triple double quotes".to_string())); +} + +#[test] +fn test_triple_double_quotes_with_double_quote_inside() { + let 
result = parse_lino("\"\"\"text with \"\" inside\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \"\" inside".to_string())); +} + +#[test] +fn test_triple_double_quotes_with_escape() { + let result = parse_lino("\"\"\"text with \"\"\"\"\"\" escaped triple\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \"\"\" escaped triple".to_string())); +} + +#[test] +fn test_triple_single_quotes() { + let result = parse_lino("'''triple single quotes'''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"triple single quotes".to_string())); +} + +#[test] +fn test_triple_single_quotes_with_double_quote_inside() { + let result = parse_lino("'''text with '' inside'''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with '' inside".to_string())); +} + +#[test] +fn test_triple_single_quotes_with_escape() { + let result = parse_lino("'''text with '''''' escaped triple'''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with ''' escaped triple".to_string())); +} + +#[test] +fn test_triple_backtick_quotes() { + let result = parse_lino("```triple backtick quotes```").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"triple backtick quotes".to_string())); +} + +#[test] +fn test_triple_backtick_quotes_with_double_backtick_inside() { + let result = parse_lino("```text with `` inside```").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with `` inside".to_string())); +} + +#[test] +fn test_triple_backtick_quotes_with_escape() { + let result = parse_lino("```text with `````` escaped triple```").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with ``` escaped triple".to_string())); +} + +// ============================================================================ +// Quadruple Quotes (4 quote chars) Tests +// ============================================================================ + +#[test] +fn test_quadruple_double_quotes() { + let result = 
parse_lino("\"\"\"\"quadruple double quotes\"\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"quadruple double quotes".to_string())); +} + +#[test] +fn test_quadruple_double_quotes_with_triple_quote_inside() { + let result = parse_lino("\"\"\"\"text with \"\"\" inside\"\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \"\"\" inside".to_string())); +} + +#[test] +fn test_quadruple_single_quotes() { + let result = parse_lino("''''quadruple single quotes''''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"quadruple single quotes".to_string())); +} + +#[test] +fn test_quadruple_backtick_quotes() { + let result = parse_lino("````quadruple backtick quotes````").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"quadruple backtick quotes".to_string())); +} + +// ============================================================================ +// Quintuple Quotes (5 quote chars) Tests +// ============================================================================ + +#[test] +fn test_quintuple_double_quotes() { + let result = parse_lino("\"\"\"\"\"quintuple double quotes\"\"\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"quintuple double quotes".to_string())); +} + +#[test] +fn test_quintuple_double_quotes_with_quad_quote_inside() { + let result = parse_lino("\"\"\"\"\"text with \"\"\"\" inside\"\"\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"text with \"\"\"\" inside".to_string())); +} + +#[test] +fn test_quintuple_single_quotes() { + let result = parse_lino("'''''quintuple single quotes'''''").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"quintuple single quotes".to_string())); +} + +#[test] +fn test_quintuple_backtick_quotes() { + let result = parse_lino("`````quintuple backtick quotes`````").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"quintuple backtick quotes".to_string())); +} + +// 
============================================================================ +// Complex Scenarios Tests +// ============================================================================ + +#[test] +fn test_mixed_quotes_in_link() { + let result = parse_lino("(\"double\" 'single' `backtick`)").unwrap(); + if let Some(values) = get_values(&result) { + assert_eq!(values.len(), 3); + if let LiNo::Ref(id) = &values[0] { + assert_eq!(id, "double"); + } + if let LiNo::Ref(id) = &values[1] { + assert_eq!(id, "single"); + } + if let LiNo::Ref(id) = &values[2] { + assert_eq!(id, "backtick"); + } + } else { + panic!("Expected values in link"); + } +} + +#[test] +fn test_backtick_as_id_in_link() { + let result = parse_lino("(`myId`: value1 value2)").unwrap(); + if let LiNo::Link { values, .. } = &result { + if let Some(LiNo::Link { id, values: inner_values }) = values.first() { + assert_eq!(id.as_deref(), Some("myId")); + assert_eq!(inner_values.len(), 2); + return; + } + } + panic!("Expected link with backtick id"); +} + +#[test] +fn test_code_block_like_content() { + let result = parse_lino("```const x = 1;```").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"const x = 1;".to_string())); +} + +#[test] +fn test_nested_quotes_in_markdown() { + let result = parse_lino("``Use `code` in markdown``").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"Use `code` in markdown".to_string())); +} + +#[test] +fn test_json_string_with_quotes() { + let result = parse_lino("\"\"{ \"key\": \"value\"}\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"{ \"key\": \"value\"}".to_string())); +} + +// ============================================================================ +// Edge Cases +// ============================================================================ + +#[test] +fn test_whitespace_preserved_in_quotes() { + let result = parse_lino("\" spaces \"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&" spaces ".to_string())); +} + +#[test] +fn 
test_multiline_in_double_double_quotes() { + let result = parse_lino("(\"\"line1\nline2\"\")").unwrap(); + if let Some(values) = get_values(&result) { + if let Some(LiNo::Ref(id)) = values.first() { + assert_eq!(id, "line1\nline2"); + return; + } + } + panic!("Expected multiline content in double double quotes"); +} From af8002b8bcf1f1ce0384684c40647ffaf809d1fe Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 30 Nov 2025 20:27:31 +0000 Subject: [PATCH 03/15] Revert "Initial commit with task details for issue #142" This reverts commit b21928fe39992527c158d35980b1f39437b0bf08. --- CLAUDE.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 90186fc..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,5 +0,0 @@ -Issue to solve: https://github.com/link-foundation/links-notation/issues/142 -Your prepared branch: issue-142-23f307922307 -Your prepared working directory: /tmp/gh-issue-solver-1764533457922 - -Proceed. \ No newline at end of file From b839ec2535746555e9b2a038de8c487530edb7da Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 15:10:19 +0000 Subject: [PATCH 04/15] Apply formatting fixes for JS, Python, and Rust MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Format tests/MultiQuoteParser.test.js with Prettier - Format Python parser.py and test_multi_quote_parser.py with Black - Format Rust parser.rs with rustfmt 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- js/tests/MultiQuoteParser.test.js | 186 ++++++++++++------------ python/links_notation/parser.py | 24 +-- python/tests/test_multi_quote_parser.py | 2 +- rust/Cargo.lock | 2 +- rust/src/parser.rs | 36 ++++- 5 files changed, 138 insertions(+), 112 deletions(-) diff --git a/js/tests/MultiQuoteParser.test.js b/js/tests/MultiQuoteParser.test.js index e11e42a..3965450 100644 --- a/js/tests/MultiQuoteParser.test.js +++ 
b/js/tests/MultiQuoteParser.test.js @@ -1,12 +1,16 @@ -import { test, expect } from "bun:test"; -import { Parser } from "../src/Parser.js"; +import { test, expect } from 'bun:test'; +import { Parser } from '../src/Parser.js'; const parser = new Parser(); // Helper to extract the reference ID from a single-value link function getSingleRefId(result) { // Single reference parses as: { id: null, values: [{ id: "the-id", values: [] }] } - if (result.length === 1 && result[0].id === null && result[0].values.length === 1) { + if ( + result.length === 1 && + result[0].id === null && + result[0].values.length === 1 + ) { return result[0].values[0].id; } return result[0]?.id; @@ -16,43 +20,43 @@ function getSingleRefId(result) { // Backtick Quote Tests (Single Backtick) // ============================================================================ -test("TestBacktickQuotedReference", () => { - const input = "`backtick quoted`"; +test('TestBacktickQuotedReference', () => { + const input = '`backtick quoted`'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("backtick quoted"); + expect(getSingleRefId(result)).toBe('backtick quoted'); }); -test("TestBacktickQuotedWithSpaces", () => { - const input = "`text with spaces`"; +test('TestBacktickQuotedWithSpaces', () => { + const input = '`text with spaces`'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("text with spaces"); + expect(getSingleRefId(result)).toBe('text with spaces'); }); -test("TestBacktickQuotedMultiline", () => { - const input = "(`line1\nline2`)"; +test('TestBacktickQuotedMultiline', () => { + const input = '(`line1\nline2`)'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(result[0].values[0].id).toBe("line1\nline2"); + expect(result[0].values[0].id).toBe('line1\nline2'); }); -test("TestBacktickQuotedWithEscapedBacktick", () => { - const input = "`text with `` escaped 
backtick`"; +test('TestBacktickQuotedWithEscapedBacktick', () => { + const input = '`text with `` escaped backtick`'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("text with ` escaped backtick"); + expect(getSingleRefId(result)).toBe('text with ` escaped backtick'); }); // ============================================================================ // Single Quote Tests (with escaping) // ============================================================================ -test("TestSingleQuoteWithEscapedSingleQuote", () => { +test('TestSingleQuoteWithEscapedSingleQuote', () => { const input = "'text with '' escaped quote'"; const result = parser.parse(input); @@ -64,7 +68,7 @@ test("TestSingleQuoteWithEscapedSingleQuote", () => { // Double Quote Tests (with escaping) // ============================================================================ -test("TestDoubleQuoteWithEscapedDoubleQuote", () => { +test('TestDoubleQuoteWithEscapedDoubleQuote', () => { const input = '"text with "" escaped quote"'; const result = parser.parse(input); @@ -76,15 +80,15 @@ test("TestDoubleQuoteWithEscapedDoubleQuote", () => { // Double Quotes (2 quote chars) Tests // ============================================================================ -test("TestDoubleDoubleQuotes", () => { +test('TestDoubleDoubleQuotes', () => { const input = '""double double quotes""'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("double double quotes"); + expect(getSingleRefId(result)).toBe('double double quotes'); }); -test("TestDoubleDoubleQuotesWithSingleQuoteInside", () => { +test('TestDoubleDoubleQuotesWithSingleQuoteInside', () => { const input = '""text with " inside""'; const result = parser.parse(input); @@ -92,7 +96,7 @@ test("TestDoubleDoubleQuotesWithSingleQuoteInside", () => { expect(getSingleRefId(result)).toBe('text with " inside'); }); -test("TestDoubleDoubleQuotesWithEscape", () 
=> { +test('TestDoubleDoubleQuotesWithEscape', () => { const input = '""text with """" escaped double""'; const result = parser.parse(input); @@ -100,15 +104,15 @@ test("TestDoubleDoubleQuotesWithEscape", () => { expect(getSingleRefId(result)).toBe('text with "" escaped double'); }); -test("TestDoubleSingleQuotes", () => { +test('TestDoubleSingleQuotes', () => { const input = "''double single quotes''"; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("double single quotes"); + expect(getSingleRefId(result)).toBe('double single quotes'); }); -test("TestDoubleSingleQuotesWithSingleQuoteInside", () => { +test('TestDoubleSingleQuotesWithSingleQuoteInside', () => { const input = "''text with ' inside''"; const result = parser.parse(input); @@ -116,7 +120,7 @@ test("TestDoubleSingleQuotesWithSingleQuoteInside", () => { expect(getSingleRefId(result)).toBe("text with ' inside"); }); -test("TestDoubleSingleQuotesWithEscape", () => { +test('TestDoubleSingleQuotesWithEscape', () => { const input = "''text with '''' escaped single''"; const result = parser.parse(input); @@ -124,43 +128,43 @@ test("TestDoubleSingleQuotesWithEscape", () => { expect(getSingleRefId(result)).toBe("text with '' escaped single"); }); -test("TestDoubleBacktickQuotes", () => { - const input = "``double backtick quotes``"; +test('TestDoubleBacktickQuotes', () => { + const input = '``double backtick quotes``'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("double backtick quotes"); + expect(getSingleRefId(result)).toBe('double backtick quotes'); }); -test("TestDoubleBacktickQuotesWithBacktickInside", () => { - const input = "``text with ` inside``"; +test('TestDoubleBacktickQuotesWithBacktickInside', () => { + const input = '``text with ` inside``'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("text with ` inside"); + 
expect(getSingleRefId(result)).toBe('text with ` inside'); }); -test("TestDoubleBacktickQuotesWithEscape", () => { - const input = "``text with ```` escaped backtick``"; +test('TestDoubleBacktickQuotesWithEscape', () => { + const input = '``text with ```` escaped backtick``'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("text with `` escaped backtick"); + expect(getSingleRefId(result)).toBe('text with `` escaped backtick'); }); // ============================================================================ // Triple Quotes (3 quote chars) Tests // ============================================================================ -test("TestTripleDoubleQuotes", () => { +test('TestTripleDoubleQuotes', () => { const input = '"""triple double quotes"""'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("triple double quotes"); + expect(getSingleRefId(result)).toBe('triple double quotes'); }); -test("TestTripleDoubleQuotesWithDoubleQuoteInside", () => { +test('TestTripleDoubleQuotesWithDoubleQuoteInside', () => { const input = '"""text with "" inside"""'; const result = parser.parse(input); @@ -168,7 +172,7 @@ test("TestTripleDoubleQuotesWithDoubleQuoteInside", () => { expect(getSingleRefId(result)).toBe('text with "" inside'); }); -test("TestTripleDoubleQuotesWithEscape", () => { +test('TestTripleDoubleQuotesWithEscape', () => { const input = '"""text with """""" escaped triple"""'; const result = parser.parse(input); @@ -176,15 +180,15 @@ test("TestTripleDoubleQuotesWithEscape", () => { expect(getSingleRefId(result)).toBe('text with """ escaped triple'); }); -test("TestTripleSingleQuotes", () => { +test('TestTripleSingleQuotes', () => { const input = "'''triple single quotes'''"; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("triple single quotes"); + expect(getSingleRefId(result)).toBe('triple single 
quotes'); }); -test("TestTripleSingleQuotesWithDoubleQuoteInside", () => { +test('TestTripleSingleQuotesWithDoubleQuoteInside', () => { const input = "'''text with '' inside'''"; const result = parser.parse(input); @@ -192,7 +196,7 @@ test("TestTripleSingleQuotesWithDoubleQuoteInside", () => { expect(getSingleRefId(result)).toBe("text with '' inside"); }); -test("TestTripleSingleQuotesWithEscape", () => { +test('TestTripleSingleQuotesWithEscape', () => { const input = "'''text with '''''' escaped triple'''"; const result = parser.parse(input); @@ -200,43 +204,43 @@ test("TestTripleSingleQuotesWithEscape", () => { expect(getSingleRefId(result)).toBe("text with ''' escaped triple"); }); -test("TestTripleBacktickQuotes", () => { - const input = "```triple backtick quotes```"; +test('TestTripleBacktickQuotes', () => { + const input = '```triple backtick quotes```'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("triple backtick quotes"); + expect(getSingleRefId(result)).toBe('triple backtick quotes'); }); -test("TestTripleBacktickQuotesWithDoubleBacktickInside", () => { - const input = "```text with `` inside```"; +test('TestTripleBacktickQuotesWithDoubleBacktickInside', () => { + const input = '```text with `` inside```'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("text with `` inside"); + expect(getSingleRefId(result)).toBe('text with `` inside'); }); -test("TestTripleBacktickQuotesWithEscape", () => { - const input = "```text with `````` escaped triple```"; +test('TestTripleBacktickQuotesWithEscape', () => { + const input = '```text with `````` escaped triple```'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("text with ``` escaped triple"); + expect(getSingleRefId(result)).toBe('text with ``` escaped triple'); }); // 
============================================================================ // Quadruple Quotes (4 quote chars) Tests // ============================================================================ -test("TestQuadrupleDoubleQuotes", () => { +test('TestQuadrupleDoubleQuotes', () => { const input = '""""quadruple double quotes""""'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("quadruple double quotes"); + expect(getSingleRefId(result)).toBe('quadruple double quotes'); }); -test("TestQuadrupleDoubleQuotesWithTripleQuoteInside", () => { +test('TestQuadrupleDoubleQuotesWithTripleQuoteInside', () => { const input = '""""text with """ inside""""'; const result = parser.parse(input); @@ -244,7 +248,7 @@ test("TestQuadrupleDoubleQuotesWithTripleQuoteInside", () => { expect(getSingleRefId(result)).toBe('text with """ inside'); }); -test("TestQuadrupleDoubleQuotesWithEscape", () => { +test('TestQuadrupleDoubleQuotesWithEscape', () => { const input = '""""text with """""""" escaped quad""""'; const result = parser.parse(input); @@ -252,35 +256,35 @@ test("TestQuadrupleDoubleQuotesWithEscape", () => { expect(getSingleRefId(result)).toBe('text with """" escaped quad'); }); -test("TestQuadrupleSingleQuotes", () => { +test('TestQuadrupleSingleQuotes', () => { const input = "''''quadruple single quotes''''"; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("quadruple single quotes"); + expect(getSingleRefId(result)).toBe('quadruple single quotes'); }); -test("TestQuadrupleBacktickQuotes", () => { - const input = "````quadruple backtick quotes````"; +test('TestQuadrupleBacktickQuotes', () => { + const input = '````quadruple backtick quotes````'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("quadruple backtick quotes"); + expect(getSingleRefId(result)).toBe('quadruple backtick quotes'); }); // 
============================================================================ // Quintuple Quotes (5 quote chars) Tests // ============================================================================ -test("TestQuintupleDoubleQuotes", () => { +test('TestQuintupleDoubleQuotes', () => { const input = '"""""quintuple double quotes"""""'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("quintuple double quotes"); + expect(getSingleRefId(result)).toBe('quintuple double quotes'); }); -test("TestQuintupleDoubleQuotesWithQuadQuoteInside", () => { +test('TestQuintupleDoubleQuotesWithQuadQuoteInside', () => { const input = '"""""text with """" inside"""""'; const result = parser.parse(input); @@ -288,7 +292,7 @@ test("TestQuintupleDoubleQuotesWithQuadQuoteInside", () => { expect(getSingleRefId(result)).toBe('text with """" inside'); }); -test("TestQuintupleDoubleQuotesWithEscape", () => { +test('TestQuintupleDoubleQuotesWithEscape', () => { const input = '"""""text with """""""""" escaped quint"""""'; const result = parser.parse(input); @@ -296,75 +300,77 @@ test("TestQuintupleDoubleQuotesWithEscape", () => { expect(getSingleRefId(result)).toBe('text with """"" escaped quint'); }); -test("TestQuintupleSingleQuotes", () => { +test('TestQuintupleSingleQuotes', () => { const input = "'''''quintuple single quotes'''''"; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("quintuple single quotes"); + expect(getSingleRefId(result)).toBe('quintuple single quotes'); }); -test("TestQuintupleBacktickQuotes", () => { - const input = "`````quintuple backtick quotes`````"; +test('TestQuintupleBacktickQuotes', () => { + const input = '`````quintuple backtick quotes`````'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("quintuple backtick quotes"); + expect(getSingleRefId(result)).toBe('quintuple backtick quotes'); }); // 
============================================================================ // Complex Scenarios Tests // ============================================================================ -test("TestMixedQuotesInLink", () => { +test('TestMixedQuotesInLink', () => { const input = '("double" \'single\' `backtick`)'; const result = parser.parse(input); expect(result.length).toBe(1); expect(result[0].values.length).toBe(3); - expect(result[0].values[0].id).toBe("double"); - expect(result[0].values[1].id).toBe("single"); - expect(result[0].values[2].id).toBe("backtick"); + expect(result[0].values[0].id).toBe('double'); + expect(result[0].values[1].id).toBe('single'); + expect(result[0].values[2].id).toBe('backtick'); }); -test("TestBacktickAsIdInLink", () => { - const input = "(`myId`: value1 value2)"; +test('TestBacktickAsIdInLink', () => { + const input = '(`myId`: value1 value2)'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(result[0].id).toBe("myId"); + expect(result[0].id).toBe('myId'); expect(result[0].values.length).toBe(2); }); -test("TestCodeBlockLikeContent", () => { +test('TestCodeBlockLikeContent', () => { // This demonstrates using triple backticks for code-like content - const input = "```const x = 1;```"; + const input = '```const x = 1;```'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("const x = 1;"); + expect(getSingleRefId(result)).toBe('const x = 1;'); }); -test("TestNestedQuotesInMarkdown", () => { +test('TestNestedQuotesInMarkdown', () => { // Using double backticks to include single backtick - const input = "``Use `code` in markdown``"; + const input = '``Use `code` in markdown``'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("Use `code` in markdown"); + expect(getSingleRefId(result)).toBe('Use `code` in markdown'); }); -test("TestSQLWithQuotes", () => { +test('TestSQLWithQuotes', () => { // Using 
double single quotes to include single quote in SQL-like string // Inside '', to get a single quote, we need '''' (4 single quotes = escaped pair) const input = "''SELECT * FROM users WHERE name = ''''John''''''"; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe("SELECT * FROM users WHERE name = ''John''"); + expect(getSingleRefId(result)).toBe( + "SELECT * FROM users WHERE name = ''John''" + ); }); -test("TestJSONStringWithQuotes", () => { +test('TestJSONStringWithQuotes', () => { // Using double double quotes to include double quote in JSON-like string const input = '""{"key": "value"}""'; const result = parser.parse(input); @@ -377,7 +383,7 @@ test("TestJSONStringWithQuotes", () => { // Edge Cases // ============================================================================ -test("TestEmptySingleQuotedReference", () => { +test('TestEmptySingleQuotedReference', () => { const input = "''"; // Empty quoted reference should not match - becomes simple reference or fails // Let's verify what happens @@ -391,18 +397,18 @@ test("TestEmptySingleQuotedReference", () => { } }); -test("TestWhitespacePreservedInQuotes", () => { +test('TestWhitespacePreservedInQuotes', () => { const input = '" spaces "'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(getSingleRefId(result)).toBe(" spaces "); + expect(getSingleRefId(result)).toBe(' spaces '); }); -test("TestMultilineInDoubleDoubleQuotes", () => { +test('TestMultilineInDoubleDoubleQuotes', () => { const input = '(""line1\nline2"")'; const result = parser.parse(input); expect(result.length).toBe(1); - expect(result[0].values[0].id).toBe("line1\nline2"); + expect(result[0].values[0].id).toBe('line1\nline2'); }); diff --git a/python/links_notation/parser.py b/python/links_notation/parser.py index fa21483..acc84f7 100644 --- a/python/links_notation/parser.py +++ b/python/links_notation/parser.py @@ -288,7 +288,7 @@ def _parse_values(self, text: 
str) -> List[Dict]: while i < len(text): # Skip all whitespace (space, tab, newline, carriage return) - while i < len(text) and text[i] in ' \t\n\r': + while i < len(text) and text[i] in " \t\n\r": i += 1 if i >= len(text): break @@ -315,7 +315,7 @@ def _extract_next_value(self, text: str, start: int) -> tuple: # Check if this starts with a multi-quote string for quote_count in range(5, 0, -1): - for quote_char in ['"', "'", '`']: + for quote_char in ['"', "'", "`"]: quote_seq = quote_char * quote_count if text[start:].startswith(quote_seq): # Parse this multi-quote string @@ -342,7 +342,7 @@ def _extract_next_value(self, text: str, start: int) -> tuple: break # Check if this starts with a parenthesized expression - if text[start] == '(': + if text[start] == "(": paren_depth = 1 in_single = False in_double = False @@ -355,11 +355,11 @@ def _extract_next_value(self, text: str, start: int) -> tuple: in_single = not in_single elif char == '"' and not in_single and not in_backtick: in_double = not in_double - elif char == '`' and not in_single and not in_double: + elif char == "`" and not in_single and not in_double: in_backtick = not in_backtick - elif char == '(' and not in_single and not in_double and not in_backtick: + elif char == "(" and not in_single and not in_double and not in_backtick: paren_depth += 1 - elif char == ')' and not in_single and not in_double and not in_backtick: + elif char == ")" and not in_single and not in_double and not in_backtick: paren_depth -= 1 i += 1 @@ -377,9 +377,9 @@ def _extract_next_value(self, text: str, start: int) -> tuple: in_single = not in_single elif char == '"' and not in_single and not in_backtick: in_double = not in_double - elif char == '`' and not in_single and not in_double: + elif char == "`" and not in_single and not in_double: in_backtick = not in_backtick - elif char == ' ' and not in_single and not in_double and not in_backtick: + elif char == " " and not in_single and not in_double and not in_backtick: break i 
+= 1 @@ -402,7 +402,7 @@ def _extract_reference(self, text: str) -> str: # Try multi-quote strings (check longer sequences first: 5, 4, 3, 2, 1) for quote_count in range(5, 0, -1): - for quote_char in ['"', "'", '`']: + for quote_char in ['"', "'", "`"]: quote_seq = quote_char * quote_count if text.startswith(quote_seq) and len(text) > len(quote_seq): # Try to parse this multi-quote string @@ -427,19 +427,19 @@ def _parse_multi_quote_string(self, text: str, quote_char: str, quote_count: int if not text.startswith(open_close): return None - remaining = text[len(open_close):] + remaining = text[len(open_close) :] content = "" while remaining: # Check for escape sequence (2*N quotes) if remaining.startswith(escape_seq): content += escape_val - remaining = remaining[len(escape_seq):] + remaining = remaining[len(escape_seq) :] continue # Check for closing quotes (N quotes not followed by more quotes) if remaining.startswith(open_close): - after_close = remaining[len(open_close):] + after_close = remaining[len(open_close) :] # Make sure this is exactly N quotes (not more) if not after_close or not after_close.startswith(quote_char): # Closing found - but only if we consumed the entire text diff --git a/python/tests/test_multi_quote_parser.py b/python/tests/test_multi_quote_parser.py index 02060f0..4b2b919 100644 --- a/python/tests/test_multi_quote_parser.py +++ b/python/tests/test_multi_quote_parser.py @@ -177,7 +177,7 @@ class TestComplexScenarios: def test_mixed_quotes_in_link(self): parser = Parser() - result = parser.parse('("double" \'single\' `backtick`)') + result = parser.parse("(\"double\" 'single' `backtick`)") assert len(result) == 1 assert result[0].values is not None assert len(result[0].values) == 3 diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 21d9397..9dfde87 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "links-notation" -version = "0.12.0" +version = "0.13.0" dependencies = [ "nom", ] diff --git 
a/rust/src/parser.rs b/rust/src/parser.rs index c33592c..6acd4c8 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -143,14 +143,21 @@ fn simple_reference(input: &str) -> IResult<&str, String> { /// Parse a multi-quote string with a given quote character and count. /// For N quotes: opening = N quotes, closing = N quotes, escape = 2*N quotes -> N quotes -fn parse_multi_quote_string(input: &str, quote_char: char, quote_count: usize) -> IResult<&str, String> { +fn parse_multi_quote_string( + input: &str, + quote_char: char, + quote_count: usize, +) -> IResult<&str, String> { let open_close = quote_char.to_string().repeat(quote_count); let escape_seq = quote_char.to_string().repeat(quote_count * 2); let escape_val = quote_char.to_string().repeat(quote_count); // Check for opening quotes if !input.starts_with(&open_close) { - return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))); + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))); } let mut remaining = &input[open_close.len()..]; @@ -158,7 +165,10 @@ fn parse_multi_quote_string(input: &str, quote_char: char, quote_count: usize) - loop { if remaining.is_empty() { - return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))); + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))); } // Check for escape sequence (2*N quotes) @@ -253,15 +263,25 @@ fn reference(input: &str) -> IResult<&str, String> { // Try longer quote sequences first (greedy matching) alt(( // 5 quotes - double_quote_5, single_quote_5, backtick_quote_5, + double_quote_5, + single_quote_5, + backtick_quote_5, // 4 quotes - double_quote_4, single_quote_4, backtick_quote_4, + double_quote_4, + single_quote_4, + backtick_quote_4, // 3 quotes - double_quote_3, single_quote_3, backtick_quote_3, + double_quote_3, + single_quote_3, + backtick_quote_3, // 2 quotes - double_quote_2, single_quote_2, 
backtick_quote_2, + double_quote_2, + single_quote_2, + backtick_quote_2, // 1 quote - double_quote_1, single_quote_1, backtick_quote_1, + double_quote_1, + single_quote_1, + backtick_quote_1, // Simple unquoted simple_reference, )) From 89ac4357f0434c10c7de6981b40b6ca50f8157dc Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 15:12:21 +0000 Subject: [PATCH 05/15] Fix remaining lint issues in Python and Rust tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix Python import sorting (isort) in test_multi_quote_parser.py - Fix Rust formatting in multi_quote_parser_tests.rs 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- python/tests/test_multi_quote_parser.py | 1 + rust/tests/multi_quote_parser_tests.rs | 198 +++++++++++++++++++----- 2 files changed, 160 insertions(+), 39 deletions(-) diff --git a/python/tests/test_multi_quote_parser.py b/python/tests/test_multi_quote_parser.py index 4b2b919..d67715f 100644 --- a/python/tests/test_multi_quote_parser.py +++ b/python/tests/test_multi_quote_parser.py @@ -1,6 +1,7 @@ """Tests for multi-quote string support in parser.""" import pytest + from links_notation.parser import Parser diff --git a/rust/tests/multi_quote_parser_tests.rs b/rust/tests/multi_quote_parser_tests.rs index 8c8b9b8..8360cbc 100644 --- a/rust/tests/multi_quote_parser_tests.rs +++ b/rust/tests/multi_quote_parser_tests.rs @@ -7,7 +7,11 @@ fn get_single_ref_id(lino: &LiNo) -> Option<&String> { LiNo::Link { id: None, values } if values.len() == 1 => { if let LiNo::Ref(id) = &values[0] { Some(id) - } else if let LiNo::Link { id: Some(ref_id), values: inner_values } = &values[0] { + } else if let LiNo::Link { + id: Some(ref_id), + values: inner_values, + } = &values[0] + { if inner_values.is_empty() { Some(ref_id) } else { @@ -17,7 +21,10 @@ fn get_single_ref_id(lino: &LiNo) -> Option<&String> { None } } - LiNo::Link { id: Some(ref_id), values } if 
values.is_empty() => Some(ref_id), + LiNo::Link { + id: Some(ref_id), + values, + } if values.is_empty() => Some(ref_id), _ => None, } } @@ -28,7 +35,11 @@ fn get_values(lino: &LiNo) -> Option<&Vec>> { LiNo::Link { values, .. } => { // If it's a wrapper link (outer link) if values.len() == 1 { - if let LiNo::Link { values: inner_values, .. } = &values[0] { + if let LiNo::Link { + values: inner_values, + .. + } = &values[0] + { return Some(inner_values); } } @@ -45,13 +56,19 @@ fn get_values(lino: &LiNo) -> Option<&Vec>> { #[test] fn test_backtick_quoted_reference() { let result = parse_lino("`backtick quoted`").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"backtick quoted".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"backtick quoted".to_string()) + ); } #[test] fn test_backtick_quoted_with_spaces() { let result = parse_lino("`text with spaces`").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with spaces".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with spaces".to_string()) + ); } #[test] @@ -59,7 +76,10 @@ fn test_backtick_quoted_multiline() { let result = parse_lino("(`line1\nline2`)").unwrap(); if let LiNo::Link { values, .. } = &result { if let Some(inner) = values.first() { - if let LiNo::Link { values: inner_vals, .. } = inner { + if let LiNo::Link { + values: inner_vals, .. 
+ } = inner + { if let Some(LiNo::Ref(id)) = inner_vals.first() { assert_eq!(id, "line1\nline2"); return; @@ -77,7 +97,10 @@ fn test_backtick_quoted_multiline() { #[test] fn test_backtick_quoted_with_escaped_backtick() { let result = parse_lino("`text with `` escaped backtick`").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with ` escaped backtick".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with ` escaped backtick".to_string()) + ); } // ============================================================================ @@ -87,7 +110,10 @@ fn test_backtick_quoted_with_escaped_backtick() { #[test] fn test_single_quote_with_escaped_single_quote() { let result = parse_lino("'text with '' escaped quote'").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with ' escaped quote".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with ' escaped quote".to_string()) + ); } // ============================================================================ @@ -97,7 +123,10 @@ fn test_single_quote_with_escaped_single_quote() { #[test] fn test_double_quote_with_escaped_double_quote() { let result = parse_lino("\"text with \"\" escaped quote\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with \" escaped quote".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with \" escaped quote".to_string()) + ); } // ============================================================================ @@ -107,55 +136,82 @@ fn test_double_quote_with_escaped_double_quote() { #[test] fn test_double_double_quotes() { let result = parse_lino("\"\"double double quotes\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"double double quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"double double quotes".to_string()) + ); } #[test] fn test_double_double_quotes_with_single_quote_inside() { let result = parse_lino("\"\"text with \" inside\"\"").unwrap(); - 
assert_eq!(get_single_ref_id(&result), Some(&"text with \" inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with \" inside".to_string()) + ); } #[test] fn test_double_double_quotes_with_escape() { let result = parse_lino("\"\"text with \"\"\"\" escaped double\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with \"\" escaped double".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with \"\" escaped double".to_string()) + ); } #[test] fn test_double_single_quotes() { let result = parse_lino("''double single quotes''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"double single quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"double single quotes".to_string()) + ); } #[test] fn test_double_single_quotes_with_single_quote_inside() { let result = parse_lino("''text with ' inside''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with ' inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with ' inside".to_string()) + ); } #[test] fn test_double_single_quotes_with_escape() { let result = parse_lino("''text with '''' escaped single''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with '' escaped single".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with '' escaped single".to_string()) + ); } #[test] fn test_double_backtick_quotes() { let result = parse_lino("``double backtick quotes``").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"double backtick quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"double backtick quotes".to_string()) + ); } #[test] fn test_double_backtick_quotes_with_backtick_inside() { let result = parse_lino("``text with ` inside``").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with ` inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with ` inside".to_string()) + ); } 
#[test] fn test_double_backtick_quotes_with_escape() { let result = parse_lino("``text with ```` escaped backtick``").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with `` escaped backtick".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with `` escaped backtick".to_string()) + ); } // ============================================================================ @@ -165,55 +221,82 @@ fn test_double_backtick_quotes_with_escape() { #[test] fn test_triple_double_quotes() { let result = parse_lino("\"\"\"triple double quotes\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"triple double quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"triple double quotes".to_string()) + ); } #[test] fn test_triple_double_quotes_with_double_quote_inside() { let result = parse_lino("\"\"\"text with \"\" inside\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with \"\" inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with \"\" inside".to_string()) + ); } #[test] fn test_triple_double_quotes_with_escape() { let result = parse_lino("\"\"\"text with \"\"\"\"\"\" escaped triple\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with \"\"\" escaped triple".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with \"\"\" escaped triple".to_string()) + ); } #[test] fn test_triple_single_quotes() { let result = parse_lino("'''triple single quotes'''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"triple single quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"triple single quotes".to_string()) + ); } #[test] fn test_triple_single_quotes_with_double_quote_inside() { let result = parse_lino("'''text with '' inside'''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with '' inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with '' 
inside".to_string()) + ); } #[test] fn test_triple_single_quotes_with_escape() { let result = parse_lino("'''text with '''''' escaped triple'''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with ''' escaped triple".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with ''' escaped triple".to_string()) + ); } #[test] fn test_triple_backtick_quotes() { let result = parse_lino("```triple backtick quotes```").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"triple backtick quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"triple backtick quotes".to_string()) + ); } #[test] fn test_triple_backtick_quotes_with_double_backtick_inside() { let result = parse_lino("```text with `` inside```").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with `` inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with `` inside".to_string()) + ); } #[test] fn test_triple_backtick_quotes_with_escape() { let result = parse_lino("```text with `````` escaped triple```").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with ``` escaped triple".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with ``` escaped triple".to_string()) + ); } // ============================================================================ @@ -223,25 +306,37 @@ fn test_triple_backtick_quotes_with_escape() { #[test] fn test_quadruple_double_quotes() { let result = parse_lino("\"\"\"\"quadruple double quotes\"\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"quadruple double quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"quadruple double quotes".to_string()) + ); } #[test] fn test_quadruple_double_quotes_with_triple_quote_inside() { let result = parse_lino("\"\"\"\"text with \"\"\" inside\"\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with \"\"\" inside".to_string())); + assert_eq!( + 
get_single_ref_id(&result), + Some(&"text with \"\"\" inside".to_string()) + ); } #[test] fn test_quadruple_single_quotes() { let result = parse_lino("''''quadruple single quotes''''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"quadruple single quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"quadruple single quotes".to_string()) + ); } #[test] fn test_quadruple_backtick_quotes() { let result = parse_lino("````quadruple backtick quotes````").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"quadruple backtick quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"quadruple backtick quotes".to_string()) + ); } // ============================================================================ @@ -251,25 +346,37 @@ fn test_quadruple_backtick_quotes() { #[test] fn test_quintuple_double_quotes() { let result = parse_lino("\"\"\"\"\"quintuple double quotes\"\"\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"quintuple double quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"quintuple double quotes".to_string()) + ); } #[test] fn test_quintuple_double_quotes_with_quad_quote_inside() { let result = parse_lino("\"\"\"\"\"text with \"\"\"\" inside\"\"\"\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"text with \"\"\"\" inside".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"text with \"\"\"\" inside".to_string()) + ); } #[test] fn test_quintuple_single_quotes() { let result = parse_lino("'''''quintuple single quotes'''''").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"quintuple single quotes".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"quintuple single quotes".to_string()) + ); } #[test] fn test_quintuple_backtick_quotes() { let result = parse_lino("`````quintuple backtick quotes`````").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"quintuple backtick quotes".to_string())); + 
assert_eq!( + get_single_ref_id(&result), + Some(&"quintuple backtick quotes".to_string()) + ); } // ============================================================================ @@ -299,7 +406,11 @@ fn test_mixed_quotes_in_link() { fn test_backtick_as_id_in_link() { let result = parse_lino("(`myId`: value1 value2)").unwrap(); if let LiNo::Link { values, .. } = &result { - if let Some(LiNo::Link { id, values: inner_values }) = values.first() { + if let Some(LiNo::Link { + id, + values: inner_values, + }) = values.first() + { assert_eq!(id.as_deref(), Some("myId")); assert_eq!(inner_values.len(), 2); return; @@ -311,19 +422,28 @@ fn test_backtick_as_id_in_link() { #[test] fn test_code_block_like_content() { let result = parse_lino("```const x = 1;```").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"const x = 1;".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"const x = 1;".to_string()) + ); } #[test] fn test_nested_quotes_in_markdown() { let result = parse_lino("``Use `code` in markdown``").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"Use `code` in markdown".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"Use `code` in markdown".to_string()) + ); } #[test] fn test_json_string_with_quotes() { let result = parse_lino("\"\"{ \"key\": \"value\"}\"\"").unwrap(); - assert_eq!(get_single_ref_id(&result), Some(&"{ \"key\": \"value\"}".to_string())); + assert_eq!( + get_single_ref_id(&result), + Some(&"{ \"key\": \"value\"}".to_string()) + ); } // ============================================================================ From 414eb577d558711446abe3d545a4c66f4351c0fc Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 15:38:49 +0000 Subject: [PATCH 06/15] Support unlimited N-quote strings in all parsers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the parsers only supported 1-5 quote characters for quoted strings. 
This change extends support to any N quotes (6+) across all language implementations: - JavaScript (PEG.js): Added highQuotedReference rule with procedural parsing - Python: Changed static range(5,0,-1) to dynamic quote counting - Rust: Added parse_dynamic_quote_string() function - C#: Added ParseHighQuoteString() helper method in PEG grammar The existing 1-5 quote rules are preserved for backwards compatibility and performance. The new unlimited quote support activates for 6+ quotes. Added comprehensive tests for 6, 7, 8, and 10-quote strings in all parsers. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../MultiQuoteParserTests.cs | 49 + .../Link.Foundation.Links.Notation/Parser.peg | 94 +- js/src/grammar.pegjs | 108 +- js/src/parser-generated.js | 1160 +++++++++++++---- js/tests/MultiQuoteParser.test.js | 49 + python/links_notation/parser.py | 46 +- python/tests/test_multi_quote_parser.py | 41 +- rust/src/parser.rs | 100 +- rust/tests/multi_quote_parser_tests.rs | 52 + 9 files changed, 1297 insertions(+), 402 deletions(-) diff --git a/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs b/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs index 071191e..d38b11d 100644 --- a/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs +++ b/csharp/Link.Foundation.Links.Notation.Tests/MultiQuoteParserTests.cs @@ -328,5 +328,54 @@ public static void TestMultilineInDoubleDoubleQuotes() Assert.Single(result[0].Values); Assert.Equal("line1\nline2", result[0].Values![0].Id); } + + // ============================================================================ + // Unlimited Quotes (6+ quote chars) Tests + // ============================================================================ + + [Fact] + public static void TestUnlimitedQuotes6() + { + // Test 6-quote strings + var parser = new Parser(); + var result = parser.Parse("\"\"\"\"\"\"hello\"\"\"\"\"\""); + Assert.Equal("hello", 
GetSingleRefId(result)); + } + + [Fact] + public static void TestUnlimitedQuotes10() + { + // Test 10-quote strings + var parser = new Parser(); + var result = parser.Parse("\"\"\"\"\"\"\"\"\"\"very deeply quoted\"\"\"\"\"\"\"\"\"\""); + Assert.Equal("very deeply quoted", GetSingleRefId(result)); + } + + [Fact] + public static void TestUnlimitedQuotes6WithInnerQuotes() + { + // Test 6-quote strings with inner 5-quote sequences + var parser = new Parser(); + var result = parser.Parse("\"\"\"\"\"\"hello with \"\"\"\"\" five quotes inside\"\"\"\"\"\""); + Assert.Equal("hello with \"\"\"\"\" five quotes inside", GetSingleRefId(result)); + } + + [Fact] + public static void TestUnlimitedSingleQuotes7() + { + // Test 7-quote single quote strings + var parser = new Parser(); + var result = parser.Parse("'''''''seven single quotes'''''''"); + Assert.Equal("seven single quotes", GetSingleRefId(result)); + } + + [Fact] + public static void TestUnlimitedBackticks8() + { + // Test 8-quote backtick strings + var parser = new Parser(); + var result = parser.Parse("````````eight backticks````````"); + Assert.Equal("eight backticks", GetSingleRefId(result)); + } } } diff --git a/csharp/Link.Foundation.Links.Notation/Parser.peg b/csharp/Link.Foundation.Links.Notation/Parser.peg index 7165a58..843680c 100644 --- a/csharp/Link.Foundation.Links.Notation/Parser.peg +++ b/csharp/Link.Foundation.Links.Notation/Parser.peg @@ -1,6 +1,77 @@ @namespace Link.Foundation.Links.Notation @classname Parser @using System.Linq +@members +{ + // Field to store parsed high quote value + private string _highQuoteValue; + + /// + /// Parse a multi-quote string dynamically for N >= 6 quotes. + /// Stores result in _highQuoteValue field. 
+ /// + /// The raw string including opening and closing quotes + /// The quote character (", ', or `) + /// True if parsing succeeded and the result matches the input length + private bool ParseHighQuoteString(string input, char quoteChar) + { + _highQuoteValue = null; + if (string.IsNullOrEmpty(input)) return false; + + // Count opening quotes + int quoteCount = 0; + while (quoteCount < input.Length && input[quoteCount] == quoteChar) + { + quoteCount++; + } + + if (quoteCount < 6) return false; // Let the regular rules handle 1-5 quotes + + string openClose = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + string escapeVal = new string(quoteChar, quoteCount); + + int pos = quoteCount; // Start after opening quotes + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(escapeVal); + pos += escapeSeq.Length; + continue; + } + + // Check for closing quotes (exactly N quotes, not more) + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == openClose) + { + // Make sure it's exactly N quotes (not followed by more of the same quote) + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + // Found valid closing - check if we consumed the entire input + if (afterClose == input.Length) + { + _highQuoteValue = content.ToString(); + return true; + } + return false; + } + } + + // Take next character + content.Append(input[pos]); + pos++; + } + + // No closing quotes found + return false; + } +} document >> = #{ state["IndentationStack"] = new Stack(); state["IndentationStack"].Push(0); state["BaseIndentation"] = -1; } skipEmptyLines l:links _ eof { l.ToLinksList() } / #{ state["IndentationStack"] = new Stack(); state["IndentationStack"].Push(0); 
state["BaseIndentation"] = -1; } _ eof { new List>() } skipEmptyLines = ([ \t]* [\r\n])* links >> = fl:firstLine list:line* POP_INDENTATION { new List> { fl }.Concat(list).ToList() } @@ -21,10 +92,31 @@ singleLineValueLink > = v:singleLineValues { new Link(v) } multiLineValueLink > = "(" v:multiLineValues _ ")" { new Link(v) } indentedIdLink > = id:(reference) __ ":" eol { new Link(id) } -reference = quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference +// Reference can be quoted (with any number of quotes) or simple unquoted +// Order matters: try high quotes (6+), then quintuple down to single, then simple +reference = highQuotedReference / quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference simpleReference = "" referenceSymbol+ +// High quote sequences (6+ quotes) - use procedural parsing +// Capture everything that looks like a quoted string and validate +highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { raw } + +// Capture high quote content - match any characters including embedded quotes +highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseHighQuoteString(raw, '"') } { _highQuoteValue } +/ raw:highQuoteSingleRaw &{ ParseHighQuoteString(raw, '\'') } { _highQuoteValue } +/ raw:highQuoteBacktickRaw &{ ParseHighQuoteString(raw, '`') } { _highQuoteValue } + +// Raw capture patterns - return string directly +highQuoteDoubleRaw = "" ('"'+ highQuoteDoubleContent* '"'+) +highQuoteSingleRaw = "" ("'"+ highQuoteSingleContent* "'"+) +highQuoteBacktickRaw = "" ('`'+ highQuoteBacktickContent* '`'+) + +// Content for high quote strings - match non-quote chars OR quote sequences followed by non-quote +highQuoteDoubleContent = [^"] / '"'+ &[^"] +highQuoteSingleContent = [^'] / "'"+ &[^'] +highQuoteBacktickContent = [^`] / '`'+ &[^`] + // Single quotes (1 quote char) with 
escaping via doubling singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index 1aa96c2..c8169e2 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -35,23 +35,54 @@ return indentationStack[indentationStack.length - 1]; } - // Process escape sequences for multi-quote strings - // For N quotes: 2*N consecutive quotes become N quotes - function processEscapes(content, quoteChar, quoteCount) { - const escapeSequence = quoteChar.repeat(quoteCount * 2); - const replacement = quoteChar.repeat(quoteCount); - let result = ''; - let i = 0; - while (i < content.length) { - if (content.substr(i, escapeSequence.length) === escapeSequence) { - result += replacement; - i += escapeSequence.length; - } else { - result += content[i]; - i++; + // Parse a multi-quote string dynamically for N >= 6 quotes + // Returns { value: string, length: number } or null if no match + function parseHighQuoteString(inputStr, quoteChar) { + // Count opening quotes + let quoteCount = 0; + while (quoteCount < inputStr.length && inputStr[quoteCount] === quoteChar) { + quoteCount++; + } + + if (quoteCount < 6) { + return null; // Let the regular rules handle 1-5 quotes + } + + const openClose = quoteChar.repeat(quoteCount); + const escapeSeq = quoteChar.repeat(quoteCount * 2); + const escapeVal = quoteChar.repeat(quoteCount); + + let pos = quoteCount; // Start after opening quotes + let content = ''; + + while (pos < inputStr.length) { + // Check for escape sequence (2*N quotes) + if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { + content += escapeVal; + pos += escapeSeq.length; + continue; + } + + // Check for closing quotes (exactly N quotes, not more) + if (inputStr.substr(pos, quoteCount) === openClose) { + // Make sure it's exactly N quotes (not followed by more of the same quote) + const afterClose = pos + quoteCount; + if (afterClose >= inputStr.length || inputStr[afterClose] !== quoteChar) { + // 
Found valid closing + return { + value: content, + length: afterClose + }; + } } + + // Take next character + content += inputStr[pos]; + pos++; } - return result; + + // No closing quotes found + return null; } } @@ -98,13 +129,52 @@ multiLineValueLink = "(" v:multiLineValues _ ")" { return { values: v }; } indentedIdLink = id:reference __ ":" eol { return { id: id, values: [] }; } -// Reference can be quoted (with 1-5+ quotes) or simple unquoted -reference = quotedReference / simpleReference +// Reference can be quoted (with any number of quotes) or simple unquoted +// Order matters: try longer quote sequences first (greedy matching) +// For 6+ quotes, use procedural parsing via highQuotedReference +reference = highQuotedReference / quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference simpleReference = chars:referenceSymbol+ { return chars.join(''); } -// Quoted references - try longer quote sequences first (greedy matching) -quotedReference = quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference +// High quote sequences (6+ quotes) - use procedural parsing +// Capture everything that looks like a quoted string and validate +highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { + return raw; +} + +// Capture high quote content - match any characters including embedded quotes +// The key insight: for 6+ quotes, we need to capture chars that might include +// sequences of quotes less than the closing count +highQuoteCapture = raw:$('"'+ highQuoteDoubleContent* '"'+) &{ + const result = parseHighQuoteString(raw, '"'); + if (result && result.length === raw.length) { + options._highQuoteValue = result.value; + return true; + } + return false; +} { return options._highQuoteValue; } +/ raw:$("'"+ highQuoteSingleContent* "'"+ ) &{ + const result = parseHighQuoteString(raw, "'"); + if 
(result && result.length === raw.length) { + options._highQuoteValue = result.value; + return true; + } + return false; +} { return options._highQuoteValue; } +/ raw:$('`'+ highQuoteBacktickContent* '`'+) &{ + const result = parseHighQuoteString(raw, '`'); + if (result && result.length === raw.length) { + options._highQuoteValue = result.value; + return true; + } + return false; +} { return options._highQuoteValue; } + +// Content for high quote strings - match non-quote chars OR quote sequences +// followed by non-quote (so they're not closing sequences) +highQuoteDoubleContent = [^"] / '"'+ &[^"] +highQuoteSingleContent = [^'] / "'"+ &[^'] +highQuoteBacktickContent = [^\`] / '`'+ &[^\`] // Single quote (1 quote char) singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 diff --git a/js/src/parser-generated.js b/js/src/parser-generated.js index 80dcc08..30eac05 100644 --- a/js/src/parser-generated.js +++ b/js/src/parser-generated.js @@ -167,21 +167,21 @@ function peg$parse(input, options) { const peg$c0 = ":"; const peg$c1 = "("; const peg$c2 = ")"; - const peg$c3 = "\""; - const peg$c4 = "\"\""; - const peg$c5 = "'"; - const peg$c6 = "''"; - const peg$c7 = "`"; - const peg$c8 = "``"; - const peg$c9 = "\"\"\"\""; - const peg$c10 = "''''"; - const peg$c11 = "````"; - const peg$c12 = "\"\"\""; - const peg$c13 = "\"\"\"\"\"\""; - const peg$c14 = "'''"; - const peg$c15 = "''''''"; - const peg$c16 = "```"; - const peg$c17 = "``````"; + const peg$c3 = "\"\"\"\"\"\""; + const peg$c4 = "''''''"; + const peg$c5 = "``````"; + const peg$c6 = "\""; + const peg$c7 = "'"; + const peg$c8 = "`"; + const peg$c9 = "\"\""; + const peg$c10 = "''"; + const peg$c11 = "``"; + const peg$c12 = "\"\"\"\""; + const peg$c13 = "''''"; + const peg$c14 = "````"; + const peg$c15 = "\"\"\""; + const peg$c16 = "'''"; + const peg$c17 = "```"; const peg$c18 = "\"\"\"\"\"\"\"\""; const peg$c19 = "''''''''''"; const peg$c20 = "````````"; @@ -205,25 +205,25 @@ function peg$parse(input, 
options) { const peg$e2 = peg$literalExpectation(":", false); const peg$e3 = peg$literalExpectation("(", false); const peg$e4 = peg$literalExpectation(")", false); - const peg$e5 = peg$literalExpectation("\"", false); - const peg$e6 = peg$literalExpectation("\"\"", false); - const peg$e7 = peg$classExpectation(["\""], true, false, false); - const peg$e8 = peg$literalExpectation("'", false); - const peg$e9 = peg$literalExpectation("''", false); - const peg$e10 = peg$classExpectation(["'"], true, false, false); - const peg$e11 = peg$literalExpectation("`", false); - const peg$e12 = peg$literalExpectation("``", false); + const peg$e5 = peg$literalExpectation("\"\"\"\"\"\"", false); + const peg$e6 = peg$literalExpectation("''''''", false); + const peg$e7 = peg$literalExpectation("``````", false); + const peg$e8 = peg$literalExpectation("\"", false); + const peg$e9 = peg$literalExpectation("'", false); + const peg$e10 = peg$literalExpectation("`", false); + const peg$e11 = peg$classExpectation(["\""], true, false, false); + const peg$e12 = peg$classExpectation(["'"], true, false, false); const peg$e13 = peg$classExpectation(["`"], true, false, false); - const peg$e14 = peg$literalExpectation("\"\"\"\"", false); - const peg$e15 = peg$anyExpectation(); - const peg$e16 = peg$literalExpectation("''''", false); - const peg$e17 = peg$literalExpectation("````", false); - const peg$e18 = peg$literalExpectation("\"\"\"", false); - const peg$e19 = peg$literalExpectation("\"\"\"\"\"\"", false); - const peg$e20 = peg$literalExpectation("'''", false); - const peg$e21 = peg$literalExpectation("''''''", false); - const peg$e22 = peg$literalExpectation("```", false); - const peg$e23 = peg$literalExpectation("``````", false); + const peg$e14 = peg$literalExpectation("\"\"", false); + const peg$e15 = peg$literalExpectation("''", false); + const peg$e16 = peg$literalExpectation("``", false); + const peg$e17 = peg$literalExpectation("\"\"\"\"", false); + const peg$e18 = 
peg$anyExpectation(); + const peg$e19 = peg$literalExpectation("''''", false); + const peg$e20 = peg$literalExpectation("````", false); + const peg$e21 = peg$literalExpectation("\"\"\"", false); + const peg$e22 = peg$literalExpectation("'''", false); + const peg$e23 = peg$literalExpectation("```", false); const peg$e24 = peg$literalExpectation("\"\"\"\"\"\"\"\"", false); const peg$e25 = peg$literalExpectation("''''''''''", false); const peg$e26 = peg$literalExpectation("````````", false); @@ -264,52 +264,82 @@ function peg$parse(input, options) { function peg$f23(v) { return { values: v }; } function peg$f24(id) { return { id: id, values: [] }; } function peg$f25(chars) { return chars.join(''); } - function peg$f26(r) { return r.join(''); } - function peg$f27() { return '"'; } - function peg$f28(r) { return r.join(''); } - function peg$f29() { return "'"; } - function peg$f30(r) { return r.join(''); } - function peg$f31() { return '`'; } - function peg$f32(r) { return r.join(''); } - function peg$f33() { return '""'; } - function peg$f34(c) { return c; } + function peg$f26(raw) { + return raw; + } + function peg$f27(raw) { + const result = parseHighQuoteString(raw, '"'); + if (result && result.length === raw.length) { + options._highQuoteValue = result.value; + return true; + } + return false; + } + function peg$f28(raw) { return options._highQuoteValue; } + function peg$f29(raw) { + const result = parseHighQuoteString(raw, "'"); + if (result && result.length === raw.length) { + options._highQuoteValue = result.value; + return true; + } + return false; + } + function peg$f30(raw) { return options._highQuoteValue; } + function peg$f31(raw) { + const result = parseHighQuoteString(raw, '`'); + if (result && result.length === raw.length) { + options._highQuoteValue = result.value; + return true; + } + return false; + } + function peg$f32(raw) { return options._highQuoteValue; } + function peg$f33(r) { return r.join(''); } + function peg$f34() { return '"'; } function 
peg$f35(r) { return r.join(''); } - function peg$f36() { return "''"; } - function peg$f37(c) { return c; } - function peg$f38(r) { return r.join(''); } - function peg$f39() { return '``'; } - function peg$f40(c) { return c; } - function peg$f41(r) { return r.join(''); } - function peg$f42() { return '"""'; } - function peg$f43(c) { return c; } - function peg$f44(r) { return r.join(''); } - function peg$f45() { return "'''"; } - function peg$f46(c) { return c; } - function peg$f47(r) { return r.join(''); } - function peg$f48() { return '```'; } - function peg$f49(c) { return c; } - function peg$f50(r) { return r.join(''); } - function peg$f51() { return '""""'; } - function peg$f52(c) { return c; } - function peg$f53(r) { return r.join(''); } - function peg$f54() { return "''''"; } - function peg$f55(c) { return c; } - function peg$f56(r) { return r.join(''); } - function peg$f57() { return '````'; } - function peg$f58(c) { return c; } - function peg$f59(r) { return r.join(''); } - function peg$f60() { return '"""""'; } - function peg$f61(c) { return c; } - function peg$f62(r) { return r.join(''); } - function peg$f63() { return "'''''"; } - function peg$f64(c) { return c; } - function peg$f65(r) { return r.join(''); } - function peg$f66() { return '`````'; } - function peg$f67(c) { return c; } - function peg$f68(spaces) { setBaseIndentation(spaces); } - function peg$f69(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } - function peg$f70(spaces) { pushIndentation(spaces); } - function peg$f71(spaces) { return checkIndentation(spaces); } + function peg$f36() { return "'"; } + function peg$f37(r) { return r.join(''); } + function peg$f38() { return '`'; } + function peg$f39(r) { return r.join(''); } + function peg$f40() { return '""'; } + function peg$f41(c) { return c; } + function peg$f42(r) { return r.join(''); } + function peg$f43() { return "''"; } + function peg$f44(c) { return c; } + function peg$f45(r) { return r.join(''); } + 
function peg$f46() { return '``'; } + function peg$f47(c) { return c; } + function peg$f48(r) { return r.join(''); } + function peg$f49() { return '"""'; } + function peg$f50(c) { return c; } + function peg$f51(r) { return r.join(''); } + function peg$f52() { return "'''"; } + function peg$f53(c) { return c; } + function peg$f54(r) { return r.join(''); } + function peg$f55() { return '```'; } + function peg$f56(c) { return c; } + function peg$f57(r) { return r.join(''); } + function peg$f58() { return '""""'; } + function peg$f59(c) { return c; } + function peg$f60(r) { return r.join(''); } + function peg$f61() { return "''''"; } + function peg$f62(c) { return c; } + function peg$f63(r) { return r.join(''); } + function peg$f64() { return '````'; } + function peg$f65(c) { return c; } + function peg$f66(r) { return r.join(''); } + function peg$f67() { return '"""""'; } + function peg$f68(c) { return c; } + function peg$f69(r) { return r.join(''); } + function peg$f70() { return "'''''"; } + function peg$f71(c) { return c; } + function peg$f72(r) { return r.join(''); } + function peg$f73() { return '`````'; } + function peg$f74(c) { return c; } + function peg$f75(spaces) { setBaseIndentation(spaces); } + function peg$f76(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } + function peg$f77(spaces) { pushIndentation(spaces); } + function peg$f78(spaces) { return checkIndentation(spaces); } let peg$currPos = options.peg$currPos | 0; let peg$savedPos = peg$currPos; const peg$posDetailsCache = [{ line: 1, column: 1 }]; @@ -1082,9 +1112,24 @@ function peg$parse(input, options) { function peg$parsereference() { let s0; - s0 = peg$parsequotedReference(); + s0 = peg$parsehighQuotedReference(); if (s0 === peg$FAILED) { - s0 = peg$parsesimpleReference(); + s0 = peg$parsequintupleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsequadrupleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsetripleQuotedReference(); + if (s0 === 
peg$FAILED) { + s0 = peg$parsedoubleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuotedReference(); + if (s0 === peg$FAILED) { + s0 = peg$parsesimpleReference(); + } + } + } + } + } } return s0; @@ -1113,19 +1158,332 @@ function peg$parse(input, options) { return s0; } - function peg$parsequotedReference() { - let s0; + function peg$parsehighQuotedReference() { + let s0, s1, s2; + + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.substr(peg$currPos, 6) === peg$c3) { + s2 = peg$c3; + peg$currPos += 6; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e5); } + } + if (s2 === peg$FAILED) { + if (input.substr(peg$currPos, 6) === peg$c4) { + s2 = peg$c4; + peg$currPos += 6; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } + } + if (s2 === peg$FAILED) { + if (input.substr(peg$currPos, 6) === peg$c5) { + s2 = peg$c5; + peg$currPos += 6; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e7); } + } + } + } + peg$silentFails--; + if (s2 !== peg$FAILED) { + peg$currPos = s1; + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + s2 = peg$parsehighQuoteCapture(); + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f26(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } - s0 = peg$parsequintupleQuotedReference(); + return s0; + } + + function peg$parsehighQuoteCapture() { + let s0, s1, s2, s3, s4, s5, s6; + + s0 = peg$currPos; + s1 = peg$currPos; + s2 = peg$currPos; + s3 = []; + if (input.charCodeAt(peg$currPos) === 34) { + s4 = peg$c6; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + if (s4 !== peg$FAILED) { + while (s4 !== peg$FAILED) { + s3.push(s4); + if (input.charCodeAt(peg$currPos) === 34) { + s4 = peg$c6; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); 
} + } + } + } else { + s3 = peg$FAILED; + } + if (s3 !== peg$FAILED) { + s4 = []; + s5 = peg$parsehighQuoteDoubleContent(); + while (s5 !== peg$FAILED) { + s4.push(s5); + s5 = peg$parsehighQuoteDoubleContent(); + } + s5 = []; + if (input.charCodeAt(peg$currPos) === 34) { + s6 = peg$c6; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + if (s6 !== peg$FAILED) { + while (s6 !== peg$FAILED) { + s5.push(s6); + if (input.charCodeAt(peg$currPos) === 34) { + s6 = peg$c6; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + } + } else { + s5 = peg$FAILED; + } + if (s5 !== peg$FAILED) { + s3 = [s3, s4, s5]; + s2 = s3; + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + s1 = input.substring(s1, peg$currPos); + } else { + s1 = s2; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f27(s1); + if (s2) { + s2 = undefined; + } else { + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f28(s1); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } if (s0 === peg$FAILED) { - s0 = peg$parsequadrupleQuotedReference(); + s0 = peg$currPos; + s1 = peg$currPos; + s2 = peg$currPos; + s3 = []; + if (input.charCodeAt(peg$currPos) === 39) { + s4 = peg$c7; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + if (s4 !== peg$FAILED) { + while (s4 !== peg$FAILED) { + s3.push(s4); + if (input.charCodeAt(peg$currPos) === 39) { + s4 = peg$c7; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + } + } else { + s3 = peg$FAILED; + } + if (s3 !== peg$FAILED) { + s4 = []; + s5 = peg$parsehighQuoteSingleContent(); + while (s5 !== peg$FAILED) { + s4.push(s5); + s5 = peg$parsehighQuoteSingleContent(); + } + s5 = []; + 
if (input.charCodeAt(peg$currPos) === 39) { + s6 = peg$c7; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + if (s6 !== peg$FAILED) { + while (s6 !== peg$FAILED) { + s5.push(s6); + if (input.charCodeAt(peg$currPos) === 39) { + s6 = peg$c7; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + } + } else { + s5 = peg$FAILED; + } + if (s5 !== peg$FAILED) { + s3 = [s3, s4, s5]; + s2 = s3; + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + s1 = input.substring(s1, peg$currPos); + } else { + s1 = s2; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f29(s1); + if (s2) { + s2 = undefined; + } else { + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f30(s1); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } if (s0 === peg$FAILED) { - s0 = peg$parsetripleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsedoubleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuotedReference(); + s0 = peg$currPos; + s1 = peg$currPos; + s2 = peg$currPos; + s3 = []; + if (input.charCodeAt(peg$currPos) === 96) { + s4 = peg$c8; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + if (s4 !== peg$FAILED) { + while (s4 !== peg$FAILED) { + s3.push(s4); + if (input.charCodeAt(peg$currPos) === 96) { + s4 = peg$c8; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + } + } else { + s3 = peg$FAILED; + } + if (s3 !== peg$FAILED) { + s4 = []; + s5 = peg$parsehighQuoteBacktickContent(); + while (s5 !== peg$FAILED) { + s4.push(s5); + s5 = peg$parsehighQuoteBacktickContent(); + } + s5 = []; + if (input.charCodeAt(peg$currPos) === 96) { + s6 = peg$c8; + peg$currPos++; + } else { + 
s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + if (s6 !== peg$FAILED) { + while (s6 !== peg$FAILED) { + s5.push(s6); + if (input.charCodeAt(peg$currPos) === 96) { + s6 = peg$c8; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + } + } else { + s5 = peg$FAILED; + } + if (s5 !== peg$FAILED) { + s3 = [s3, s4, s5]; + s2 = s3; + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + s1 = input.substring(s1, peg$currPos); + } else { + s1 = s2; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f31(s1); + if (s2) { + s2 = undefined; + } else { + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f32(s1); + } else { + peg$currPos = s0; + s0 = peg$FAILED; } + } else { + peg$currPos = s0; + s0 = peg$FAILED; } } } @@ -1133,6 +1491,207 @@ function peg$parse(input, options) { return s0; } + function peg$parsehighQuoteDoubleContent() { + let s0, s1, s2, s3; + + s0 = input.charAt(peg$currPos); + if (peg$r2.test(s0)) { + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e11); } + } + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = []; + if (input.charCodeAt(peg$currPos) === 34) { + s2 = peg$c6; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + if (s2 !== peg$FAILED) { + while (s2 !== peg$FAILED) { + s1.push(s2); + if (input.charCodeAt(peg$currPos) === 34) { + s2 = peg$c6; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + } + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + s2 = peg$currPos; + peg$silentFails++; + s3 = input.charAt(peg$currPos); + if (peg$r2.test(s3)) { + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e11); } + } + peg$silentFails--; + if (s3 !== 
peg$FAILED) { + peg$currPos = s2; + s2 = undefined; + } else { + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + s1 = [s1, s2]; + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsehighQuoteSingleContent() { + let s0, s1, s2, s3; + + s0 = input.charAt(peg$currPos); + if (peg$r3.test(s0)) { + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e12); } + } + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = []; + if (input.charCodeAt(peg$currPos) === 39) { + s2 = peg$c7; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + if (s2 !== peg$FAILED) { + while (s2 !== peg$FAILED) { + s1.push(s2); + if (input.charCodeAt(peg$currPos) === 39) { + s2 = peg$c7; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e9); } + } + } + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + s2 = peg$currPos; + peg$silentFails++; + s3 = input.charAt(peg$currPos); + if (peg$r3.test(s3)) { + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e12); } + } + peg$silentFails--; + if (s3 !== peg$FAILED) { + peg$currPos = s2; + s2 = undefined; + } else { + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + s1 = [s1, s2]; + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + + function peg$parsehighQuoteBacktickContent() { + let s0, s1, s2, s3; + + s0 = input.charAt(peg$currPos); + if (peg$r4.test(s0)) { + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e13); } + } + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = []; + if (input.charCodeAt(peg$currPos) === 96) { + s2 = peg$c8; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + 
if (s2 !== peg$FAILED) { + while (s2 !== peg$FAILED) { + s1.push(s2); + if (input.charCodeAt(peg$currPos) === 96) { + s2 = peg$c8; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e10); } + } + } + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + s2 = peg$currPos; + peg$silentFails++; + s3 = input.charAt(peg$currPos); + if (peg$r4.test(s3)) { + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e13); } + } + peg$silentFails--; + if (s3 !== peg$FAILED) { + peg$currPos = s2; + s2 = undefined; + } else { + s2 = peg$FAILED; + } + if (s2 !== peg$FAILED) { + s1 = [s1, s2]; + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } + + return s0; + } + function peg$parsesingleQuotedReference() { let s0; @@ -1152,11 +1711,11 @@ function peg$parse(input, options) { s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 34) { - s1 = peg$c3; + s1 = peg$c6; peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e5); } + if (peg$silentFails === 0) { peg$fail(peg$e8); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1166,15 +1725,15 @@ function peg$parse(input, options) { s3 = peg$parsedoubleQuote1Content(); } if (input.charCodeAt(peg$currPos) === 34) { - s3 = peg$c3; + s3 = peg$c6; peg$currPos++; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e5); } + if (peg$silentFails === 0) { peg$fail(peg$e8); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f26(s2); + s0 = peg$f33(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1191,16 +1750,16 @@ function peg$parse(input, options) { let s0, s1; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c4) { - s1 = peg$c4; + if (input.substr(peg$currPos, 2) === peg$c9) { + s1 = peg$c9; peg$currPos += 2; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } + if (peg$silentFails === 0) { 
peg$fail(peg$e14); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f27(); + s1 = peg$f34(); } s0 = s1; if (s0 === peg$FAILED) { @@ -1209,7 +1768,7 @@ function peg$parse(input, options) { peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e7); } + if (peg$silentFails === 0) { peg$fail(peg$e11); } } } @@ -1221,11 +1780,11 @@ function peg$parse(input, options) { s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 39) { - s1 = peg$c5; + s1 = peg$c7; peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1235,15 +1794,15 @@ function peg$parse(input, options) { s3 = peg$parsesingleQuote1Content(); } if (input.charCodeAt(peg$currPos) === 39) { - s3 = peg$c5; + s3 = peg$c7; peg$currPos++; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f28(s2); + s0 = peg$f35(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1260,16 +1819,16 @@ function peg$parse(input, options) { let s0, s1; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c6) { - s1 = peg$c6; + if (input.substr(peg$currPos, 2) === peg$c10) { + s1 = peg$c10; peg$currPos += 2; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e15); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f29(); + s1 = peg$f36(); } s0 = s1; if (s0 === peg$FAILED) { @@ -1278,7 +1837,7 @@ function peg$parse(input, options) { peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } + if (peg$silentFails === 0) { peg$fail(peg$e12); } } } @@ -1290,11 +1849,11 @@ function peg$parse(input, options) { s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 96) { - s1 = peg$c7; + s1 = peg$c8; peg$currPos++; } else { s1 
= peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e11); } + if (peg$silentFails === 0) { peg$fail(peg$e10); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1304,15 +1863,15 @@ function peg$parse(input, options) { s3 = peg$parsebacktickQuote1Content(); } if (input.charCodeAt(peg$currPos) === 96) { - s3 = peg$c7; + s3 = peg$c8; peg$currPos++; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e11); } + if (peg$silentFails === 0) { peg$fail(peg$e10); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f30(s2); + s0 = peg$f37(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1329,16 +1888,16 @@ function peg$parse(input, options) { let s0, s1; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c8) { - s1 = peg$c8; + if (input.substr(peg$currPos, 2) === peg$c11) { + s1 = peg$c11; peg$currPos += 2; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } + if (peg$silentFails === 0) { peg$fail(peg$e16); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f31(); + s1 = peg$f38(); } s0 = s1; if (s0 === peg$FAILED) { @@ -1372,12 +1931,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c4) { - s1 = peg$c4; + if (input.substr(peg$currPos, 2) === peg$c9) { + s1 = peg$c9; peg$currPos += 2; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } + if (peg$silentFails === 0) { peg$fail(peg$e14); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1386,16 +1945,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsedoubleQuote2Content(); } - if (input.substr(peg$currPos, 2) === peg$c4) { - s3 = peg$c4; + if (input.substr(peg$currPos, 2) === peg$c9) { + s3 = peg$c9; peg$currPos += 2; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } + if (peg$silentFails === 0) { peg$fail(peg$e14); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f32(s2); + s0 = peg$f39(s2); } else { peg$currPos = 
s0; s0 = peg$FAILED; @@ -1412,28 +1971,28 @@ function peg$parse(input, options) { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c9) { - s1 = peg$c9; + if (input.substr(peg$currPos, 4) === peg$c12) { + s1 = peg$c12; peg$currPos += 4; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } + if (peg$silentFails === 0) { peg$fail(peg$e17); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f33(); + s1 = peg$f40(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 2) === peg$c4) { - s2 = peg$c4; + if (input.substr(peg$currPos, 2) === peg$c9) { + s2 = peg$c9; peg$currPos += 2; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } + if (peg$silentFails === 0) { peg$fail(peg$e14); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -1448,11 +2007,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f34(s2); + s0 = peg$f41(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1470,12 +2029,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c6) { - s1 = peg$c6; + if (input.substr(peg$currPos, 2) === peg$c10) { + s1 = peg$c10; peg$currPos += 2; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e15); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1484,16 +2043,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsesingleQuote2Content(); } - if (input.substr(peg$currPos, 2) === peg$c6) { - s3 = peg$c6; + if (input.substr(peg$currPos, 2) === peg$c10) { + s3 = peg$c10; peg$currPos += 2; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 
0) { peg$fail(peg$e15); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f35(s2); + s0 = peg$f42(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1510,28 +2069,28 @@ function peg$parse(input, options) { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c10) { - s1 = peg$c10; + if (input.substr(peg$currPos, 4) === peg$c13) { + s1 = peg$c13; peg$currPos += 4; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } + if (peg$silentFails === 0) { peg$fail(peg$e19); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f36(); + s1 = peg$f43(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 2) === peg$c6) { - s2 = peg$c6; + if (input.substr(peg$currPos, 2) === peg$c10) { + s2 = peg$c10; peg$currPos += 2; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } + if (peg$silentFails === 0) { peg$fail(peg$e15); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -1546,11 +2105,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f37(s2); + s0 = peg$f44(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1568,12 +2127,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c8) { - s1 = peg$c8; + if (input.substr(peg$currPos, 2) === peg$c11) { + s1 = peg$c11; peg$currPos += 2; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } + if (peg$silentFails === 0) { peg$fail(peg$e16); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1582,16 +2141,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsebacktickQuote2Content(); } - if (input.substr(peg$currPos, 2) === peg$c8) { - s3 = peg$c8; + if (input.substr(peg$currPos, 2) === 
peg$c11) { + s3 = peg$c11; peg$currPos += 2; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } + if (peg$silentFails === 0) { peg$fail(peg$e16); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f38(s2); + s0 = peg$f45(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1608,28 +2167,28 @@ function peg$parse(input, options) { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c11) { - s1 = peg$c11; + if (input.substr(peg$currPos, 4) === peg$c14) { + s1 = peg$c14; peg$currPos += 4; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } + if (peg$silentFails === 0) { peg$fail(peg$e20); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f39(); + s1 = peg$f46(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 2) === peg$c8) { - s2 = peg$c8; + if (input.substr(peg$currPos, 2) === peg$c11) { + s2 = peg$c11; peg$currPos += 2; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } + if (peg$silentFails === 0) { peg$fail(peg$e16); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -1644,11 +2203,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f40(s2); + s0 = peg$f47(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1680,12 +2239,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 3) === peg$c12) { - s1 = peg$c12; + if (input.substr(peg$currPos, 3) === peg$c15) { + s1 = peg$c15; peg$currPos += 3; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } + if (peg$silentFails === 0) { peg$fail(peg$e21); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1694,16 +2253,16 @@ function peg$parse(input, options) { 
s2.push(s3); s3 = peg$parsedoubleQuote3Content(); } - if (input.substr(peg$currPos, 3) === peg$c12) { - s3 = peg$c12; + if (input.substr(peg$currPos, 3) === peg$c15) { + s3 = peg$c15; peg$currPos += 3; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } + if (peg$silentFails === 0) { peg$fail(peg$e21); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f41(s2); + s0 = peg$f48(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1720,28 +2279,28 @@ function peg$parse(input, options) { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 6) === peg$c13) { - s1 = peg$c13; + if (input.substr(peg$currPos, 6) === peg$c3) { + s1 = peg$c3; peg$currPos += 6; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e19); } + if (peg$silentFails === 0) { peg$fail(peg$e5); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f42(); + s1 = peg$f49(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 3) === peg$c12) { - s2 = peg$c12; + if (input.substr(peg$currPos, 3) === peg$c15) { + s2 = peg$c15; peg$currPos += 3; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } + if (peg$silentFails === 0) { peg$fail(peg$e21); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -1756,11 +2315,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f43(s2); + s0 = peg$f50(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1778,12 +2337,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 3) === peg$c14) { - s1 = peg$c14; + if (input.substr(peg$currPos, 3) === peg$c16) { + s1 = peg$c16; peg$currPos += 3; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } + 
if (peg$silentFails === 0) { peg$fail(peg$e22); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1792,16 +2351,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsesingleQuote3Content(); } - if (input.substr(peg$currPos, 3) === peg$c14) { - s3 = peg$c14; + if (input.substr(peg$currPos, 3) === peg$c16) { + s3 = peg$c16; peg$currPos += 3; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } + if (peg$silentFails === 0) { peg$fail(peg$e22); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f44(s2); + s0 = peg$f51(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1818,28 +2377,28 @@ function peg$parse(input, options) { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 6) === peg$c15) { - s1 = peg$c15; + if (input.substr(peg$currPos, 6) === peg$c4) { + s1 = peg$c4; peg$currPos += 6; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e21); } + if (peg$silentFails === 0) { peg$fail(peg$e6); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f45(); + s1 = peg$f52(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 3) === peg$c14) { - s2 = peg$c14; + if (input.substr(peg$currPos, 3) === peg$c16) { + s2 = peg$c16; peg$currPos += 3; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } + if (peg$silentFails === 0) { peg$fail(peg$e22); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -1854,11 +2413,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f46(s2); + s0 = peg$f53(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1876,12 +2435,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 3) === peg$c16) { - s1 = peg$c16; + if 
(input.substr(peg$currPos, 3) === peg$c17) { + s1 = peg$c17; peg$currPos += 3; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e22); } + if (peg$silentFails === 0) { peg$fail(peg$e23); } } if (s1 !== peg$FAILED) { s2 = []; @@ -1890,16 +2449,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsebacktickQuote3Content(); } - if (input.substr(peg$currPos, 3) === peg$c16) { - s3 = peg$c16; + if (input.substr(peg$currPos, 3) === peg$c17) { + s3 = peg$c17; peg$currPos += 3; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e22); } + if (peg$silentFails === 0) { peg$fail(peg$e23); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f47(s2); + s0 = peg$f54(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1916,28 +2475,28 @@ function peg$parse(input, options) { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 6) === peg$c17) { - s1 = peg$c17; + if (input.substr(peg$currPos, 6) === peg$c5) { + s1 = peg$c5; peg$currPos += 6; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e23); } + if (peg$silentFails === 0) { peg$fail(peg$e7); } } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f48(); + s1 = peg$f55(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 3) === peg$c16) { - s2 = peg$c16; + if (input.substr(peg$currPos, 3) === peg$c17) { + s2 = peg$c17; peg$currPos += 3; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e22); } + if (peg$silentFails === 0) { peg$fail(peg$e23); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -1952,11 +2511,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f49(s2); + s0 = peg$f56(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1988,12 +2547,12 
@@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c9) { - s1 = peg$c9; + if (input.substr(peg$currPos, 4) === peg$c12) { + s1 = peg$c12; peg$currPos += 4; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } + if (peg$silentFails === 0) { peg$fail(peg$e17); } } if (s1 !== peg$FAILED) { s2 = []; @@ -2002,16 +2561,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsedoubleQuote4Content(); } - if (input.substr(peg$currPos, 4) === peg$c9) { - s3 = peg$c9; + if (input.substr(peg$currPos, 4) === peg$c12) { + s3 = peg$c12; peg$currPos += 4; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } + if (peg$silentFails === 0) { peg$fail(peg$e17); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f50(s2); + s0 = peg$f57(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2037,19 +2596,19 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f51(); + s1 = peg$f58(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 4) === peg$c9) { - s2 = peg$c9; + if (input.substr(peg$currPos, 4) === peg$c12) { + s2 = peg$c12; peg$currPos += 4; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } + if (peg$silentFails === 0) { peg$fail(peg$e17); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -2064,11 +2623,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f52(s2); + s0 = peg$f59(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2086,12 +2645,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c10) { - s1 = peg$c10; + if 
(input.substr(peg$currPos, 4) === peg$c13) { + s1 = peg$c13; peg$currPos += 4; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } + if (peg$silentFails === 0) { peg$fail(peg$e19); } } if (s1 !== peg$FAILED) { s2 = []; @@ -2100,16 +2659,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsesingleQuote4Content(); } - if (input.substr(peg$currPos, 4) === peg$c10) { - s3 = peg$c10; + if (input.substr(peg$currPos, 4) === peg$c13) { + s3 = peg$c13; peg$currPos += 4; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } + if (peg$silentFails === 0) { peg$fail(peg$e19); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f53(s2); + s0 = peg$f60(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2135,19 +2694,19 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f54(); + s1 = peg$f61(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 4) === peg$c10) { - s2 = peg$c10; + if (input.substr(peg$currPos, 4) === peg$c13) { + s2 = peg$c13; peg$currPos += 4; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } + if (peg$silentFails === 0) { peg$fail(peg$e19); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -2162,11 +2721,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f55(s2); + s0 = peg$f62(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2184,12 +2743,12 @@ function peg$parse(input, options) { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c11) { - s1 = peg$c11; + if (input.substr(peg$currPos, 4) === peg$c14) { + s1 = peg$c14; peg$currPos += 4; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } + if 
(peg$silentFails === 0) { peg$fail(peg$e20); } } if (s1 !== peg$FAILED) { s2 = []; @@ -2198,16 +2757,16 @@ function peg$parse(input, options) { s2.push(s3); s3 = peg$parsebacktickQuote4Content(); } - if (input.substr(peg$currPos, 4) === peg$c11) { - s3 = peg$c11; + if (input.substr(peg$currPos, 4) === peg$c14) { + s3 = peg$c14; peg$currPos += 4; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } + if (peg$silentFails === 0) { peg$fail(peg$e20); } } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f56(s2); + s0 = peg$f63(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2233,19 +2792,19 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f57(); + s1 = peg$f64(); } s0 = s1; if (s0 === peg$FAILED) { s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 4) === peg$c11) { - s2 = peg$c11; + if (input.substr(peg$currPos, 4) === peg$c14) { + s2 = peg$c14; peg$currPos += 4; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } + if (peg$silentFails === 0) { peg$fail(peg$e20); } } peg$silentFails--; if (s2 === peg$FAILED) { @@ -2260,11 +2819,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f58(s2); + s0 = peg$f65(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2319,7 +2878,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f59(s2); + s0 = peg$f66(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2345,7 +2904,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f60(); + s1 = peg$f67(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2372,11 +2931,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 
0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f61(s2); + s0 = peg$f68(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2417,7 +2976,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f62(s2); + s0 = peg$f69(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2443,7 +3002,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f63(); + s1 = peg$f70(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2470,11 +3029,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f64(s2); + s0 = peg$f71(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2515,7 +3074,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f65(s2); + s0 = peg$f72(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2541,7 +3100,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f66(); + s1 = peg$f73(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2568,11 +3127,11 @@ function peg$parse(input, options) { peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f67(s2); + s0 = peg$f74(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2609,7 +3168,7 @@ function peg$parse(input, options) { } } peg$savedPos = s0; - s1 = peg$f68(s1); + s1 = peg$f75(s1); s0 = s1; return s0; @@ -2638,7 +3197,7 @@ function peg$parse(input, options) { } } peg$savedPos = peg$currPos; - s2 = peg$f69(s1); + s2 = peg$f76(s1); if (s2) { s2 = undefined; } else { @@ -2646,7 +3205,7 @@ function peg$parse(input, options) { } if (s2 !== 
peg$FAILED) { peg$savedPos = s0; - s0 = peg$f70(s1); + s0 = peg$f77(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2678,7 +3237,7 @@ function peg$parse(input, options) { } } peg$savedPos = peg$currPos; - s2 = peg$f71(s1); + s2 = peg$f78(s1); if (s2) { s2 = undefined; } else { @@ -2746,7 +3305,7 @@ function peg$parse(input, options) { peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } + if (peg$silentFails === 0) { peg$fail(peg$e18); } } peg$silentFails--; if (s1 === peg$FAILED) { @@ -2862,23 +3421,54 @@ function peg$parse(input, options) { return indentationStack[indentationStack.length - 1]; } - // Process escape sequences for multi-quote strings - // For N quotes: 2*N consecutive quotes become N quotes - function processEscapes(content, quoteChar, quoteCount) { - const escapeSequence = quoteChar.repeat(quoteCount * 2); - const replacement = quoteChar.repeat(quoteCount); - let result = ''; - let i = 0; - while (i < content.length) { - if (content.substr(i, escapeSequence.length) === escapeSequence) { - result += replacement; - i += escapeSequence.length; - } else { - result += content[i]; - i++; + // Parse a multi-quote string dynamically for N >= 6 quotes + // Returns { value: string, length: number } or null if no match + function parseHighQuoteString(inputStr, quoteChar) { + // Count opening quotes + let quoteCount = 0; + while (quoteCount < inputStr.length && inputStr[quoteCount] === quoteChar) { + quoteCount++; + } + + if (quoteCount < 6) { + return null; // Let the regular rules handle 1-5 quotes + } + + const openClose = quoteChar.repeat(quoteCount); + const escapeSeq = quoteChar.repeat(quoteCount * 2); + const escapeVal = quoteChar.repeat(quoteCount); + + let pos = quoteCount; // Start after opening quotes + let content = ''; + + while (pos < inputStr.length) { + // Check for escape sequence (2*N quotes) + if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { + content += escapeVal; + pos += 
escapeSeq.length; + continue; } + + // Check for closing quotes (exactly N quotes, not more) + if (inputStr.substr(pos, quoteCount) === openClose) { + // Make sure it's exactly N quotes (not followed by more of the same quote) + const afterClose = pos + quoteCount; + if (afterClose >= inputStr.length || inputStr[afterClose] !== quoteChar) { + // Found valid closing + return { + value: content, + length: afterClose + }; + } + } + + // Take next character + content += inputStr[pos]; + pos++; } - return result; + + // No closing quotes found + return null; } peg$result = peg$startRuleFunction(); diff --git a/js/tests/MultiQuoteParser.test.js b/js/tests/MultiQuoteParser.test.js index 3965450..c617c4f 100644 --- a/js/tests/MultiQuoteParser.test.js +++ b/js/tests/MultiQuoteParser.test.js @@ -412,3 +412,52 @@ test('TestMultilineInDoubleDoubleQuotes', () => { expect(result.length).toBe(1); expect(result[0].values[0].id).toBe('line1\nline2'); }); + +// ============================================================================ +// Unlimited Quotes (6+ quote chars) Tests +// ============================================================================ + +test('TestUnlimitedQuotes6', () => { + // Test 6-quote strings + const input = '""""""hello""""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('hello'); +}); + +test('TestUnlimitedQuotes10', () => { + // Test 10-quote strings + const input = '""""""""""very deeply quoted""""""""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('very deeply quoted'); +}); + +test('TestUnlimitedQuotes6WithInnerQuotes', () => { + // Test 6-quote strings with inner 5-quote sequences + const input = '""""""hello with """"" five quotes inside""""""'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('hello with """"" five quotes inside'); +}); + 
+test('TestUnlimitedSingleQuotes7', () => { + // Test 7-quote single quote strings + const input = "'''''''seven single quotes'''''''"; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('seven single quotes'); +}); + +test('TestUnlimitedBackticks8', () => { + // Test 8-quote backtick strings + const input = '````````eight backticks````````'; + const result = parser.parse(input); + + expect(result.length).toBe(1); + expect(getSingleRefId(result)).toBe('eight backticks'); +}); diff --git a/python/links_notation/parser.py b/python/links_notation/parser.py index acc84f7..7a02122 100644 --- a/python/links_notation/parser.py +++ b/python/links_notation/parser.py @@ -313,30 +313,36 @@ def _extract_next_value(self, text: str, start: int) -> tuple: if start >= len(text): return (start, "") - # Check if this starts with a multi-quote string - for quote_count in range(5, 0, -1): - for quote_char in ['"', "'", "`"]: - quote_seq = quote_char * quote_count - if text[start:].startswith(quote_seq): + # Check if this starts with a multi-quote string (supports any N quotes) + for quote_char in ['"', "'", "`"]: + if text[start:].startswith(quote_char): + # Count opening quotes dynamically + quote_count = 0 + pos = start + while pos < len(text) and text[pos] == quote_char: + quote_count += 1 + pos += 1 + + if quote_count >= 1: # Parse this multi-quote string remaining = text[start:] - open_close = quote_seq + open_close = quote_char * quote_count escape_seq = quote_char * (quote_count * 2) - pos = len(open_close) - while pos < len(remaining): + inner_pos = len(open_close) + while inner_pos < len(remaining): # Check for escape sequence (2*N quotes) - if remaining[pos:].startswith(escape_seq): - pos += len(escape_seq) + if remaining[inner_pos:].startswith(escape_seq): + inner_pos += len(escape_seq) continue # Check for closing quotes - if remaining[pos:].startswith(open_close): - after_close_pos = pos + len(open_close) + if 
remaining[inner_pos:].startswith(open_close): + after_close_pos = inner_pos + len(open_close) # Make sure this is exactly N quotes (not more) if after_close_pos >= len(remaining) or remaining[after_close_pos] != quote_char: # Found the end return (start + after_close_pos, remaining[:after_close_pos]) - pos += 1 + inner_pos += 1 # No closing found, treat as regular text break @@ -400,11 +406,15 @@ def _extract_reference(self, text: str) -> str: """Extract reference, handling quoted strings with escaping support.""" text = text.strip() - # Try multi-quote strings (check longer sequences first: 5, 4, 3, 2, 1) - for quote_count in range(5, 0, -1): - for quote_char in ['"', "'", "`"]: - quote_seq = quote_char * quote_count - if text.startswith(quote_seq) and len(text) > len(quote_seq): + # Try multi-quote strings (supports any N quotes) + for quote_char in ['"', "'", "`"]: + if text.startswith(quote_char): + # Count opening quotes dynamically + quote_count = 0 + while quote_count < len(text) and text[quote_count] == quote_char: + quote_count += 1 + + if quote_count >= 1 and len(text) > quote_count: # Try to parse this multi-quote string result = self._parse_multi_quote_string(text, quote_char, quote_count) if result is not None: diff --git a/python/tests/test_multi_quote_parser.py b/python/tests/test_multi_quote_parser.py index d67715f..7ecbbbf 100644 --- a/python/tests/test_multi_quote_parser.py +++ b/python/tests/test_multi_quote_parser.py @@ -1,7 +1,5 @@ """Tests for multi-quote string support in parser.""" -import pytest - from links_notation.parser import Parser @@ -225,3 +223,42 @@ def test_multiline_in_double_double_quotes(self): assert result[0].values is not None assert len(result[0].values) == 1 assert result[0].values[0].id == "line1\nline2" + + +class TestUnlimitedQuotes: + """Tests for unlimited quotes (6+ quote chars).""" + + def test_unlimited_quotes_6(self): + """Test 6-quote strings.""" + parser = Parser() + result = parser.parse('""""""hello""""""') + 
assert len(result) == 1 + assert get_single_ref_id(result) == "hello" + + def test_unlimited_quotes_10(self): + """Test 10-quote strings.""" + parser = Parser() + result = parser.parse('""""""""""very deeply quoted""""""""""') + assert len(result) == 1 + assert get_single_ref_id(result) == "very deeply quoted" + + def test_unlimited_quotes_6_with_inner_quotes(self): + """Test 6-quote strings with inner 5-quote sequences.""" + parser = Parser() + result = parser.parse('""""""hello with """"" five quotes inside""""""') + assert len(result) == 1 + assert get_single_ref_id(result) == 'hello with """"" five quotes inside' + + def test_unlimited_single_quotes_7(self): + """Test 7-quote single quote strings.""" + parser = Parser() + result = parser.parse("'''''''seven single quotes'''''''") + assert len(result) == 1 + assert get_single_ref_id(result) == "seven single quotes" + + def test_unlimited_backticks_8(self): + """Test 8-quote backtick strings.""" + parser = Parser() + result = parser.parse("````````eight backticks````````") + assert len(result) == 1 + assert get_single_ref_id(result) == "eight backticks" diff --git a/rust/src/parser.rs b/rust/src/parser.rs index 6acd4c8..de8f23b 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -194,95 +194,41 @@ fn parse_multi_quote_string( } } -// Single quote char (1 quote) -fn double_quote_1(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '"', 1) -} - -fn single_quote_1(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '\'', 1) -} - -fn backtick_quote_1(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '`', 1) -} - -// Double quote chars (2 quotes) -fn double_quote_2(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '"', 2) -} - -fn single_quote_2(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '\'', 2) -} - -fn backtick_quote_2(input: &str) -> IResult<&str, String> { - 
parse_multi_quote_string(input, '`', 2) -} - -// Triple quote chars (3 quotes) -fn double_quote_3(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '"', 3) -} +/// Parse a quoted string with dynamically detected quote count. +/// Counts opening quotes and uses that count for parsing. +fn parse_dynamic_quote_string(input: &str, quote_char: char) -> IResult<&str, String> { + // Count opening quotes + let quote_count = input.chars().take_while(|&c| c == quote_char).count(); -fn single_quote_3(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '\'', 3) -} - -fn backtick_quote_3(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '`', 3) -} - -// Quadruple quote chars (4 quotes) -fn double_quote_4(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '"', 4) -} - -fn single_quote_4(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '\'', 4) -} + if quote_count == 0 { + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))); + } -fn backtick_quote_4(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '`', 4) + parse_multi_quote_string(input, quote_char, quote_count) } -// Quintuple quote chars (5 quotes) -fn double_quote_5(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '"', 5) +fn double_quoted_dynamic(input: &str) -> IResult<&str, String> { + parse_dynamic_quote_string(input, '"') } -fn single_quote_5(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '\'', 5) +fn single_quoted_dynamic(input: &str) -> IResult<&str, String> { + parse_dynamic_quote_string(input, '\'') } -fn backtick_quote_5(input: &str) -> IResult<&str, String> { - parse_multi_quote_string(input, '`', 5) +fn backtick_quoted_dynamic(input: &str) -> IResult<&str, String> { + parse_dynamic_quote_string(input, '`') } fn reference(input: &str) -> IResult<&str, String> { - // Try longer 
quote sequences first (greedy matching) + // Try quoted strings with dynamic quote detection (supports any N quotes) + // Then fall back to simple unquoted reference alt(( - // 5 quotes - double_quote_5, - single_quote_5, - backtick_quote_5, - // 4 quotes - double_quote_4, - single_quote_4, - backtick_quote_4, - // 3 quotes - double_quote_3, - single_quote_3, - backtick_quote_3, - // 2 quotes - double_quote_2, - single_quote_2, - backtick_quote_2, - // 1 quote - double_quote_1, - single_quote_1, - backtick_quote_1, - // Simple unquoted + double_quoted_dynamic, + single_quoted_dynamic, + backtick_quoted_dynamic, simple_reference, )) .parse(input) diff --git a/rust/tests/multi_quote_parser_tests.rs b/rust/tests/multi_quote_parser_tests.rs index 8360cbc..8b8d51b 100644 --- a/rust/tests/multi_quote_parser_tests.rs +++ b/rust/tests/multi_quote_parser_tests.rs @@ -467,3 +467,55 @@ fn test_multiline_in_double_double_quotes() { } panic!("Expected multiline content in double double quotes"); } + +// ============================================================================ +// Unlimited Quotes (6+ quote chars) Tests +// ============================================================================ + +#[test] +fn test_unlimited_quotes_6() { + // Test 6-quote strings + let result = parse_lino("\"\"\"\"\"\"hello\"\"\"\"\"\"").unwrap(); + assert_eq!(get_single_ref_id(&result), Some(&"hello".to_string())); +} + +#[test] +fn test_unlimited_quotes_10() { + // Test 10-quote strings + let result = parse_lino("\"\"\"\"\"\"\"\"\"\"very deeply quoted\"\"\"\"\"\"\"\"\"\"").unwrap(); + assert_eq!( + get_single_ref_id(&result), + Some(&"very deeply quoted".to_string()) + ); +} + +#[test] +fn test_unlimited_quotes_6_with_inner_quotes() { + // Test 6-quote strings with inner 5-quote sequences + let result = + parse_lino("\"\"\"\"\"\"hello with \"\"\"\"\" five quotes inside\"\"\"\"\"\"").unwrap(); + assert_eq!( + get_single_ref_id(&result), + Some(&"hello with \"\"\"\"\" five quotes 
inside".to_string()) + ); +} + +#[test] +fn test_unlimited_single_quotes_7() { + // Test 7-quote single quote strings + let result = parse_lino("'''''''seven single quotes'''''''").unwrap(); + assert_eq!( + get_single_ref_id(&result), + Some(&"seven single quotes".to_string()) + ); +} + +#[test] +fn test_unlimited_backticks_8() { + // Test 8-quote backtick strings + let result = parse_lino("````````eight backticks````````").unwrap(); + assert_eq!( + get_single_ref_id(&result), + Some(&"eight backticks".to_string()) + ); +} From 1ce80ac7648ac825d8edc7cb85a83596e0aa8b9d Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 15:44:27 +0000 Subject: [PATCH 07/15] Add experiment scripts for testing unlimited N-quote strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These scripts demonstrate and test the unlimited quote parsing capability for 6+ quote sequences in JavaScript, Python, and Rust parsers. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- experiments/debug_js_parser.js | 58 ++++++++++++++++++++++++++++ experiments/debug_peg_direct.js | 12 ++++++ experiments/test_unlimited_quotes.js | 42 ++++++++++++++++++++ experiments/test_unlimited_quotes.py | 30 ++++++++++++++ experiments/test_unlimited_quotes.rs | 15 +++++++ 5 files changed, 157 insertions(+) create mode 100644 experiments/debug_js_parser.js create mode 100644 experiments/debug_peg_direct.js create mode 100644 experiments/test_unlimited_quotes.js create mode 100644 experiments/test_unlimited_quotes.py create mode 100644 experiments/test_unlimited_quotes.rs diff --git a/experiments/debug_js_parser.js b/experiments/debug_js_parser.js new file mode 100644 index 0000000..38dbe11 --- /dev/null +++ b/experiments/debug_js_parser.js @@ -0,0 +1,58 @@ +// Manually test the parseHighQuoteString function +function parseHighQuoteString(inputStr, quoteChar) { + // Count opening quotes + let quoteCount = 0; + while (quoteCount < 
inputStr.length && inputStr[quoteCount] === quoteChar) { + quoteCount++; + } + + if (quoteCount < 6) { + console.log(` quoteCount=${quoteCount} is < 6, returning null`); + return null; // Let the regular rules handle 1-5 quotes + } + + const openClose = quoteChar.repeat(quoteCount); + const escapeSeq = quoteChar.repeat(quoteCount * 2); + const escapeVal = quoteChar.repeat(quoteCount); + + console.log(` quoteCount=${quoteCount}, openClose="${openClose}", escapeSeq="${escapeSeq}"`); + + let pos = quoteCount; // Start after opening quotes + let content = ''; + + while (pos < inputStr.length) { + // Check for escape sequence (2*N quotes) + if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { + content += escapeVal; + pos += escapeSeq.length; + continue; + } + + // Check for closing quotes (exactly N quotes, not more) + if (inputStr.substr(pos, quoteCount) === openClose) { + // Make sure it's exactly N quotes (not followed by more of the same quote) + const afterClose = pos + quoteCount; + if (afterClose >= inputStr.length || inputStr[afterClose] !== quoteChar) { + // Found valid closing + console.log(` Found closing at pos=${pos}, content="${content}"`); + return { + value: content, + length: afterClose + }; + } + } + + // Take next character + content += inputStr[pos]; + pos++; + } + + // No closing quotes found + console.log(` No closing found, content so far="${content}"`); + return null; +} + +const simple6 = '""""""hello""""""'; +console.log('Testing simple6:', simple6); +const result = parseHighQuoteString(simple6, '"'); +console.log('Result:', result); diff --git a/experiments/debug_peg_direct.js b/experiments/debug_peg_direct.js new file mode 100644 index 0000000..adb6b46 --- /dev/null +++ b/experiments/debug_peg_direct.js @@ -0,0 +1,12 @@ +// Direct test of the generated parser +const parserModule = require('../js/src/parser-generated.js'); + +const simple6 = '""""""hello""""""'; +console.log('Testing simple6:', simple6); +try { + const result = 
parserModule.parse(simple6); + console.log('Raw parse result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('Parse error:', e.message); + console.log('Location:', JSON.stringify(e.location)); +} diff --git a/experiments/test_unlimited_quotes.js b/experiments/test_unlimited_quotes.js new file mode 100644 index 0000000..819ecdb --- /dev/null +++ b/experiments/test_unlimited_quotes.js @@ -0,0 +1,42 @@ +const { Parser } = require('../js/src/Parser.js'); +const parser = new Parser(); + +// Test a simple 6-quote case first +const simple6 = '""""""hello""""""'; +console.log('Simple 6 quotes input:', simple6); +try { + const result = parser.parse(simple6); + console.log('Simple 6 quotes result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('Simple 6 quotes error:', e.message); +} + +// Test 6 quotes +const sixQuotes = '""""""hello with """"" five quotes inside""""""'; +console.log('\n6 quotes input:', sixQuotes); +try { + const result = parser.parse(sixQuotes); + console.log('6 quotes result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('6 quotes error:', e.message); +} + +// Test 10 quotes +const tenQuotes = '""""""""""very deeply quoted""""""""""'; +console.log('\n10 quotes input:', tenQuotes); +try { + const result = parser.parse(tenQuotes); + console.log('10 quotes result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('10 quotes error:', e.message); +} + +// Test escaping with 6 quotes (12 quotes become 6) +const sixQuotesEscape = '""""""text with """""""""""" escaped""""""'; +console.log('\n6 quotes with escaping input:', sixQuotesEscape); +try { + const result = parser.parse(sixQuotesEscape); + console.log('6 quotes escape result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('6 quotes escape error:', e.message); +} diff --git a/experiments/test_unlimited_quotes.py b/experiments/test_unlimited_quotes.py new file mode 100644 index 0000000..7b333d6 --- /dev/null +++ 
b/experiments/test_unlimited_quotes.py @@ -0,0 +1,30 @@ +import sys +sys.path.insert(0, 'python') + +from links_notation import Parser + +parser = Parser() + +# Test 6 quotes +simple6 = '""""""hello""""""' +print(f'Simple 6 quotes input: {simple6}') +result = parser.parse(simple6) +print(f'Simple 6 quotes result: {result}') +for link in result: + print(f' Link id: {link.id}, values: {link.values}') + +# Test 6 quotes with content +six_quotes = '""""""hello with """"" five quotes inside""""""' +print(f'\n6 quotes input: {six_quotes}') +result = parser.parse(six_quotes) +print(f'6 quotes result: {result}') +for link in result: + print(f' Link id: {link.id}, values: {link.values}') + +# Test 10 quotes +ten_quotes = '""""""""""very deeply quoted""""""""""' +print(f'\n10 quotes input: {ten_quotes}') +result = parser.parse(ten_quotes) +print(f'10 quotes result: {result}') +for link in result: + print(f' Link id: {link.id}, values: {link.values}') diff --git a/experiments/test_unlimited_quotes.rs b/experiments/test_unlimited_quotes.rs new file mode 100644 index 0000000..3c5a01f --- /dev/null +++ b/experiments/test_unlimited_quotes.rs @@ -0,0 +1,15 @@ +// Test file - run with: rustc --test test_unlimited_quotes.rs && ./test_unlimited_quotes + +use std::process::Command; + +fn main() { + // Run cargo test with our specific test input + let output = Command::new("cargo") + .args(&["test", "--", "--nocapture", "test_unlimited_quotes"]) + .current_dir("/tmp/gh-issue-solver-1764602479355/rust") + .output() + .expect("Failed to execute cargo test"); + + println!("stdout: {}", String::from_utf8_lossy(&output.stdout)); + println!("stderr: {}", String::from_utf8_lossy(&output.stderr)); +} From 0ea59f6a2fa31b830bf5066ee5f7e474ecea8684 Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 15:59:33 +0000 Subject: [PATCH 08/15] Simplify JavaScript grammar with universal N-quote parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Replace explicit quote rules (1-5 quotes) with a single universal parseNQuoteString() function that handles any N quotes dynamically. This change: - Reduces quote-related grammar rules from ~90 lines to ~30 lines - Uses a single procedural parser function for all quote types (", ', `) - Maintains the same behavior: N opening quotes, N closing quotes, 2*N quotes inside escape to N quotes The universal approach is simpler and more maintainable while supporting unlimited quote levels. Note: C# grammar kept with explicit rules due to Pegasus parser framework limitations with greedy patterns. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- js/src/grammar.pegjs | 120 ++++++++++--------------------------------- 1 file changed, 28 insertions(+), 92 deletions(-) diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index c8169e2..9b483a8 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -35,17 +35,18 @@ return indentationStack[indentationStack.length - 1]; } - // Parse a multi-quote string dynamically for N >= 6 quotes + // Universal parser for any N-quote strings + // For N quotes: opening = N quotes, closing = N quotes, escape = 2*N quotes -> N quotes // Returns { value: string, length: number } or null if no match - function parseHighQuoteString(inputStr, quoteChar) { + function parseNQuoteString(inputStr, quoteChar) { // Count opening quotes let quoteCount = 0; while (quoteCount < inputStr.length && inputStr[quoteCount] === quoteChar) { quoteCount++; } - if (quoteCount < 6) { - return null; // Let the regular rules handle 1-5 quotes + if (quoteCount < 1) { + return null; } const openClose = quoteChar.repeat(quoteCount); @@ -130,111 +131,46 @@ multiLineValueLink = "(" v:multiLineValues _ ")" { return { values: v }; } indentedIdLink = id:reference __ ":" eol { return { id: id, values: [] }; } // Reference can be quoted (with any number of quotes) or simple unquoted -// Order matters: try longer quote sequences 
first (greedy matching) -// For 6+ quotes, use procedural parsing via highQuotedReference -reference = highQuotedReference / quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference +// Universal approach: use procedural parsing for all N-quote strings +reference = quotedReference / simpleReference simpleReference = chars:referenceSymbol+ { return chars.join(''); } -// High quote sequences (6+ quotes) - use procedural parsing -// Capture everything that looks like a quoted string and validate -highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { - return raw; -} +// Universal quoted reference - handles any N quotes for all three quote types +// Captures the raw string and uses parseNQuoteString for validation and parsing +quotedReference = doubleQuotedAny / singleQuotedAny / backtickQuotedAny -// Capture high quote content - match any characters including embedded quotes -// The key insight: for 6+ quotes, we need to capture chars that might include -// sequences of quotes less than the closing count -highQuoteCapture = raw:$('"'+ highQuoteDoubleContent* '"'+) &{ - const result = parseHighQuoteString(raw, '"'); +doubleQuotedAny = raw:$('"'+ doubleQuoteContent* '"'+) &{ + const result = parseNQuoteString(raw, '"'); if (result && result.length === raw.length) { - options._highQuoteValue = result.value; + options._quoteValue = result.value; return true; } return false; -} { return options._highQuoteValue; } -/ raw:$("'"+ highQuoteSingleContent* "'"+ ) &{ - const result = parseHighQuoteString(raw, "'"); +} { return options._quoteValue; } + +singleQuotedAny = raw:$("'"+ singleQuoteContent* "'"+) &{ + const result = parseNQuoteString(raw, "'"); if (result && result.length === raw.length) { - options._highQuoteValue = result.value; + options._quoteValue = result.value; return true; } return false; -} { return options._highQuoteValue; } -/ raw:$('`'+ 
highQuoteBacktickContent* '`'+) &{ - const result = parseHighQuoteString(raw, '`'); +} { return options._quoteValue; } + +backtickQuotedAny = raw:$('`'+ backtickQuoteContent* '`'+) &{ + const result = parseNQuoteString(raw, '`'); if (result && result.length === raw.length) { - options._highQuoteValue = result.value; + options._quoteValue = result.value; return true; } return false; -} { return options._highQuoteValue; } - -// Content for high quote strings - match non-quote chars OR quote sequences -// followed by non-quote (so they're not closing sequences) -highQuoteDoubleContent = [^"] / '"'+ &[^"] -highQuoteSingleContent = [^'] / "'"+ &[^'] -highQuoteBacktickContent = [^\`] / '`'+ &[^\`] - -// Single quote (1 quote char) -singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 - -doubleQuote1 = '"' r:doubleQuote1Content* '"' { return r.join(''); } -doubleQuote1Content = '""' { return '"'; } / [^"] - -singleQuote1 = "'" r:singleQuote1Content* "'" { return r.join(''); } -singleQuote1Content = "''" { return "'"; } / [^'] - -backtickQuote1 = '`' r:backtickQuote1Content* '`' { return r.join(''); } -backtickQuote1Content = '``' { return '`'; } / [^`] - -// Double quotes (2 quote chars) -doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 - -doubleQuote2 = '""' r:doubleQuote2Content* '""' { return r.join(''); } -doubleQuote2Content = '""""' { return '""'; } / !('""') c:. { return c; } - -singleQuote2 = "''" r:singleQuote2Content* "''" { return r.join(''); } -singleQuote2Content = "''''" { return "''"; } / !("''") c:. { return c; } - -backtickQuote2 = '``' r:backtickQuote2Content* '``' { return r.join(''); } -backtickQuote2Content = '````' { return '``'; } / !('``') c:. { return c; } - -// Triple quotes (3 quote chars) -tripleQuotedReference = doubleQuote3 / singleQuote3 / backtickQuote3 - -doubleQuote3 = '"""' r:doubleQuote3Content* '"""' { return r.join(''); } -doubleQuote3Content = '""""""' { return '"""'; } / !('"""') c:. 
{ return c; } - -singleQuote3 = "'''" r:singleQuote3Content* "'''" { return r.join(''); } -singleQuote3Content = "''''''" { return "'''"; } / !("'''") c:. { return c; } - -backtickQuote3 = '```' r:backtickQuote3Content* '```' { return r.join(''); } -backtickQuote3Content = '``````' { return '```'; } / !('```') c:. { return c; } - -// Quadruple quotes (4 quote chars) -quadrupleQuotedReference = doubleQuote4 / singleQuote4 / backtickQuote4 - -doubleQuote4 = '""""' r:doubleQuote4Content* '""""' { return r.join(''); } -doubleQuote4Content = '""""""""' { return '""""'; } / !('""""') c:. { return c; } - -singleQuote4 = "''''" r:singleQuote4Content* "''''" { return r.join(''); } -singleQuote4Content = "''''''''''" { return "''''"; } / !("''''") c:. { return c; } - -backtickQuote4 = '````' r:backtickQuote4Content* '````' { return r.join(''); } -backtickQuote4Content = '````````' { return '````'; } / !('````') c:. { return c; } - -// Quintuple quotes (5 quote chars) -quintupleQuotedReference = doubleQuote5 / singleQuote5 / backtickQuote5 - -doubleQuote5 = '"""""' r:doubleQuote5Content* '"""""' { return r.join(''); } -doubleQuote5Content = '""""""""""' { return '"""""'; } / !('"""""') c:. { return c; } - -singleQuote5 = "'''''" r:singleQuote5Content* "'''''" { return r.join(''); } -singleQuote5Content = "''''''''''" { return "'''''"; } / !("'''''") c:. { return c; } +} { return options._quoteValue; } -backtickQuote5 = '`````' r:backtickQuote5Content* '`````' { return r.join(''); } -backtickQuote5Content = '``````````' { return '`````'; } / !('`````') c:. 
{ return c; } +// Content for quoted strings - match non-quote chars OR quote sequences followed by non-quote +doubleQuoteContent = [^"] / '"'+ &[^"] +singleQuoteContent = [^'] / "'"+ &[^'] +backtickQuoteContent = [^`] / '`'+ &[^`] SET_BASE_INDENTATION = spaces:" "* { setBaseIndentation(spaces); } From 185c1ef1cc2dd030797e00db1615db6d9bf1e03f Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 16:02:21 +0000 Subject: [PATCH 09/15] Fix newline handling in JS grammar quote content patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The [^"] pattern in PEG.js doesn't match newlines by default. Update content patterns to explicitly include newline characters so multiline quoted strings work correctly. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- js/src/grammar.pegjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index 9b483a8..894d461 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -167,10 +167,10 @@ backtickQuotedAny = raw:$('`'+ backtickQuoteContent* '`'+) &{ return false; } { return options._quoteValue; } -// Content for quoted strings - match non-quote chars OR quote sequences followed by non-quote -doubleQuoteContent = [^"] / '"'+ &[^"] -singleQuoteContent = [^'] / "'"+ &[^'] -backtickQuoteContent = [^`] / '`'+ &[^`] +// Content for quoted strings - match non-quote chars (including newlines) OR quote sequences followed by non-quote +doubleQuoteContent = [^"\r\n] / [\r\n] / '"'+ &[^"] +singleQuoteContent = [^'\r\n] / [\r\n] / "'"+ &[^'] +backtickQuoteContent = [^`\r\n] / [\r\n] / '`'+ &[^`] SET_BASE_INDENTATION = spaces:" "* { setBaseIndentation(spaces); } From fa31da05d15a0f458148d51e3c6e6225be3bf463 Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 16:07:26 +0000 Subject: [PATCH 10/15] Revert "Fix newline handling in JS grammar quote content patterns" This reverts 
commit 185c1ef1cc2dd030797e00db1615db6d9bf1e03f. --- js/src/grammar.pegjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index 894d461..9b483a8 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -167,10 +167,10 @@ backtickQuotedAny = raw:$('`'+ backtickQuoteContent* '`'+) &{ return false; } { return options._quoteValue; } -// Content for quoted strings - match non-quote chars (including newlines) OR quote sequences followed by non-quote -doubleQuoteContent = [^"\r\n] / [\r\n] / '"'+ &[^"] -singleQuoteContent = [^'\r\n] / [\r\n] / "'"+ &[^'] -backtickQuoteContent = [^`\r\n] / [\r\n] / '`'+ &[^`] +// Content for quoted strings - match non-quote chars OR quote sequences followed by non-quote +doubleQuoteContent = [^"] / '"'+ &[^"] +singleQuoteContent = [^'] / "'"+ &[^'] +backtickQuoteContent = [^`] / '`'+ &[^`] SET_BASE_INDENTATION = spaces:" "* { setBaseIndentation(spaces); } From 8bc1c98977e64ea43af136348e3dae3ff482b937 Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 16:07:26 +0000 Subject: [PATCH 11/15] Revert "Simplify JavaScript grammar with universal N-quote parsing" This reverts commit 0ea59f6a2fa31b830bf5066ee5f7e474ecea8684. 
--- js/src/grammar.pegjs | 120 +++++++++++++++++++++++++++++++++---------- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index 9b483a8..c8169e2 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -35,18 +35,17 @@ return indentationStack[indentationStack.length - 1]; } - // Universal parser for any N-quote strings - // For N quotes: opening = N quotes, closing = N quotes, escape = 2*N quotes -> N quotes + // Parse a multi-quote string dynamically for N >= 6 quotes // Returns { value: string, length: number } or null if no match - function parseNQuoteString(inputStr, quoteChar) { + function parseHighQuoteString(inputStr, quoteChar) { // Count opening quotes let quoteCount = 0; while (quoteCount < inputStr.length && inputStr[quoteCount] === quoteChar) { quoteCount++; } - if (quoteCount < 1) { - return null; + if (quoteCount < 6) { + return null; // Let the regular rules handle 1-5 quotes } const openClose = quoteChar.repeat(quoteCount); @@ -131,46 +130,111 @@ multiLineValueLink = "(" v:multiLineValues _ ")" { return { values: v }; } indentedIdLink = id:reference __ ":" eol { return { id: id, values: [] }; } // Reference can be quoted (with any number of quotes) or simple unquoted -// Universal approach: use procedural parsing for all N-quote strings -reference = quotedReference / simpleReference +// Order matters: try longer quote sequences first (greedy matching) +// For 6+ quotes, use procedural parsing via highQuotedReference +reference = highQuotedReference / quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference simpleReference = chars:referenceSymbol+ { return chars.join(''); } -// Universal quoted reference - handles any N quotes for all three quote types -// Captures the raw string and uses parseNQuoteString for validation and parsing -quotedReference = doubleQuotedAny / singleQuotedAny / 
backtickQuotedAny +// High quote sequences (6+ quotes) - use procedural parsing +// Capture everything that looks like a quoted string and validate +highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { + return raw; +} -doubleQuotedAny = raw:$('"'+ doubleQuoteContent* '"'+) &{ - const result = parseNQuoteString(raw, '"'); +// Capture high quote content - match any characters including embedded quotes +// The key insight: for 6+ quotes, we need to capture chars that might include +// sequences of quotes less than the closing count +highQuoteCapture = raw:$('"'+ highQuoteDoubleContent* '"'+) &{ + const result = parseHighQuoteString(raw, '"'); if (result && result.length === raw.length) { - options._quoteValue = result.value; + options._highQuoteValue = result.value; return true; } return false; -} { return options._quoteValue; } - -singleQuotedAny = raw:$("'"+ singleQuoteContent* "'"+) &{ - const result = parseNQuoteString(raw, "'"); +} { return options._highQuoteValue; } +/ raw:$("'"+ highQuoteSingleContent* "'"+ ) &{ + const result = parseHighQuoteString(raw, "'"); if (result && result.length === raw.length) { - options._quoteValue = result.value; + options._highQuoteValue = result.value; return true; } return false; -} { return options._quoteValue; } - -backtickQuotedAny = raw:$('`'+ backtickQuoteContent* '`'+) &{ - const result = parseNQuoteString(raw, '`'); +} { return options._highQuoteValue; } +/ raw:$('`'+ highQuoteBacktickContent* '`'+) &{ + const result = parseHighQuoteString(raw, '`'); if (result && result.length === raw.length) { - options._quoteValue = result.value; + options._highQuoteValue = result.value; return true; } return false; -} { return options._quoteValue; } +} { return options._highQuoteValue; } + +// Content for high quote strings - match non-quote chars OR quote sequences +// followed by non-quote (so they're not closing sequences) +highQuoteDoubleContent = [^"] / '"'+ &[^"] +highQuoteSingleContent = [^'] / "'"+ 
&[^'] +highQuoteBacktickContent = [^\`] / '`'+ &[^\`] + +// Single quote (1 quote char) +singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 + +doubleQuote1 = '"' r:doubleQuote1Content* '"' { return r.join(''); } +doubleQuote1Content = '""' { return '"'; } / [^"] + +singleQuote1 = "'" r:singleQuote1Content* "'" { return r.join(''); } +singleQuote1Content = "''" { return "'"; } / [^'] + +backtickQuote1 = '`' r:backtickQuote1Content* '`' { return r.join(''); } +backtickQuote1Content = '``' { return '`'; } / [^`] + +// Double quotes (2 quote chars) +doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 + +doubleQuote2 = '""' r:doubleQuote2Content* '""' { return r.join(''); } +doubleQuote2Content = '""""' { return '""'; } / !('""') c:. { return c; } + +singleQuote2 = "''" r:singleQuote2Content* "''" { return r.join(''); } +singleQuote2Content = "''''" { return "''"; } / !("''") c:. { return c; } + +backtickQuote2 = '``' r:backtickQuote2Content* '``' { return r.join(''); } +backtickQuote2Content = '````' { return '``'; } / !('``') c:. { return c; } + +// Triple quotes (3 quote chars) +tripleQuotedReference = doubleQuote3 / singleQuote3 / backtickQuote3 + +doubleQuote3 = '"""' r:doubleQuote3Content* '"""' { return r.join(''); } +doubleQuote3Content = '""""""' { return '"""'; } / !('"""') c:. { return c; } + +singleQuote3 = "'''" r:singleQuote3Content* "'''" { return r.join(''); } +singleQuote3Content = "''''''" { return "'''"; } / !("'''") c:. { return c; } + +backtickQuote3 = '```' r:backtickQuote3Content* '```' { return r.join(''); } +backtickQuote3Content = '``````' { return '```'; } / !('```') c:. { return c; } + +// Quadruple quotes (4 quote chars) +quadrupleQuotedReference = doubleQuote4 / singleQuote4 / backtickQuote4 + +doubleQuote4 = '""""' r:doubleQuote4Content* '""""' { return r.join(''); } +doubleQuote4Content = '""""""""' { return '""""'; } / !('""""') c:. 
{ return c; } + +singleQuote4 = "''''" r:singleQuote4Content* "''''" { return r.join(''); } +singleQuote4Content = "''''''''''" { return "''''"; } / !("''''") c:. { return c; } + +backtickQuote4 = '````' r:backtickQuote4Content* '````' { return r.join(''); } +backtickQuote4Content = '````````' { return '````'; } / !('````') c:. { return c; } + +// Quintuple quotes (5 quote chars) +quintupleQuotedReference = doubleQuote5 / singleQuote5 / backtickQuote5 + +doubleQuote5 = '"""""' r:doubleQuote5Content* '"""""' { return r.join(''); } +doubleQuote5Content = '""""""""""' { return '"""""'; } / !('"""""') c:. { return c; } + +singleQuote5 = "'''''" r:singleQuote5Content* "'''''" { return r.join(''); } +singleQuote5Content = "''''''''''" { return "'''''"; } / !("'''''") c:. { return c; } -// Content for quoted strings - match non-quote chars OR quote sequences followed by non-quote -doubleQuoteContent = [^"] / '"'+ &[^"] -singleQuoteContent = [^'] / "'"+ &[^'] -backtickQuoteContent = [^`] / '`'+ &[^`] +backtickQuote5 = '`````' r:backtickQuote5Content* '`````' { return r.join(''); } +backtickQuote5Content = '``````````' { return '`````'; } / !('`````') c:. { return c; } SET_BASE_INDENTATION = spaces:" "* { setBaseIndentation(spaces); } From 44b64d418f2a3f89af6a1e4ee4e7af138b98503b Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 17:49:58 +0000 Subject: [PATCH 12/15] Simplify JS grammar with universal N-quote parsing using global variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This replaces the verbose explicit rules for 1-5 quotes with a single universal procedural parser that handles any N quotes using: 1. A global parseQuotedStringAt() function that: - Counts opening quotes to determine N - Parses content until finding exactly N closing quotes - Handles escaping (2*N quotes -> N quotes) 2. Semantic predicates with input/offset() to peek at the input and validate the quoted string structure 3. 
A consume pattern that advances the parser position correctly This approach: - Reduces grammar from ~256 lines to ~208 lines (-48 lines, -19%) - Eliminates 15 repetitive rules (3 quote types * 5 explicit counts) - Supports unlimited N quotes (1, 2, 3, ..., 100, ... N) - Uses the same core logic for all quote counts The technique was inspired by heredoc parsing patterns in PEG, where global variables and semantic predicates enable matching opening/closing delimiters that must be identical. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- experiments/test_universal_quotes.js | 109 + .../test_universal_quotes_grammar_v8.pegjs | 111 + experiments/test_unlimited_quotes_esm.js | 79 + js/dist/index.js | 489 +++- js/src/grammar.pegjs | 178 +- js/src/parser-generated.js | 2242 ++--------------- 6 files changed, 1012 insertions(+), 2196 deletions(-) create mode 100644 experiments/test_universal_quotes.js create mode 100644 experiments/test_universal_quotes_grammar_v8.pegjs create mode 100644 experiments/test_unlimited_quotes_esm.js diff --git a/experiments/test_universal_quotes.js b/experiments/test_universal_quotes.js new file mode 100644 index 0000000..c834082 --- /dev/null +++ b/experiments/test_universal_quotes.js @@ -0,0 +1,109 @@ +#!/usr/bin/env node +/** + * Experiment: Test universal N-quote grammar using global variables and semantic predicates + * + * This tests whether we can simplify the PEG grammar to use a single universal rule + * for any number of quotes, rather than separate rules for 1-5 quotes. 
+ */ + +import peggy from 'peggy'; +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +// Read and compile the grammar +const grammarPath = path.join(__dirname, 'test_universal_quotes_grammar_v8.pegjs'); +const grammarSource = fs.readFileSync(grammarPath, 'utf8'); + +let parser; +try { + parser = peggy.generate(grammarSource); + console.log('✅ Grammar compiled successfully!\n'); +} catch (e) { + console.error('❌ Grammar compilation failed:', e.message); + process.exit(1); +} + +// Test cases +const testCases = [ + // Single quotes (1 quote char) + { input: '"hello"', expected: 'hello', desc: 'single double quote' }, + { input: "'hello'", expected: 'hello', desc: 'single single quote' }, + { input: '`hello`', expected: 'hello', desc: 'single backtick' }, + + // Escape sequences in single quotes + { input: '"say ""hi"""', expected: 'say "hi"', desc: 'escape in double quote' }, + { input: "'''hello'''", expected: 'hello', desc: 'triple single quote' }, + + // Double quotes (2 quote chars) + { input: '""hello""', expected: 'hello', desc: 'double double quote' }, + { input: "''hello''", expected: 'hello', desc: 'double single quote' }, + { input: '``hello``', expected: 'hello', desc: 'double backtick' }, + + // Triple quotes (3 quote chars) + { input: '"""hello"""', expected: 'hello', desc: 'triple double quote' }, + { input: "'''hello'''", expected: 'hello', desc: 'triple single quote' }, + { input: '```hello```', expected: 'hello', desc: 'triple backtick' }, + + // Escape in triple quotes (6 quotes = 2*3 becomes 3 quotes in output) + { input: '"""has """""" inside"""', expected: 'has """ inside', desc: 'escape in triple double' }, + + // 4 quotes + { input: '""""hello""""', expected: 'hello', desc: '4 double quotes' }, + + // 5 quotes + { input: '"""""hello"""""', expected: 'hello', desc: '5 double quotes' }, + + // 6 quotes (should 
work with universal parser) + { input: '""""""hello""""""', expected: 'hello', desc: '6 double quotes' }, + + // 7 quotes + { input: '"""""""hello"""""""', expected: 'hello', desc: '7 double quotes' }, + + // Embedded quotes - content with quotes that don't form closing sequence + { input: '"""hello "world" there"""', expected: 'hello "world" there', desc: 'triple with embedded single' }, + { input: '"""hello ""world"" there"""', expected: 'hello ""world"" there', desc: 'triple with embedded double' }, +]; + +console.log('Testing universal quote grammar:\n'); + +let passed = 0; +let failed = 0; + +for (const tc of testCases) { + try { + const result = parser.parse(tc.input); + const actual = Array.isArray(result) ? result[0] : result; + + if (actual === tc.expected) { + console.log(`✅ ${tc.desc}`); + console.log(` Input: ${tc.input}`); + console.log(` Expected: ${tc.expected}`); + console.log(` Got: ${actual}`); + passed++; + } else { + console.log(`❌ ${tc.desc}`); + console.log(` Input: ${tc.input}`); + console.log(` Expected: ${tc.expected}`); + console.log(` Got: ${actual}`); + failed++; + } + } catch (e) { + console.log(`❌ ${tc.desc} - PARSE ERROR`); + console.log(` Input: ${tc.input}`); + console.log(` Expected: ${tc.expected}`); + console.log(` Error: ${e.message}`); + failed++; + } + console.log(); +} + +console.log(`\n${'='.repeat(50)}`); +console.log(`Results: ${passed} passed, ${failed} failed out of ${testCases.length} tests`); + +if (failed > 0) { + process.exit(1); +} diff --git a/experiments/test_universal_quotes_grammar_v8.pegjs b/experiments/test_universal_quotes_grammar_v8.pegjs new file mode 100644 index 0000000..cd82f61 --- /dev/null +++ b/experiments/test_universal_quotes_grammar_v8.pegjs @@ -0,0 +1,111 @@ +{ + // Universal procedural parser for N-quote strings + // Parses from the given position in the input string + // Returns { value, length } or null + function parseQuotedStringAt(inputStr, startPos, quoteChar) { + if (startPos >= 
inputStr.length || inputStr[startPos] !== quoteChar) { + return null; + } + + // Count opening quotes + let quoteCount = 0; + let pos = startPos; + while (pos < inputStr.length && inputStr[pos] === quoteChar) { + quoteCount++; + pos++; + } + + const closeSeq = quoteChar.repeat(quoteCount); + const escapeSeq = quoteChar.repeat(quoteCount * 2); + + let content = ''; + while (pos < inputStr.length) { + // Check for escape sequence (2*N quotes) + if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { + content += closeSeq; // 2*N quotes become N quotes + pos += escapeSeq.length; + continue; + } + + // Check for closing sequence (exactly N quotes) + if (inputStr.substr(pos, quoteCount) === closeSeq) { + // Verify it's exactly N quotes (not followed by more of same char) + const afterClose = pos + quoteCount; + if (afterClose >= inputStr.length || inputStr[afterClose] !== quoteChar) { + // Found valid closing + return { + value: content, + length: afterClose - startPos + }; + } + } + + // Add character to content + content += inputStr[pos]; + pos++; + } + + return null; // No valid closing found + } + + let parsedValue = null; + let parsedLength = 0; +} + +// Entry point +start = _ first:quotedReference rest:(_ q:quotedReference { return q; })* _ { return [first].concat(rest); } + +quotedReference = anyQuoted / simpleRef + +anyQuoted = doubleQuotedUniversal / singleQuotedUniversal / backtickQuotedUniversal + +// Double quotes: use the input directly via predicate +// The predicate peeks ahead, parses the quoted string, and we consume exact chars +doubleQuotedUniversal = &'"' &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '"'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; +} chars:doubleQuotedConsume { return parsedValue; } + +// Consume the exact number of characters that were parsed +doubleQuotedConsume = c:. 
cs:doubleQuotedConsumeMore* { return [c].concat(cs).join(''); } +doubleQuotedConsumeMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. { return c; } + +// Single quotes +singleQuotedUniversal = &"'" &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, "'"); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; +} chars:singleQuotedConsume { return parsedValue; } + +singleQuotedConsume = c:. cs:singleQuotedConsumeMore* { return [c].concat(cs).join(''); } +singleQuotedConsumeMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. { return c; } + +// Backticks +backtickQuotedUniversal = &'`' &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '`'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; +} chars:backtickQuotedConsume { return parsedValue; } + +backtickQuotedConsume = c:. cs:backtickQuotedConsumeMore* { return [c].concat(cs).join(''); } +backtickQuotedConsumeMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. 
{ return c; } + +simpleRef = chars:[a-zA-Z0-9_]+ { return chars.join(''); } + +_ = [ \t\n\r]* diff --git a/experiments/test_unlimited_quotes_esm.js b/experiments/test_unlimited_quotes_esm.js new file mode 100644 index 0000000..8f8ca24 --- /dev/null +++ b/experiments/test_unlimited_quotes_esm.js @@ -0,0 +1,79 @@ +#!/usr/bin/env node +import { Parser } from '../js/src/Parser.js'; +const parser = new Parser(); + +console.log('Testing unlimited N-quote strings with universal grammar:\n'); + +// Test a simple 6-quote case first +const simple6 = '""""""hello""""""'; +console.log('Simple 6 quotes input:', simple6); +try { + const result = parser.parse(simple6); + console.log('Simple 6 quotes result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('Simple 6 quotes error:', e.message); +} + +// Test 6 quotes with embedded 5 quotes +const sixQuotes = '""""""hello with """"" five quotes inside""""""'; +console.log('\n6 quotes with embedded 5 quotes input:', sixQuotes); +try { + const result = parser.parse(sixQuotes); + console.log('6 quotes result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('6 quotes error:', e.message); +} + +// Test 10 quotes +const tenQuotes = '""""""""""very deeply quoted""""""""""'; +console.log('\n10 quotes input:', tenQuotes); +try { + const result = parser.parse(tenQuotes); + console.log('10 quotes result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('10 quotes error:', e.message); +} + +// Test escaping with 6 quotes (12 quotes become 6) +const sixQuotesEscape = '""""""text with """""""""""" escaped""""""'; +console.log('\n6 quotes with escaping (12 quotes = escape) input:', sixQuotesEscape); +try { + const result = parser.parse(sixQuotesEscape); + console.log('6 quotes escape result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('6 quotes escape error:', e.message); +} + +// Test 7 quotes with backticks +const sevenBackticks = '```````code with 6 backticks `````` 
inside```````'; +console.log('\n7 backticks with 6 embedded input:', sevenBackticks); +try { + const result = parser.parse(sevenBackticks); + console.log('7 backticks result:', JSON.stringify(result, null, 2)); +} catch (e) { + console.log('7 backticks error:', e.message); +} + +// Test single quotes at various levels +console.log('\n--- Single quote variations ---'); +const singleTests = [ + { input: `"hello"`, desc: '1 quote' }, + { input: `""hello""`, desc: '2 quotes' }, + { input: `"""hello"""`, desc: '3 quotes' }, + { input: `""""hello""""`, desc: '4 quotes' }, + { input: `"""""hello"""""`, desc: '5 quotes' }, + { input: `""""""hello""""""`, desc: '6 quotes' }, + { input: `"""""""hello"""""""`, desc: '7 quotes' }, + { input: `""""""""hello""""""""`, desc: '8 quotes' }, +]; + +for (const test of singleTests) { + try { + const result = parser.parse(test.input); + console.log(`✅ ${test.desc}: ${test.input} -> "${result[0].id}"`); + } catch (e) { + console.log(`❌ ${test.desc}: ${test.input} -> ERROR: ${e.message}`); + } +} + +console.log('\n--- All tests completed ---'); diff --git a/js/dist/index.js b/js/dist/index.js index d1890ce..2aad707 100644 --- a/js/dist/index.js +++ b/js/dist/index.js @@ -383,13 +383,12 @@ function peg$parse(input, options) { const peg$c2 = ")"; const peg$c3 = '"'; const peg$c4 = "'"; - const peg$c5 = " "; + const peg$c5 = "`"; + const peg$c6 = " "; const peg$r0 = /^[ \t]/; const peg$r1 = /^[\r\n]/; - const peg$r2 = /^[^"]/; - const peg$r3 = /^[^']/; - const peg$r4 = /^[ \t\n\r]/; - const peg$r5 = /^[^ \t\n\r(:)]/; + const peg$r2 = /^[ \t\n\r]/; + const peg$r3 = /^[^ \t\n\r(:)]/; const peg$e0 = peg$classExpectation([" ", "\t"], false, false, false); const peg$e1 = peg$classExpectation(["\r", ` `], false, false, false); @@ -397,14 +396,13 @@ function peg$parse(input, options) { const peg$e3 = peg$literalExpectation("(", false); const peg$e4 = peg$literalExpectation(")", false); const peg$e5 = peg$literalExpectation('"', false); - const 
peg$e6 = peg$classExpectation(['"'], true, false, false); + const peg$e6 = peg$anyExpectation(); const peg$e7 = peg$literalExpectation("'", false); - const peg$e8 = peg$classExpectation(["'"], true, false, false); + const peg$e8 = peg$literalExpectation("`", false); const peg$e9 = peg$literalExpectation(" ", false); - const peg$e10 = peg$anyExpectation(); - const peg$e11 = peg$classExpectation([" ", "\t", ` + const peg$e10 = peg$classExpectation([" ", "\t", ` `, "\r"], false, false, false); - const peg$e12 = peg$classExpectation([" ", "\t", ` + const peg$e11 = peg$classExpectation([" ", "\t", ` `, "\r", "(", ":", ")"], true, false, false); function peg$f0() { indentationStack = [0]; @@ -489,22 +487,82 @@ function peg$parse(input, options) { function peg$f25(chars) { return chars.join(""); } - function peg$f26(r) { - return r.join(""); + function peg$f26() { + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '"'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; + } + function peg$f27(chars) { + return parsedValue; + } + function peg$f28(c, cs) { + return [c].concat(cs).join(""); + } + function peg$f29() { + return parsedLength > 1 && (parsedLength--, true); + } + function peg$f30(c) { + return c; + } + function peg$f31() { + const pos = offset(); + const result = parseQuotedStringAt(input, pos, "'"); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; + } + function peg$f32(chars) { + return parsedValue; + } + function peg$f33(c, cs) { + return [c].concat(cs).join(""); + } + function peg$f34() { + return parsedLength > 1 && (parsedLength--, true); + } + function peg$f35(c) { + return c; + } + function peg$f36() { + const pos = offset(); + const result = parseQuotedStringAt(input, pos, "`"); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; + } + function 
peg$f37(chars) { + return parsedValue; + } + function peg$f38(c, cs) { + return [c].concat(cs).join(""); + } + function peg$f39() { + return parsedLength > 1 && (parsedLength--, true); } - function peg$f27(r) { - return r.join(""); + function peg$f40(c) { + return c; } - function peg$f28(spaces) { + function peg$f41(spaces) { setBaseIndentation(spaces); } - function peg$f29(spaces) { + function peg$f42(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } - function peg$f30(spaces) { + function peg$f43(spaces) { pushIndentation(spaces); } - function peg$f31(spaces) { + function peg$f44(spaces) { return checkIndentation(spaces); } let peg$currPos = options.peg$currPos | 0; @@ -1204,12 +1262,9 @@ function peg$parse(input, options) { } function peg$parsereference() { let s0; - s0 = peg$parsedoubleQuotedReference(); + s0 = peg$parsequotedReference(); if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsesimpleReference(); - } + s0 = peg$parsesimpleReference(); } return s0; } @@ -1233,58 +1288,159 @@ function peg$parse(input, options) { s0 = s1; return s0; } - function peg$parsedoubleQuotedReference() { + function peg$parsequotedReference() { + let s0; + s0 = peg$parsedoubleQuotedUniversal(); + if (s0 === peg$FAILED) { + s0 = peg$parsesingleQuotedUniversal(); + if (s0 === peg$FAILED) { + s0 = peg$parsebacktickQuotedUniversal(); + } + } + return s0; + } + function peg$parsedoubleQuotedUniversal() { let s0, s1, s2, s3; s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; if (input.charCodeAt(peg$currPos) === 34) { - s1 = peg$c3; + s2 = peg$c3; peg$currPos++; } else { - s1 = peg$FAILED; + s2 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$e5); } } + peg$silentFails--; + if (s2 !== peg$FAILED) { + peg$currPos = s1; + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f26(); + if (s2) { + s2 = undefined; + } else { + s2 
= peg$FAILED; + } + if (s2 !== peg$FAILED) { + s3 = peg$parseconsumeDouble(); + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f27(s3); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } + function peg$parseconsumeDouble() { + let s0, s1, s2, s3; + s0 = peg$currPos; + if (input.length > peg$currPos) { + s1 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$e6); + } + } if (s1 !== peg$FAILED) { s2 = []; - s3 = input.charAt(peg$currPos); - if (peg$r2.test(s3)) { + s3 = peg$parseconsumeDoubleMore(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parseconsumeDoubleMore(); + } + peg$savedPos = s0; + s0 = peg$f28(s1, s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } + function peg$parseconsumeDoubleMore() { + let s0, s1, s2; + s0 = peg$currPos; + peg$savedPos = peg$currPos; + s1 = peg$f29(); + if (s1) { + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); peg$currPos++; } else { - s3 = peg$FAILED; + s2 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$e6); } } - if (s3 !== peg$FAILED) { - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = input.charAt(peg$currPos); - if (peg$r2.test(s3)) { - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { - peg$fail(peg$e6); - } - } - } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f30(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } + function peg$parsesingleQuotedUniversal() { + let s0, s1, s2, s3; + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 39) { + s2 = peg$c4; + peg$currPos++; + } else { + s2 = peg$FAILED; + if 
(peg$silentFails === 0) { + peg$fail(peg$e7); + } + } + peg$silentFails--; + if (s2 !== peg$FAILED) { + peg$currPos = s1; + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f31(); + if (s2) { + s2 = undefined; } else { s2 = peg$FAILED; } if (s2 !== peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 34) { - s3 = peg$c3; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { - peg$fail(peg$e5); - } - } + s3 = peg$parseconsumeSingle(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f26(s2); + s0 = peg$f32(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1299,58 +1455,100 @@ function peg$parse(input, options) { } return s0; } - function peg$parsesingleQuotedReference() { + function peg$parseconsumeSingle() { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 39) { - s1 = peg$c4; + if (input.length > peg$currPos) { + s1 = input.charAt(peg$currPos); peg$currPos++; } else { s1 = peg$FAILED; if (peg$silentFails === 0) { - peg$fail(peg$e7); + peg$fail(peg$e6); } } if (s1 !== peg$FAILED) { s2 = []; - s3 = input.charAt(peg$currPos); - if (peg$r3.test(s3)) { + s3 = peg$parseconsumeSingleMore(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parseconsumeSingleMore(); + } + peg$savedPos = s0; + s0 = peg$f33(s1, s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } + function peg$parseconsumeSingleMore() { + let s0, s1, s2; + s0 = peg$currPos; + peg$savedPos = peg$currPos; + s1 = peg$f34(); + if (s1) { + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); peg$currPos++; } else { - s3 = peg$FAILED; + s2 = peg$FAILED; if (peg$silentFails === 0) { - peg$fail(peg$e8); + peg$fail(peg$e6); } } - if (s3 !== peg$FAILED) { - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = input.charAt(peg$currPos); - if (peg$r3.test(s3)) { - 
peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { - peg$fail(peg$e8); - } - } - } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f35(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } + function peg$parsebacktickQuotedUniversal() { + let s0, s1, s2, s3; + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 96) { + s2 = peg$c5; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$e8); + } + } + peg$silentFails--; + if (s2 !== peg$FAILED) { + peg$currPos = s1; + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f36(); + if (s2) { + s2 = undefined; } else { s2 = peg$FAILED; } if (s2 !== peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 39) { - s3 = peg$c4; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { - peg$fail(peg$e7); - } - } + s3 = peg$parseconsumeBacktick(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f27(s2); + s0 = peg$f37(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1365,12 +1563,72 @@ function peg$parse(input, options) { } return s0; } + function peg$parseconsumeBacktick() { + let s0, s1, s2, s3; + s0 = peg$currPos; + if (input.length > peg$currPos) { + s1 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$e6); + } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parseconsumeBacktickMore(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parseconsumeBacktickMore(); + } + peg$savedPos = s0; + s0 = peg$f38(s1, s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } + function peg$parseconsumeBacktickMore() { + let s0, s1, s2; + s0 = peg$currPos; + peg$savedPos = peg$currPos; + s1 = peg$f39(); + if (s1) { + s1 = undefined; + } else { + s1 = 
peg$FAILED; + } + if (s1 !== peg$FAILED) { + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$e6); + } + } + if (s2 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f40(s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + return s0; + } function peg$parseSET_BASE_INDENTATION() { let s0, s1, s2; s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; @@ -1381,7 +1639,7 @@ function peg$parse(input, options) { while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; @@ -1391,7 +1649,7 @@ function peg$parse(input, options) { } } peg$savedPos = s0; - s1 = peg$f28(s1); + s1 = peg$f41(s1); s0 = s1; return s0; } @@ -1400,7 +1658,7 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; @@ -1411,7 +1669,7 @@ function peg$parse(input, options) { while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; @@ -1421,7 +1679,7 @@ function peg$parse(input, options) { } } peg$savedPos = peg$currPos; - s2 = peg$f29(s1); + s2 = peg$f42(s1); if (s2) { s2 = undefined; } else { @@ -1429,7 +1687,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f30(s1); + s0 = peg$f43(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1441,7 +1699,7 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; @@ -1452,7 +1710,7 @@ function peg$parse(input, 
options) { while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c5; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; @@ -1462,7 +1720,7 @@ function peg$parse(input, options) { } } peg$savedPos = peg$currPos; - s2 = peg$f31(s1); + s2 = peg$f44(s1); if (s2) { s2 = undefined; } else { @@ -1529,7 +1787,7 @@ function peg$parse(input, options) { } else { s1 = peg$FAILED; if (peg$silentFails === 0) { - peg$fail(peg$e10); + peg$fail(peg$e6); } } peg$silentFails--; @@ -1580,12 +1838,12 @@ function peg$parse(input, options) { function peg$parsewhiteSpaceSymbol() { let s0; s0 = input.charAt(peg$currPos); - if (peg$r4.test(s0)) { + if (peg$r2.test(s0)) { peg$currPos++; } else { s0 = peg$FAILED; if (peg$silentFails === 0) { - peg$fail(peg$e11); + peg$fail(peg$e10); } } return s0; @@ -1593,12 +1851,12 @@ function peg$parse(input, options) { function peg$parsereferenceSymbol() { let s0; s0 = input.charAt(peg$currPos); - if (peg$r5.test(s0)) { + if (peg$r3.test(s0)) { peg$currPos++; } else { s0 = peg$FAILED; if (peg$silentFails === 0) { - peg$fail(peg$e12); + peg$fail(peg$e11); } } return s0; @@ -1632,6 +1890,41 @@ function peg$parse(input, options) { function getCurrentIndentation() { return indentationStack[indentationStack.length - 1]; } + function parseQuotedStringAt(inputStr, startPos, quoteChar) { + if (startPos >= inputStr.length || inputStr[startPos] !== quoteChar) { + return null; + } + let quoteCount = 0; + let pos = startPos; + while (pos < inputStr.length && inputStr[pos] === quoteChar) { + quoteCount++; + pos++; + } + const closeSeq = quoteChar.repeat(quoteCount); + const escapeSeq = quoteChar.repeat(quoteCount * 2); + let content = ""; + while (pos < inputStr.length) { + if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { + content += closeSeq; + pos += escapeSeq.length; + continue; + } + if (inputStr.substr(pos, quoteCount) === closeSeq) { + const afterClose = pos + quoteCount; + if (afterClose >= 
inputStr.length || inputStr[afterClose] !== quoteChar) { + return { + value: content, + length: afterClose - startPos + }; + } + } + content += inputStr[pos]; + pos++; + } + return null; + } + let parsedValue = null; + let parsedLength = 0; peg$result = peg$startRuleFunction(); const peg$success = peg$result !== peg$FAILED && peg$currPos === input.length; function peg$throw() { diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index c8169e2..40691c6 100644 --- a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -35,55 +35,58 @@ return indentationStack[indentationStack.length - 1]; } - // Parse a multi-quote string dynamically for N >= 6 quotes - // Returns { value: string, length: number } or null if no match - function parseHighQuoteString(inputStr, quoteChar) { + // Universal procedural parser for N-quote strings (any N >= 1) + // Parses from the given position in the input string + // Returns { value, length } or null + function parseQuotedStringAt(inputStr, startPos, quoteChar) { + if (startPos >= inputStr.length || inputStr[startPos] !== quoteChar) { + return null; + } + // Count opening quotes let quoteCount = 0; - while (quoteCount < inputStr.length && inputStr[quoteCount] === quoteChar) { + let pos = startPos; + while (pos < inputStr.length && inputStr[pos] === quoteChar) { quoteCount++; + pos++; } - if (quoteCount < 6) { - return null; // Let the regular rules handle 1-5 quotes - } - - const openClose = quoteChar.repeat(quoteCount); + const closeSeq = quoteChar.repeat(quoteCount); const escapeSeq = quoteChar.repeat(quoteCount * 2); - const escapeVal = quoteChar.repeat(quoteCount); - let pos = quoteCount; // Start after opening quotes let content = ''; - while (pos < inputStr.length) { // Check for escape sequence (2*N quotes) if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { - content += escapeVal; + content += closeSeq; // 2*N quotes become N quotes pos += escapeSeq.length; continue; } - // Check for closing quotes (exactly N quotes, not 
more) - if (inputStr.substr(pos, quoteCount) === openClose) { - // Make sure it's exactly N quotes (not followed by more of the same quote) + // Check for closing sequence (exactly N quotes) + if (inputStr.substr(pos, quoteCount) === closeSeq) { + // Verify it's exactly N quotes (not followed by more of same char) const afterClose = pos + quoteCount; if (afterClose >= inputStr.length || inputStr[afterClose] !== quoteChar) { // Found valid closing return { value: content, - length: afterClose + length: afterClose - startPos }; } } - // Take next character + // Add character to content content += inputStr[pos]; pos++; } - // No closing quotes found - return null; + return null; // No valid closing found } + + // Global state for passing parsed values between predicate and action + let parsedValue = null; + let parsedLength = 0; } document = &{ indentationStack = [0]; baseIndentation = null; return true; } skipEmptyLines links:links _ eof { return links; } @@ -129,112 +132,61 @@ multiLineValueLink = "(" v:multiLineValues _ ")" { return { values: v }; } indentedIdLink = id:reference __ ":" eol { return { id: id, values: [] }; } -// Reference can be quoted (with any number of quotes) or simple unquoted -// Order matters: try longer quote sequences first (greedy matching) -// For 6+ quotes, use procedural parsing via highQuotedReference -reference = highQuotedReference / quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference +// Reference can be quoted (with any number of quotes N >= 1) or simple unquoted +// Universal approach: use procedural parsing for all quote types and counts +reference = quotedReference / simpleReference simpleReference = chars:referenceSymbol+ { return chars.join(''); } -// High quote sequences (6+ quotes) - use procedural parsing -// Capture everything that looks like a quoted string and validate -highQuotedReference = &('""""""' / "''''''" / '``````') 
raw:highQuoteCapture { - return raw; -} - -// Capture high quote content - match any characters including embedded quotes -// The key insight: for 6+ quotes, we need to capture chars that might include -// sequences of quotes less than the closing count -highQuoteCapture = raw:$('"'+ highQuoteDoubleContent* '"'+) &{ - const result = parseHighQuoteString(raw, '"'); - if (result && result.length === raw.length) { - options._highQuoteValue = result.value; +// Universal quoted reference - handles any N quotes for all quote types +// Uses procedural parsing with input/offset() for clean, simple logic +quotedReference = doubleQuotedUniversal / singleQuotedUniversal / backtickQuotedUniversal + +// Double quotes: peek at input, parse procedurally, consume exact chars +doubleQuotedUniversal = &'"' &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '"'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; return true; } return false; -} { return options._highQuoteValue; } -/ raw:$("'"+ highQuoteSingleContent* "'"+ ) &{ - const result = parseHighQuoteString(raw, "'"); - if (result && result.length === raw.length) { - options._highQuoteValue = result.value; +} chars:consumeDouble { return parsedValue; } + +// Consume exactly parsedLength characters for double quotes +consumeDouble = c:. cs:consumeDoubleMore* { return [c].concat(cs).join(''); } +consumeDoubleMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. 
{ return c; } + +// Single quotes +singleQuotedUniversal = &"'" &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, "'"); + if (result) { + parsedValue = result.value; + parsedLength = result.length; return true; } return false; -} { return options._highQuoteValue; } -/ raw:$('`'+ highQuoteBacktickContent* '`'+) &{ - const result = parseHighQuoteString(raw, '`'); - if (result && result.length === raw.length) { - options._highQuoteValue = result.value; +} chars:consumeSingle { return parsedValue; } + +consumeSingle = c:. cs:consumeSingleMore* { return [c].concat(cs).join(''); } +consumeSingleMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. { return c; } + +// Backticks +backtickQuotedUniversal = &'`' &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '`'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; return true; } return false; -} { return options._highQuoteValue; } - -// Content for high quote strings - match non-quote chars OR quote sequences -// followed by non-quote (so they're not closing sequences) -highQuoteDoubleContent = [^"] / '"'+ &[^"] -highQuoteSingleContent = [^'] / "'"+ &[^'] -highQuoteBacktickContent = [^\`] / '`'+ &[^\`] - -// Single quote (1 quote char) -singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 - -doubleQuote1 = '"' r:doubleQuote1Content* '"' { return r.join(''); } -doubleQuote1Content = '""' { return '"'; } / [^"] - -singleQuote1 = "'" r:singleQuote1Content* "'" { return r.join(''); } -singleQuote1Content = "''" { return "'"; } / [^'] - -backtickQuote1 = '`' r:backtickQuote1Content* '`' { return r.join(''); } -backtickQuote1Content = '``' { return '`'; } / [^`] - -// Double quotes (2 quote chars) -doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 - -doubleQuote2 = '""' r:doubleQuote2Content* '""' { return r.join(''); } -doubleQuote2Content = '""""' { return '""'; } / !('""') c:. 
{ return c; } - -singleQuote2 = "''" r:singleQuote2Content* "''" { return r.join(''); } -singleQuote2Content = "''''" { return "''"; } / !("''") c:. { return c; } - -backtickQuote2 = '``' r:backtickQuote2Content* '``' { return r.join(''); } -backtickQuote2Content = '````' { return '``'; } / !('``') c:. { return c; } - -// Triple quotes (3 quote chars) -tripleQuotedReference = doubleQuote3 / singleQuote3 / backtickQuote3 - -doubleQuote3 = '"""' r:doubleQuote3Content* '"""' { return r.join(''); } -doubleQuote3Content = '""""""' { return '"""'; } / !('"""') c:. { return c; } - -singleQuote3 = "'''" r:singleQuote3Content* "'''" { return r.join(''); } -singleQuote3Content = "''''''" { return "'''"; } / !("'''") c:. { return c; } - -backtickQuote3 = '```' r:backtickQuote3Content* '```' { return r.join(''); } -backtickQuote3Content = '``````' { return '```'; } / !('```') c:. { return c; } - -// Quadruple quotes (4 quote chars) -quadrupleQuotedReference = doubleQuote4 / singleQuote4 / backtickQuote4 - -doubleQuote4 = '""""' r:doubleQuote4Content* '""""' { return r.join(''); } -doubleQuote4Content = '""""""""' { return '""""'; } / !('""""') c:. { return c; } - -singleQuote4 = "''''" r:singleQuote4Content* "''''" { return r.join(''); } -singleQuote4Content = "''''''''''" { return "''''"; } / !("''''") c:. { return c; } - -backtickQuote4 = '````' r:backtickQuote4Content* '````' { return r.join(''); } -backtickQuote4Content = '````````' { return '````'; } / !('````') c:. { return c; } - -// Quintuple quotes (5 quote chars) -quintupleQuotedReference = doubleQuote5 / singleQuote5 / backtickQuote5 - -doubleQuote5 = '"""""' r:doubleQuote5Content* '"""""' { return r.join(''); } -doubleQuote5Content = '""""""""""' { return '"""""'; } / !('"""""') c:. { return c; } - -singleQuote5 = "'''''" r:singleQuote5Content* "'''''" { return r.join(''); } -singleQuote5Content = "''''''''''" { return "'''''"; } / !("'''''") c:. 
{ return c; } +} chars:consumeBacktick { return parsedValue; } -backtickQuote5 = '`````' r:backtickQuote5Content* '`````' { return r.join(''); } -backtickQuote5Content = '``````````' { return '`````'; } / !('`````') c:. { return c; } +consumeBacktick = c:. cs:consumeBacktickMore* { return [c].concat(cs).join(''); } +consumeBacktickMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. { return c; } SET_BASE_INDENTATION = spaces:" "* { setBaseIndentation(spaces); } diff --git a/js/src/parser-generated.js b/js/src/parser-generated.js index 30eac05..b9030a9 100644 --- a/js/src/parser-generated.js +++ b/js/src/parser-generated.js @@ -167,74 +167,28 @@ function peg$parse(input, options) { const peg$c0 = ":"; const peg$c1 = "("; const peg$c2 = ")"; - const peg$c3 = "\"\"\"\"\"\""; - const peg$c4 = "''''''"; - const peg$c5 = "``````"; - const peg$c6 = "\""; - const peg$c7 = "'"; - const peg$c8 = "`"; - const peg$c9 = "\"\""; - const peg$c10 = "''"; - const peg$c11 = "``"; - const peg$c12 = "\"\"\"\""; - const peg$c13 = "''''"; - const peg$c14 = "````"; - const peg$c15 = "\"\"\""; - const peg$c16 = "'''"; - const peg$c17 = "```"; - const peg$c18 = "\"\"\"\"\"\"\"\""; - const peg$c19 = "''''''''''"; - const peg$c20 = "````````"; - const peg$c21 = "\"\"\"\"\""; - const peg$c22 = "\"\"\"\"\"\"\"\"\"\""; - const peg$c23 = "'''''"; - const peg$c24 = "`````"; - const peg$c25 = "``````````"; - const peg$c26 = " "; + const peg$c3 = "\""; + const peg$c4 = "'"; + const peg$c5 = "`"; + const peg$c6 = " "; const peg$r0 = /^[ \t]/; const peg$r1 = /^[\r\n]/; - const peg$r2 = /^[^"]/; - const peg$r3 = /^[^']/; - const peg$r4 = /^[^`]/; - const peg$r5 = /^[ \t\n\r]/; - const peg$r6 = /^[^ \t\n\r(:)]/; + const peg$r2 = /^[ \t\n\r]/; + const peg$r3 = /^[^ \t\n\r(:)]/; const peg$e0 = peg$classExpectation([" ", "\t"], false, false, false); const peg$e1 = peg$classExpectation(["\r", "\n"], false, false, false); const peg$e2 = peg$literalExpectation(":", false); const peg$e3 = 
peg$literalExpectation("(", false); const peg$e4 = peg$literalExpectation(")", false); - const peg$e5 = peg$literalExpectation("\"\"\"\"\"\"", false); - const peg$e6 = peg$literalExpectation("''''''", false); - const peg$e7 = peg$literalExpectation("``````", false); - const peg$e8 = peg$literalExpectation("\"", false); - const peg$e9 = peg$literalExpectation("'", false); - const peg$e10 = peg$literalExpectation("`", false); - const peg$e11 = peg$classExpectation(["\""], true, false, false); - const peg$e12 = peg$classExpectation(["'"], true, false, false); - const peg$e13 = peg$classExpectation(["`"], true, false, false); - const peg$e14 = peg$literalExpectation("\"\"", false); - const peg$e15 = peg$literalExpectation("''", false); - const peg$e16 = peg$literalExpectation("``", false); - const peg$e17 = peg$literalExpectation("\"\"\"\"", false); - const peg$e18 = peg$anyExpectation(); - const peg$e19 = peg$literalExpectation("''''", false); - const peg$e20 = peg$literalExpectation("````", false); - const peg$e21 = peg$literalExpectation("\"\"\"", false); - const peg$e22 = peg$literalExpectation("'''", false); - const peg$e23 = peg$literalExpectation("```", false); - const peg$e24 = peg$literalExpectation("\"\"\"\"\"\"\"\"", false); - const peg$e25 = peg$literalExpectation("''''''''''", false); - const peg$e26 = peg$literalExpectation("````````", false); - const peg$e27 = peg$literalExpectation("\"\"\"\"\"", false); - const peg$e28 = peg$literalExpectation("\"\"\"\"\"\"\"\"\"\"", false); - const peg$e29 = peg$literalExpectation("'''''", false); - const peg$e30 = peg$literalExpectation("`````", false); - const peg$e31 = peg$literalExpectation("``````````", false); - const peg$e32 = peg$literalExpectation(" ", false); - const peg$e33 = peg$classExpectation([" ", "\t", "\n", "\r"], false, false, false); - const peg$e34 = peg$classExpectation([" ", "\t", "\n", "\r", "(", ":", ")"], true, false, false); + const peg$e5 = peg$literalExpectation("\"", false); + const peg$e6 
= peg$anyExpectation(); + const peg$e7 = peg$literalExpectation("'", false); + const peg$e8 = peg$literalExpectation("`", false); + const peg$e9 = peg$literalExpectation(" ", false); + const peg$e10 = peg$classExpectation([" ", "\t", "\n", "\r"], false, false, false); + const peg$e11 = peg$classExpectation([" ", "\t", "\n", "\r", "(", ":", ")"], true, false, false); function peg$f0() { indentationStack = [0]; baseIndentation = null; return true; } function peg$f1(links) { return links; } @@ -264,82 +218,52 @@ function peg$parse(input, options) { function peg$f23(v) { return { values: v }; } function peg$f24(id) { return { id: id, values: [] }; } function peg$f25(chars) { return chars.join(''); } - function peg$f26(raw) { - return raw; - } - function peg$f27(raw) { - const result = parseHighQuoteString(raw, '"'); - if (result && result.length === raw.length) { - options._highQuoteValue = result.value; + function peg$f26() { + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '"'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; return true; } return false; } - function peg$f28(raw) { return options._highQuoteValue; } - function peg$f29(raw) { - const result = parseHighQuoteString(raw, "'"); - if (result && result.length === raw.length) { - options._highQuoteValue = result.value; + function peg$f27(chars) { return parsedValue; } + function peg$f28(c, cs) { return [c].concat(cs).join(''); } + function peg$f29() { return parsedLength > 1 && (parsedLength--, true); } + function peg$f30(c) { return c; } + function peg$f31() { + const pos = offset(); + const result = parseQuotedStringAt(input, pos, "'"); + if (result) { + parsedValue = result.value; + parsedLength = result.length; return true; } return false; } - function peg$f30(raw) { return options._highQuoteValue; } - function peg$f31(raw) { - const result = parseHighQuoteString(raw, '`'); - if (result && result.length === raw.length) { - options._highQuoteValue = 
result.value; + function peg$f32(chars) { return parsedValue; } + function peg$f33(c, cs) { return [c].concat(cs).join(''); } + function peg$f34() { return parsedLength > 1 && (parsedLength--, true); } + function peg$f35(c) { return c; } + function peg$f36() { + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '`'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; return true; } return false; } - function peg$f32(raw) { return options._highQuoteValue; } - function peg$f33(r) { return r.join(''); } - function peg$f34() { return '"'; } - function peg$f35(r) { return r.join(''); } - function peg$f36() { return "'"; } - function peg$f37(r) { return r.join(''); } - function peg$f38() { return '`'; } - function peg$f39(r) { return r.join(''); } - function peg$f40() { return '""'; } - function peg$f41(c) { return c; } - function peg$f42(r) { return r.join(''); } - function peg$f43() { return "''"; } - function peg$f44(c) { return c; } - function peg$f45(r) { return r.join(''); } - function peg$f46() { return '``'; } - function peg$f47(c) { return c; } - function peg$f48(r) { return r.join(''); } - function peg$f49() { return '"""'; } - function peg$f50(c) { return c; } - function peg$f51(r) { return r.join(''); } - function peg$f52() { return "'''"; } - function peg$f53(c) { return c; } - function peg$f54(r) { return r.join(''); } - function peg$f55() { return '```'; } - function peg$f56(c) { return c; } - function peg$f57(r) { return r.join(''); } - function peg$f58() { return '""""'; } - function peg$f59(c) { return c; } - function peg$f60(r) { return r.join(''); } - function peg$f61() { return "''''"; } - function peg$f62(c) { return c; } - function peg$f63(r) { return r.join(''); } - function peg$f64() { return '````'; } - function peg$f65(c) { return c; } - function peg$f66(r) { return r.join(''); } - function peg$f67() { return '"""""'; } - function peg$f68(c) { return c; } - function peg$f69(r) { return r.join(''); 
} - function peg$f70() { return "'''''"; } - function peg$f71(c) { return c; } - function peg$f72(r) { return r.join(''); } - function peg$f73() { return '`````'; } - function peg$f74(c) { return c; } - function peg$f75(spaces) { setBaseIndentation(spaces); } - function peg$f76(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } - function peg$f77(spaces) { pushIndentation(spaces); } - function peg$f78(spaces) { return checkIndentation(spaces); } + function peg$f37(chars) { return parsedValue; } + function peg$f38(c, cs) { return [c].concat(cs).join(''); } + function peg$f39() { return parsedLength > 1 && (parsedLength--, true); } + function peg$f40(c) { return c; } + function peg$f41(spaces) { setBaseIndentation(spaces); } + function peg$f42(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } + function peg$f43(spaces) { pushIndentation(spaces); } + function peg$f44(spaces) { return checkIndentation(spaces); } let peg$currPos = options.peg$currPos | 0; let peg$savedPos = peg$currPos; const peg$posDetailsCache = [{ line: 1, column: 1 }]; @@ -1112,24 +1036,9 @@ function peg$parse(input, options) { function peg$parsereference() { let s0; - s0 = peg$parsehighQuotedReference(); + s0 = peg$parsequotedReference(); if (s0 === peg$FAILED) { - s0 = peg$parsequintupleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsequadrupleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsetripleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsedoubleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuotedReference(); - if (s0 === peg$FAILED) { - s0 = peg$parsesimpleReference(); - } - } - } - } - } + s0 = peg$parsesimpleReference(); } return s0; @@ -1158,37 +1067,33 @@ function peg$parse(input, options) { return s0; } - function peg$parsehighQuotedReference() { - let s0, s1, s2; + function peg$parsequotedReference() { + let s0; + + s0 = peg$parsedoubleQuotedUniversal(); + if (s0 === 
peg$FAILED) { + s0 = peg$parsesingleQuotedUniversal(); + if (s0 === peg$FAILED) { + s0 = peg$parsebacktickQuotedUniversal(); + } + } + + return s0; + } + + function peg$parsedoubleQuotedUniversal() { + let s0, s1, s2, s3; s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; - if (input.substr(peg$currPos, 6) === peg$c3) { + if (input.charCodeAt(peg$currPos) === 34) { s2 = peg$c3; - peg$currPos += 6; + peg$currPos++; } else { s2 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$e5); } } - if (s2 === peg$FAILED) { - if (input.substr(peg$currPos, 6) === peg$c4) { - s2 = peg$c4; - peg$currPos += 6; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } - } - if (s2 === peg$FAILED) { - if (input.substr(peg$currPos, 6) === peg$c5) { - s2 = peg$c5; - peg$currPos += 6; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e7); } - } - } - } peg$silentFails--; if (s2 !== peg$FAILED) { peg$currPos = s1; @@ -1197,10 +1102,22 @@ function peg$parse(input, options) { s1 = peg$FAILED; } if (s1 !== peg$FAILED) { - s2 = peg$parsehighQuoteCapture(); + peg$savedPos = peg$currPos; + s2 = peg$f26(); + if (s2) { + s2 = undefined; + } else { + s2 = peg$FAILED; + } if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f26(s2); + s3 = peg$parseconsumeDouble(); + if (s3 !== peg$FAILED) { + peg$savedPos = s0; + s0 = peg$f27(s3); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1213,90 +1130,56 @@ function peg$parse(input, options) { return s0; } - function peg$parsehighQuoteCapture() { - let s0, s1, s2, s3, s4, s5, s6; + function peg$parseconsumeDouble() { + let s0, s1, s2, s3; s0 = peg$currPos; - s1 = peg$currPos; - s2 = peg$currPos; - s3 = []; - if (input.charCodeAt(peg$currPos) === 34) { - s4 = peg$c6; + if (input.length > peg$currPos) { + s1 = input.charAt(peg$currPos); peg$currPos++; } else { - s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - if (s4 !== 
peg$FAILED) { - while (s4 !== peg$FAILED) { - s3.push(s4); - if (input.charCodeAt(peg$currPos) === 34) { - s4 = peg$c6; - peg$currPos++; - } else { - s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - } - } else { - s3 = peg$FAILED; + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } } - if (s3 !== peg$FAILED) { - s4 = []; - s5 = peg$parsehighQuoteDoubleContent(); - while (s5 !== peg$FAILED) { - s4.push(s5); - s5 = peg$parsehighQuoteDoubleContent(); - } - s5 = []; - if (input.charCodeAt(peg$currPos) === 34) { - s6 = peg$c6; - peg$currPos++; - } else { - s6 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - if (s6 !== peg$FAILED) { - while (s6 !== peg$FAILED) { - s5.push(s6); - if (input.charCodeAt(peg$currPos) === 34) { - s6 = peg$c6; - peg$currPos++; - } else { - s6 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - } - } else { - s5 = peg$FAILED; - } - if (s5 !== peg$FAILED) { - s3 = [s3, s4, s5]; - s2 = s3; - } else { - peg$currPos = s2; - s2 = peg$FAILED; + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parseconsumeDoubleMore(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parseconsumeDoubleMore(); } + peg$savedPos = s0; + s0 = peg$f28(s1, s2); } else { - peg$currPos = s2; - s2 = peg$FAILED; + peg$currPos = s0; + s0 = peg$FAILED; } - if (s2 !== peg$FAILED) { - s1 = input.substring(s1, peg$currPos); + + return s0; + } + + function peg$parseconsumeDoubleMore() { + let s0, s1, s2; + + s0 = peg$currPos; + peg$savedPos = peg$currPos; + s1 = peg$f29(); + if (s1) { + s1 = undefined; } else { - s1 = s2; + s1 = peg$FAILED; } if (s1 !== peg$FAILED) { - peg$savedPos = peg$currPos; - s2 = peg$f27(s1); - if (s2) { - s2 = undefined; + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; } else { s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f28(s1); + s0 = peg$f30(s2); } else 
{ peg$currPos = s0; s0 = peg$FAILED; @@ -1305,246 +1188,43 @@ function peg$parse(input, options) { peg$currPos = s0; s0 = peg$FAILED; } - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - s2 = peg$currPos; - s3 = []; - if (input.charCodeAt(peg$currPos) === 39) { - s4 = peg$c7; - peg$currPos++; - } else { - s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - if (s4 !== peg$FAILED) { - while (s4 !== peg$FAILED) { - s3.push(s4); - if (input.charCodeAt(peg$currPos) === 39) { - s4 = peg$c7; - peg$currPos++; - } else { - s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - } - } else { - s3 = peg$FAILED; - } - if (s3 !== peg$FAILED) { - s4 = []; - s5 = peg$parsehighQuoteSingleContent(); - while (s5 !== peg$FAILED) { - s4.push(s5); - s5 = peg$parsehighQuoteSingleContent(); - } - s5 = []; - if (input.charCodeAt(peg$currPos) === 39) { - s6 = peg$c7; - peg$currPos++; - } else { - s6 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - if (s6 !== peg$FAILED) { - while (s6 !== peg$FAILED) { - s5.push(s6); - if (input.charCodeAt(peg$currPos) === 39) { - s6 = peg$c7; - peg$currPos++; - } else { - s6 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - } - } else { - s5 = peg$FAILED; - } - if (s5 !== peg$FAILED) { - s3 = [s3, s4, s5]; - s2 = s3; - } else { - peg$currPos = s2; - s2 = peg$FAILED; - } - } else { - peg$currPos = s2; - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - s1 = input.substring(s1, peg$currPos); - } else { - s1 = s2; - } - if (s1 !== peg$FAILED) { - peg$savedPos = peg$currPos; - s2 = peg$f29(s1); - if (s2) { - s2 = undefined; - } else { - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f30(s1); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - s2 = peg$currPos; - s3 = []; - if (input.charCodeAt(peg$currPos) 
=== 96) { - s4 = peg$c8; - peg$currPos++; - } else { - s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - if (s4 !== peg$FAILED) { - while (s4 !== peg$FAILED) { - s3.push(s4); - if (input.charCodeAt(peg$currPos) === 96) { - s4 = peg$c8; - peg$currPos++; - } else { - s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - } - } else { - s3 = peg$FAILED; - } - if (s3 !== peg$FAILED) { - s4 = []; - s5 = peg$parsehighQuoteBacktickContent(); - while (s5 !== peg$FAILED) { - s4.push(s5); - s5 = peg$parsehighQuoteBacktickContent(); - } - s5 = []; - if (input.charCodeAt(peg$currPos) === 96) { - s6 = peg$c8; - peg$currPos++; - } else { - s6 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - if (s6 !== peg$FAILED) { - while (s6 !== peg$FAILED) { - s5.push(s6); - if (input.charCodeAt(peg$currPos) === 96) { - s6 = peg$c8; - peg$currPos++; - } else { - s6 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - } - } else { - s5 = peg$FAILED; - } - if (s5 !== peg$FAILED) { - s3 = [s3, s4, s5]; - s2 = s3; - } else { - peg$currPos = s2; - s2 = peg$FAILED; - } - } else { - peg$currPos = s2; - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - s1 = input.substring(s1, peg$currPos); - } else { - s1 = s2; - } - if (s1 !== peg$FAILED) { - peg$savedPos = peg$currPos; - s2 = peg$f31(s1); - if (s2) { - s2 = undefined; - } else { - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f32(s1); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - } return s0; } - function peg$parsehighQuoteDoubleContent() { + function peg$parsesingleQuotedUniversal() { let s0, s1, s2, s3; - s0 = input.charAt(peg$currPos); - if (peg$r2.test(s0)) { + s0 = peg$currPos; + s1 = peg$currPos; + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 39) { + s2 = peg$c4; peg$currPos++; } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { 
peg$fail(peg$e11); } + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e7); } } - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = []; - if (input.charCodeAt(peg$currPos) === 34) { - s2 = peg$c6; - peg$currPos++; + peg$silentFails--; + if (s2 !== peg$FAILED) { + peg$currPos = s1; + s1 = undefined; + } else { + s1 = peg$FAILED; + } + if (s1 !== peg$FAILED) { + peg$savedPos = peg$currPos; + s2 = peg$f31(); + if (s2) { + s2 = undefined; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } } if (s2 !== peg$FAILED) { - while (s2 !== peg$FAILED) { - s1.push(s2); - if (input.charCodeAt(peg$currPos) === 34) { - s2 = peg$c6; - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - } - } else { - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - s2 = peg$currPos; - peg$silentFails++; - s3 = input.charAt(peg$currPos); - if (peg$r2.test(s3)) { - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e11); } - } - peg$silentFails--; + s3 = peg$parseconsumeSingle(); if (s3 !== peg$FAILED) { - peg$currPos = s2; - s2 = undefined; - } else { - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - s1 = [s1, s2]; - s0 = s1; + peg$savedPos = s0; + s0 = peg$f32(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1553,1224 +1233,34 @@ function peg$parse(input, options) { peg$currPos = s0; s0 = peg$FAILED; } + } else { + peg$currPos = s0; + s0 = peg$FAILED; } return s0; } - function peg$parsehighQuoteSingleContent() { + function peg$parseconsumeSingle() { let s0, s1, s2, s3; - s0 = input.charAt(peg$currPos); - if (peg$r3.test(s0)) { + s0 = peg$currPos; + if (input.length > peg$currPos) { + s1 = input.charAt(peg$currPos); peg$currPos++; } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e6); } } - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = []; - if 
(input.charCodeAt(peg$currPos) === 39) { - s2 = peg$c7; - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - if (s2 !== peg$FAILED) { - while (s2 !== peg$FAILED) { - s1.push(s2); - if (input.charCodeAt(peg$currPos) === 39) { - s2 = peg$c7; - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - } - } else { - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - s2 = peg$currPos; - peg$silentFails++; - s3 = input.charAt(peg$currPos); - if (peg$r3.test(s3)) { - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } - } - peg$silentFails--; - if (s3 !== peg$FAILED) { - peg$currPos = s2; - s2 = undefined; - } else { - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - s1 = [s1, s2]; - s0 = s1; - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsehighQuoteBacktickContent() { - let s0, s1, s2, s3; - - s0 = input.charAt(peg$currPos); - if (peg$r4.test(s0)) { - peg$currPos++; - } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e13); } - } - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = []; - if (input.charCodeAt(peg$currPos) === 96) { - s2 = peg$c8; - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - if (s2 !== peg$FAILED) { - while (s2 !== peg$FAILED) { - s1.push(s2); - if (input.charCodeAt(peg$currPos) === 96) { - s2 = peg$c8; - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - } - } else { - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - s2 = peg$currPos; - peg$silentFails++; - s3 = input.charAt(peg$currPos); - if (peg$r4.test(s3)) { - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e13); } - } - peg$silentFails--; - if (s3 !== peg$FAILED) { - peg$currPos = s2; - 
s2 = undefined; - } else { - s2 = peg$FAILED; - } - if (s2 !== peg$FAILED) { - s1 = [s1, s2]; - s0 = s1; - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsesingleQuotedReference() { - let s0; - - s0 = peg$parsedoubleQuote1(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuote1(); - if (s0 === peg$FAILED) { - s0 = peg$parsebacktickQuote1(); - } - } - - return s0; - } - - function peg$parsedoubleQuote1() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 34) { - s1 = peg$c6; - peg$currPos++; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsedoubleQuote1Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsedoubleQuote1Content(); - } - if (input.charCodeAt(peg$currPos) === 34) { - s3 = peg$c6; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e8); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f33(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsedoubleQuote1Content() { - let s0, s1; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c9) { - s1 = peg$c9; - peg$currPos += 2; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f34(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = input.charAt(peg$currPos); - if (peg$r2.test(s0)) { - peg$currPos++; - } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e11); } - } - } - - return s0; - } - - function peg$parsesingleQuote1() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 39) { - s1 = peg$c7; - peg$currPos++; - } else { - s1 = peg$FAILED; - if 
(peg$silentFails === 0) { peg$fail(peg$e9); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsesingleQuote1Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsesingleQuote1Content(); - } - if (input.charCodeAt(peg$currPos) === 39) { - s3 = peg$c7; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e9); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f35(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsesingleQuote1Content() { - let s0, s1; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c10) { - s1 = peg$c10; - peg$currPos += 2; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f36(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = input.charAt(peg$currPos); - if (peg$r3.test(s0)) { - peg$currPos++; - } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e12); } - } - } - - return s0; - } - - function peg$parsebacktickQuote1() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 96) { - s1 = peg$c8; - peg$currPos++; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsebacktickQuote1Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsebacktickQuote1Content(); - } - if (input.charCodeAt(peg$currPos) === 96) { - s3 = peg$c8; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e10); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f37(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsebacktickQuote1Content() { - let s0, s1; - - s0 = peg$currPos; - if 
(input.substr(peg$currPos, 2) === peg$c11) { - s1 = peg$c11; - peg$currPos += 2; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f38(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = input.charAt(peg$currPos); - if (peg$r4.test(s0)) { - peg$currPos++; - } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e13); } - } - } - - return s0; - } - - function peg$parsedoubleQuotedReference() { - let s0; - - s0 = peg$parsedoubleQuote2(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuote2(); - if (s0 === peg$FAILED) { - s0 = peg$parsebacktickQuote2(); - } - } - - return s0; - } - - function peg$parsedoubleQuote2() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c9) { - s1 = peg$c9; - peg$currPos += 2; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsedoubleQuote2Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsedoubleQuote2Content(); - } - if (input.substr(peg$currPos, 2) === peg$c9) { - s3 = peg$c9; - peg$currPos += 2; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f39(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsedoubleQuote2Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c12) { - s1 = peg$c12; - peg$currPos += 4; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f40(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 2) === peg$c9) { - s2 = peg$c9; - peg$currPos += 2; - } 
else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e14); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f41(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsesingleQuote2() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c10) { - s1 = peg$c10; - peg$currPos += 2; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsesingleQuote2Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsesingleQuote2Content(); - } - if (input.substr(peg$currPos, 2) === peg$c10) { - s3 = peg$c10; - peg$currPos += 2; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f42(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsesingleQuote2Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c13) { - s1 = peg$c13; - peg$currPos += 4; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e19); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f43(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 2) === peg$c10) { - s2 = peg$c10; - peg$currPos += 2; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e15); } - } - 
peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f44(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsebacktickQuote2() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c11) { - s1 = peg$c11; - peg$currPos += 2; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsebacktickQuote2Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsebacktickQuote2Content(); - } - if (input.substr(peg$currPos, 2) === peg$c11) { - s3 = peg$c11; - peg$currPos += 2; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f45(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsebacktickQuote2Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c14) { - s1 = peg$c14; - peg$currPos += 4; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f46(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 2) === peg$c11) { - s2 = peg$c11; - peg$currPos += 2; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e16); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - 
peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f47(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsetripleQuotedReference() { - let s0; - - s0 = peg$parsedoubleQuote3(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuote3(); - if (s0 === peg$FAILED) { - s0 = peg$parsebacktickQuote3(); - } - } - - return s0; - } - - function peg$parsedoubleQuote3() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 3) === peg$c15) { - s1 = peg$c15; - peg$currPos += 3; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e21); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsedoubleQuote3Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsedoubleQuote3Content(); - } - if (input.substr(peg$currPos, 3) === peg$c15) { - s3 = peg$c15; - peg$currPos += 3; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e21); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f48(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsedoubleQuote3Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 6) === peg$c3) { - s1 = peg$c3; - peg$currPos += 6; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e5); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f49(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 3) === peg$c15) { - s2 = peg$c15; - peg$currPos += 3; - } else { - s2 = 
peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e21); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f50(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsesingleQuote3() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 3) === peg$c16) { - s1 = peg$c16; - peg$currPos += 3; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e22); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsesingleQuote3Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsesingleQuote3Content(); - } - if (input.substr(peg$currPos, 3) === peg$c16) { - s3 = peg$c16; - peg$currPos += 3; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e22); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f51(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsesingleQuote3Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 6) === peg$c4) { - s1 = peg$c4; - peg$currPos += 6; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e6); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f52(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 3) === peg$c16) { - s2 = peg$c16; - peg$currPos += 3; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e22); } - } - peg$silentFails--; - if (s2 
=== peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f53(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsebacktickQuote3() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 3) === peg$c17) { - s1 = peg$c17; - peg$currPos += 3; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e23); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsebacktickQuote3Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsebacktickQuote3Content(); - } - if (input.substr(peg$currPos, 3) === peg$c17) { - s3 = peg$c17; - peg$currPos += 3; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e23); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f54(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsebacktickQuote3Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 6) === peg$c5) { - s1 = peg$c5; - peg$currPos += 6; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e7); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f55(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 3) === peg$c17) { - s2 = peg$c17; - peg$currPos += 3; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e23); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } 
- if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f56(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsequadrupleQuotedReference() { - let s0; - - s0 = peg$parsedoubleQuote4(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuote4(); - if (s0 === peg$FAILED) { - s0 = peg$parsebacktickQuote4(); - } - } - - return s0; - } - - function peg$parsedoubleQuote4() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c12) { - s1 = peg$c12; - peg$currPos += 4; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsedoubleQuote4Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsedoubleQuote4Content(); - } - if (input.substr(peg$currPos, 4) === peg$c12) { - s3 = peg$c12; - peg$currPos += 4; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e17); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f57(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsedoubleQuote4Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 8) === peg$c18) { - s1 = peg$c18; - peg$currPos += 8; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e24); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f58(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 4) === peg$c12) { - s2 = peg$c12; - peg$currPos += 4; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) 
{ peg$fail(peg$e17); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f59(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsesingleQuote4() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c13) { - s1 = peg$c13; - peg$currPos += 4; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e19); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsesingleQuote4Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsesingleQuote4Content(); - } - if (input.substr(peg$currPos, 4) === peg$c13) { - s3 = peg$c13; - peg$currPos += 4; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e19); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f60(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - - return s0; - } - - function peg$parsesingleQuote4Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 10) === peg$c19) { - s1 = peg$c19; - peg$currPos += 10; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e25); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f61(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 4) === peg$c13) { - s2 = peg$c13; - peg$currPos += 4; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e19); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; 
- } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f62(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsebacktickQuote4() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 4) === peg$c14) { - s1 = peg$c14; - peg$currPos += 4; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsebacktickQuote4Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsebacktickQuote4Content(); - } - if (input.substr(peg$currPos, 4) === peg$c14) { - s3 = peg$c14; - peg$currPos += 4; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f63(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parseconsumeSingleMore(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parseconsumeSingleMore(); } + peg$savedPos = s0; + s0 = peg$f33(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2779,106 +1269,28 @@ function peg$parse(input, options) { return s0; } - function peg$parsebacktickQuote4Content() { + function peg$parseconsumeSingleMore() { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 8) === peg$c20) { - s1 = peg$c20; - peg$currPos += 8; + peg$savedPos = peg$currPos; + s1 = peg$f34(); + if (s1) { + s1 = undefined; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e26); } } if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f64(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = 
peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 4) === peg$c14) { - s2 = peg$c14; - peg$currPos += 4; + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e20); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f65(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsequintupleQuotedReference() { - let s0; - - s0 = peg$parsedoubleQuote5(); - if (s0 === peg$FAILED) { - s0 = peg$parsesingleQuote5(); - if (s0 === peg$FAILED) { - s0 = peg$parsebacktickQuote5(); - } - } - - return s0; - } - - function peg$parsedoubleQuote5() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 5) === peg$c21) { - s1 = peg$c21; - peg$currPos += 5; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e27); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsedoubleQuote5Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsedoubleQuote5Content(); - } - if (input.substr(peg$currPos, 5) === peg$c21) { - s3 = peg$c21; - peg$currPos += 5; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e27); } + if (peg$silentFails === 0) { peg$fail(peg$e6); } } - if (s3 !== peg$FAILED) { + if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f66(s2); + s0 = peg$f35(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2891,51 +1303,39 @@ function peg$parse(input, options) { return s0; } - function peg$parsedoubleQuote5Content() { - let s0, s1, s2; + function 
peg$parsebacktickQuotedUniversal() { + let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 10) === peg$c22) { - s1 = peg$c22; - peg$currPos += 10; + s1 = peg$currPos; + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 96) { + s2 = peg$c5; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e8); } + } + peg$silentFails--; + if (s2 !== peg$FAILED) { + peg$currPos = s1; + s1 = undefined; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e28); } } if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f67(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 5) === peg$c21) { - s2 = peg$c21; - peg$currPos += 5; + peg$savedPos = peg$currPos; + s2 = peg$f36(); + if (s2) { + s2 = undefined; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e27); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { + if (s2 !== peg$FAILED) { + s3 = peg$parseconsumeBacktick(); + if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f68(s2); + s0 = peg$f37(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2944,43 +1344,34 @@ function peg$parse(input, options) { peg$currPos = s0; s0 = peg$FAILED; } + } else { + peg$currPos = s0; + s0 = peg$FAILED; } return s0; } - function peg$parsesingleQuote5() { + function peg$parseconsumeBacktick() { let s0, s1, s2, s3; s0 = peg$currPos; - if (input.substr(peg$currPos, 5) === peg$c23) { - s1 = peg$c23; - peg$currPos += 5; + if (input.length > peg$currPos) { + s1 = input.charAt(peg$currPos); + peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { 
peg$fail(peg$e29); } + if (peg$silentFails === 0) { peg$fail(peg$e6); } } if (s1 !== peg$FAILED) { s2 = []; - s3 = peg$parsesingleQuote5Content(); + s3 = peg$parseconsumeBacktickMore(); while (s3 !== peg$FAILED) { s2.push(s3); - s3 = peg$parsesingleQuote5Content(); - } - if (input.substr(peg$currPos, 5) === peg$c23) { - s3 = peg$c23; - peg$currPos += 5; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e29); } - } - if (s3 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f69(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; + s3 = peg$parseconsumeBacktickMore(); } + peg$savedPos = s0; + s0 = peg$f38(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2989,92 +1380,28 @@ function peg$parse(input, options) { return s0; } - function peg$parsesingleQuote5Content() { + function peg$parseconsumeBacktickMore() { let s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 10) === peg$c19) { - s1 = peg$c19; - peg$currPos += 10; + peg$savedPos = peg$currPos; + s1 = peg$f39(); + if (s1) { + s1 = undefined; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e25); } } if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f70(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 5) === peg$c23) { - s2 = peg$c23; - peg$currPos += 5; + if (input.length > peg$currPos) { + s2 = input.charAt(peg$currPos); + peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e29); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f71(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } 
else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - - function peg$parsebacktickQuote5() { - let s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 5) === peg$c24) { - s1 = peg$c24; - peg$currPos += 5; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e30); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parsebacktickQuote5Content(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parsebacktickQuote5Content(); - } - if (input.substr(peg$currPos, 5) === peg$c24) { - s3 = peg$c24; - peg$currPos += 5; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e30); } + if (peg$silentFails === 0) { peg$fail(peg$e6); } } - if (s3 !== peg$FAILED) { + if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f72(s2); + s0 = peg$f40(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -3087,88 +1414,30 @@ function peg$parse(input, options) { return s0; } - function peg$parsebacktickQuote5Content() { - let s0, s1, s2; - - s0 = peg$currPos; - if (input.substr(peg$currPos, 10) === peg$c25) { - s1 = peg$c25; - peg$currPos += 10; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e31); } - } - if (s1 !== peg$FAILED) { - peg$savedPos = s0; - s1 = peg$f73(); - } - s0 = s1; - if (s0 === peg$FAILED) { - s0 = peg$currPos; - s1 = peg$currPos; - peg$silentFails++; - if (input.substr(peg$currPos, 5) === peg$c24) { - s2 = peg$c24; - peg$currPos += 5; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e30); } - } - peg$silentFails--; - if (s2 === peg$FAILED) { - s1 = undefined; - } else { - peg$currPos = s1; - s1 = peg$FAILED; - } - if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } - } - if (s2 !== peg$FAILED) { - peg$savedPos = s0; - s0 = peg$f74(s2); - } else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } 
else { - peg$currPos = s0; - s0 = peg$FAILED; - } - } - - return s0; - } - function peg$parseSET_BASE_INDENTATION() { let s0, s1, s2; s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c26; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e32); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c26; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e32); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } } peg$savedPos = s0; - s1 = peg$f75(s1); + s1 = peg$f41(s1); s0 = s1; return s0; @@ -3180,24 +1449,24 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c26; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e32); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c26; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e32); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } } peg$savedPos = peg$currPos; - s2 = peg$f76(s1); + s2 = peg$f42(s1); if (s2) { s2 = undefined; } else { @@ -3205,7 +1474,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f77(s1); + s0 = peg$f43(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -3220,24 +1489,24 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = []; if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c26; + s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e32); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } while (s2 !== peg$FAILED) { s1.push(s2); if (input.charCodeAt(peg$currPos) === 32) { - s2 = peg$c26; 
+ s2 = peg$c6; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e32); } + if (peg$silentFails === 0) { peg$fail(peg$e9); } } } peg$savedPos = peg$currPos; - s2 = peg$f78(s1); + s2 = peg$f44(s1); if (s2) { s2 = undefined; } else { @@ -3305,7 +1574,7 @@ function peg$parse(input, options) { peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e18); } + if (peg$silentFails === 0) { peg$fail(peg$e6); } } peg$silentFails--; if (s1 === peg$FAILED) { @@ -3360,11 +1629,11 @@ function peg$parse(input, options) { let s0; s0 = input.charAt(peg$currPos); - if (peg$r5.test(s0)) { + if (peg$r2.test(s0)) { peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e33); } + if (peg$silentFails === 0) { peg$fail(peg$e10); } } return s0; @@ -3374,11 +1643,11 @@ function peg$parse(input, options) { let s0; s0 = input.charAt(peg$currPos); - if (peg$r6.test(s0)) { + if (peg$r3.test(s0)) { peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$e34); } + if (peg$silentFails === 0) { peg$fail(peg$e11); } } return s0; @@ -3421,56 +1690,59 @@ function peg$parse(input, options) { return indentationStack[indentationStack.length - 1]; } - // Parse a multi-quote string dynamically for N >= 6 quotes - // Returns { value: string, length: number } or null if no match - function parseHighQuoteString(inputStr, quoteChar) { + // Universal procedural parser for N-quote strings (any N >= 1) + // Parses from the given position in the input string + // Returns { value, length } or null + function parseQuotedStringAt(inputStr, startPos, quoteChar) { + if (startPos >= inputStr.length || inputStr[startPos] !== quoteChar) { + return null; + } + // Count opening quotes let quoteCount = 0; - while (quoteCount < inputStr.length && inputStr[quoteCount] === quoteChar) { + let pos = startPos; + while (pos < inputStr.length && inputStr[pos] === quoteChar) { quoteCount++; + pos++; } - if 
(quoteCount < 6) { - return null; // Let the regular rules handle 1-5 quotes - } - - const openClose = quoteChar.repeat(quoteCount); + const closeSeq = quoteChar.repeat(quoteCount); const escapeSeq = quoteChar.repeat(quoteCount * 2); - const escapeVal = quoteChar.repeat(quoteCount); - let pos = quoteCount; // Start after opening quotes let content = ''; - while (pos < inputStr.length) { // Check for escape sequence (2*N quotes) if (inputStr.substr(pos, escapeSeq.length) === escapeSeq) { - content += escapeVal; + content += closeSeq; // 2*N quotes become N quotes pos += escapeSeq.length; continue; } - // Check for closing quotes (exactly N quotes, not more) - if (inputStr.substr(pos, quoteCount) === openClose) { - // Make sure it's exactly N quotes (not followed by more of the same quote) + // Check for closing sequence (exactly N quotes) + if (inputStr.substr(pos, quoteCount) === closeSeq) { + // Verify it's exactly N quotes (not followed by more of same char) const afterClose = pos + quoteCount; if (afterClose >= inputStr.length || inputStr[afterClose] !== quoteChar) { // Found valid closing return { value: content, - length: afterClose + length: afterClose - startPos }; } } - // Take next character + // Add character to content content += inputStr[pos]; pos++; } - // No closing quotes found - return null; + return null; // No valid closing found } + // Global state for passing parsed values between predicate and action + let parsedValue = null; + let parsedLength = 0; + peg$result = peg$startRuleFunction(); const peg$success = (peg$result !== peg$FAILED && peg$currPos === input.length); From 05bc8dc5f85e429c056c1e74e1af574b959b653a Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 19:08:25 +0000 Subject: [PATCH 13/15] Add case study: C# Pegasus PEG parser simplification investigation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document comprehensive investigation into whether C# Pegasus can use universal 
parsing approach similar to JavaScript Peggy.js for N-quote string parsing. Case study includes: - Timeline of events and experiments conducted - Root cause analysis (4 identified causes) - Four solution approaches with documentation: 1. #parse{} expression - Failed due to MSBuild incompatibility 2. Capture-then-validate - Partial success, disambiguation issues 3. Semantic predicates - Failed, no input access 4. Hybrid approach - Current working solution Key findings: - Pegasus #parse{} not supported with MSBuild tag - Semantic predicates cannot access input string directly - PEG greedy operators prevent proper disambiguation - Hybrid approach (explicit rules 1-5, procedural 6+) is best solution Related to issue #142 and PR #168. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../csharp-peg-simplification/README.md | 178 +++++++++ .../csharp-peg-simplification/root-causes.md | 345 ++++++++++++++++++ .../solutions/01-parse-expression/README.md | 203 +++++++++++ .../test_parse_expression.peg | 103 ++++++ .../solutions/02-capture-validate/Program.cs | 98 +++++ .../solutions/02-capture-validate/README.md | 238 ++++++++++++ .../test_capture_validate.peg | 95 +++++ .../03-semantic-predicates/README.md | 206 +++++++++++ .../test_semantic_predicates.peg | 147 ++++++++ .../solutions/04-hybrid-approach/README.md | 236 ++++++++++++ .../csharp-peg-simplification/timeline.md | 268 ++++++++++++++ 11 files changed, 2117 insertions(+) create mode 100644 docs/case-studies/csharp-peg-simplification/README.md create mode 100644 docs/case-studies/csharp-peg-simplification/root-causes.md create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/README.md create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/test_parse_expression.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/Program.cs create mode 100644 
docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/README.md create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/test_capture_validate.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/README.md create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/test_semantic_predicates.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/README.md create mode 100644 docs/case-studies/csharp-peg-simplification/timeline.md diff --git a/docs/case-studies/csharp-peg-simplification/README.md b/docs/case-studies/csharp-peg-simplification/README.md new file mode 100644 index 0000000..dcd0401 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/README.md @@ -0,0 +1,178 @@ +# Case Study: C# Pegasus PEG Parser Simplification Investigation + +## Overview + +This case study documents an extensive investigation into whether the C# Pegasus PEG parser can be simplified to use a universal parsing approach similar to JavaScript's Peggy.js implementation for N-quote string parsing. + +**Related Issue**: [#142 - Support more quotes options](https://github.com/link-foundation/links-notation/issues/142) +**Related PR**: [#168 - Add support for backtick quotes and multi-quote strings](https://github.com/link-foundation/links-notation/pull/168) + +## Problem Statement + +The requirement is to support: +1. Three quote types: double quotes (`"`), single quotes (`'`), and backticks (`` ` ``) +2. Any number N of consecutive quotes to open/close strings (N = 1, 2, 3, ...) +3. Escaping via doubling: 2×N quotes inside become N quotes in output + +**Goal**: Use a single universal parsing function for all quote types and all N values, as successfully implemented in JavaScript (Peggy.js). 
+ +## Timeline of Events + +### 2025-12-01T15:20 - Initial Request +User asked if it's possible to support any number of quotes (not just 1-5) with PEG parsers. + +### 2025-12-01T16:10 - First Discovery: PEG Greedy Problem +Investigation revealed that PEG.js greedy patterns like `$('"'+ content* '"'+)` don't correctly disambiguate multiple quoted strings. For example, parsing `"a" "b"` fails because the greedy `+` captures too much. + +### 2025-12-01T17:27 - User Question: Variables/Backreferences in PEG +User asked about using variable patterns like regex `(?P<quotes>"+)(.*)(?P=quotes)`. + +### 2025-12-01T17:50 - JavaScript Solution Found +Discovered technique using **global variables + semantic predicates** with `input` and `offset()` to implement universal N-quote parsing in Peggy.js. This technique is inspired by heredoc parsing patterns. + +### 2025-12-01T18:01 - Simplification Request +User requested the same universal approach in all languages including C#. + +### 2025-12-01T18:08 - First C# Attempt: `#parse{}` Expression +Attempted to use Pegasus's `#parse{}` syntax. Result: **PEG0011: Unterminated code section** error. + +### 2025-12-01T18:16 - `<PegGrammar>` Tag Discovery +Found that removing `<PegGrammar>` from .csproj allows `#parse{}` to work, but creates other issues. + +### 2025-12-01T18:22 - User Question: Universal C# Parsing +User explicitly asked if C# Pegasus can use universal parsing like JavaScript. + +### 2025-12-01T18:28 - Capture-then-Validate Approach Tested +Attempted alternative approach: capture greedy pattern then validate procedurally. +- **Success**: Isolated quoted strings work correctly +- **Failure**: Multiple quoted strings on same line fail due to greedy capture + +### 2025-12-01T18:43 - Final Conclusion +Confirmed that C# Pegasus cannot use the same universal approach as JavaScript due to fundamental PEG generator differences. + +## Root Causes + +### 1. 
`#parse{}` Expression Limitations + +Pegasus has different code paths for `#parse{}` handling: +- When using `<PegGrammar>` in .csproj: **Does NOT support `#parse{}` properly** +- When auto-detecting .peg files: Supports `#parse{}` but creates other issues + +**Error**: `PEG0011: Unterminated code section` + +### 2. No Access to Input/Cursor in Semantic Predicates + +JavaScript's Peggy.js provides: +```javascript +&{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '"'); + // ... +} +``` + +Pegasus semantic predicates `&{ }` do NOT provide direct access to: +- `input` / `subject` (the full input string) +- `cursor` / `offset` (current parsing position) + +### 3. PEG Greedy Operator Disambiguation Problem + +PEG's `*` and `+` operators are **greedy** - they match as much as possible. + +Pattern: `('"'+ content* '"'+)` + +**Problem**: +``` +Input: "first" "second" +Expected: Parse two separate strings "first" and "second" +Actual: Greedy pattern captures from first " to LAST ", including whitespace +``` + +The greedy nature prevents correct disambiguation when multiple quoted strings appear together. + +### 4. Pegasus vs Peggy.js Architectural Differences + +| Feature | Peggy.js (JavaScript) | Pegasus (C#) | +|---------|----------------------|--------------| +| Global variables in header | ✅ Yes | ✅ Yes (@members) | +| `input` access in predicates | ✅ Yes | ❌ No | +| `offset()` function | ✅ Yes | ❌ No | +| `#parse{}` expressions | N/A | ⚠️ Partial support | +| Dynamic consumption patterns | ✅ Yes | ❌ No | + +## Solutions Attempted + +See the `solutions/` subdirectory for detailed experiments: + +1. **`#parse{}` Expression Approach** - Failed due to PEG0011 error +2. **Capture-then-Validate Approach** - Works for isolated strings, fails for disambiguation +3. **Semantic Predicates with State** - Cannot access input/cursor directly +4. 
**Hybrid Approach** (Current) - Explicit PEG rules for 1-5 quotes + procedural for 6+ + +## Conclusion + +**C# Pegasus cannot use the exact same universal approach as JavaScript** due to fundamental differences in how the parser generators work. + +### Recommended Approach: Hybrid + +The current C# implementation uses a **hybrid approach** that achieves the same functionality: + +1. **Explicit PEG rules for quote levels 1-5** (most common cases) + - Required for Pegasus to correctly disambiguate multiple quoted strings + - Provides proper PEG parsing semantics + +2. **Procedural `ParseHighQuoteString()` method for levels 6+** + - Handles unlimited quote counts + - Uses the same universal parsing algorithm + +The core parsing logic is universal and simple - it's just wrapped in PEG rules that provide correct disambiguation semantics. + +### Code Comparison + +**JavaScript (Peggy.js) - Universal for all N:** +```javascript +doubleQuotedUniversal = &'"' &{ + const pos = offset(); + const result = parseQuotedStringAt(input, pos, '"'); + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; +} chars:consumeDouble { return parsedValue; } +``` + +**C# (Pegasus) - Hybrid approach:** +``` +// Explicit rules for 1-5 quotes +singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 +doubleQuote1 = '"' r:doubleQuote1Content* '"' { string.Join("", r) } +// ... 
(similar for levels 2-5) + +// Procedural for 6+ quotes +highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { raw } +highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseHighQuoteString(raw, '"') } { _highQuoteValue } +``` + +## Files in This Case Study + +``` +docs/case-studies/csharp-peg-simplification/ +├── README.md # This file +├── timeline.md # Detailed timeline with timestamps +├── root-causes.md # Deep dive into each root cause +├── solutions/ # Experimental solutions +│ ├── 01-parse-expression/ # #parse{} approach +│ ├── 02-capture-validate/ # Capture-then-validate approach +│ ├── 03-semantic-predicates/ # Semantic predicates approach +│ └── 04-hybrid-approach/ # Hybrid approach (current solution) +└── experiments/ # Standalone experiment files +``` + +## References + +- [Peggy.js Documentation](https://peggyjs.org/documentation.html) +- [Pegasus GitHub Repository](https://github.com/otac0n/Pegasus) +- [Pegasus Syntax Guide](https://github.com/otac0n/Pegasus/wiki/Syntax-Guide) +- [Stack Overflow: Heredocs with PEG.js](https://stackoverflow.com/questions/69566480/implement-heredocs-with-trim-indent-using-peg-js) diff --git a/docs/case-studies/csharp-peg-simplification/root-causes.md b/docs/case-studies/csharp-peg-simplification/root-causes.md new file mode 100644 index 0000000..286869a --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/root-causes.md @@ -0,0 +1,345 @@ +# Root Causes Analysis: C# Pegasus Universal Parsing Limitations + +This document provides a deep analysis of each root cause preventing universal N-quote parsing in C# Pegasus. + +## Root Cause 1: `#parse{}` Expression Build System Incompatibility + +### Description + +Pegasus's `#parse{}` expression allows custom procedural parsing, but it has limited support when integrated with .NET's build system. 
+ +### Technical Details + +**What `#parse{}` should do**: +```csharp +// #parse{} allows returning a custom ParseResult +rule = #parse{ + // Custom parsing logic here + return new ParseResult(ref startCursor, endCursor, value); +} +``` + +**The problem**: + +When using `<PegGrammar>` in the .csproj file: +```xml +<ItemGroup> + <PegGrammar Include="Parser.peg" /> +</ItemGroup> +``` + +Pegasus uses the `CompilePegGrammar` MSBuild task which has a different code path that doesn't properly parse `#parse{}` blocks. + +**Error produced**: +``` +error PEG0011: Unterminated code section. +``` + +**Workaround attempted**: +Removing the explicit `<PegGrammar>` tag allows Pegasus to auto-detect .peg files through a different mechanism that DOES support `#parse{}`. + +**Why the workaround doesn't work**: +- Auto-detection creates issues with generated class naming +- Namespace conflicts occur +- Build integration becomes unreliable + +### Evidence + +```bash +# With <PegGrammar> +$ dotnet build Link.Foundation.Links.Notation.csproj +Parser.peg(106,41): error PEG0011: Unterminated code section. + +# Without <PegGrammar> tag (auto-detect) +$ dotnet build Link.Foundation.Links.Notation.csproj +Build succeeded. (But generated class has issues) +``` + +### Impact + +Cannot use procedural parsing via `#parse{}` in production-ready code. + +--- + +## Root Cause 2: No Input/Cursor Access in Semantic Predicates + +### Description + +Pegasus semantic predicates `&{ }` do not provide access to the input string or current cursor position, unlike JavaScript's Peggy.js. 
+ +### Technical Details + +**JavaScript (Peggy.js) - Works**: +```javascript +doubleQuotedUniversal = &'"' &{ + const pos = offset(); // ← Get current position + const result = parseQuotedStringAt(input, pos, '"'); // ← Access input + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; +} +``` + +**C# (Pegasus) - Does NOT Work**: +```csharp +// This is what we WANT to write: +doubleQuotedUniversal = &'"' &{ + var pos = cursor.Location; // ← Error: 'cursor' not available + var result = ParseAt(subject, pos, '"'); // ← Error: 'subject' not available + return result != null; +} +``` + +**What's actually available in Pegasus `&{ }` predicates**: + +The predicate is compiled as: +```csharp +new Func(state => + /* your predicate code, only 'state' (Cursor) is available */ +) +``` + +Inside this lambda: +- `state` is the `Cursor` struct +- `state.Location` gives the integer position +- `this.subject` is NOT accessible (parser instance scope) +- No way to get the input string + +### Attempted Solutions + +**Attempt 1**: Use `Cursor` type directly +```csharp +&{ ParseAtCursor(Cursor, Subject, '"') } // Error: 'Cursor' is a type +``` + +**Attempt 2**: Use `state` parameter +```csharp +&{ ParseAt(state.Subject, state.Location, '"') } // Error: Cursor doesn't have Subject +``` + +**Attempt 3**: Store in @members and access +```csharp +@members { + private string _subject; // But how to populate it? 
+} +``` + +### Evidence from Generated Code + +In `Parser.peg.g.cs`: +```csharp +private IParseResult doubleQuote1(ref Cursor cursor) +{ + // The 'cursor' parameter is local to this method + // 'this.subject' IS available here (instance field) + // But inside &{ } predicates, only lambda 'state' parameter available + + var r0 = this.CHECK(ref cursor, state => + /* Only 'state' is available here, not 'this.subject' */ + ); +} +``` + +### Impact + +Cannot implement procedural parsing logic that needs to look ahead in the input string. + +--- + +## Root Cause 3: PEG Greedy Operator Disambiguation Problem + +### Description + +PEG's `*` and `+` operators are greedy by nature, matching as much input as possible. This prevents correct parsing when multiple quoted strings appear in sequence. + +### Technical Details + +**The pattern**: +``` +doubleQuoteCaptureRaw = "" ('"'+ quoteContent* '"'+) +quoteContent = [^"] / '"'+ &[^"] +``` + +**How PEG greedy matching works**: +1. `'"'+` matches one or more quotes → takes as many as possible +2. `quoteContent*` matches any content → takes as much as possible +3. `'"'+` matches closing quotes → takes as many as possible + +**Problem scenario**: +``` +Input: "first" "second" + ^ ^ + | +-- Last " in input + +-- First " in input +``` + +The greedy `'"'+` at the start matches the first `"`. +The greedy `quoteContent*` matches everything until... +The greedy `'"'+` at the end matches the LAST `"` in the input. 
+ +**Result**: +- Expected: Two strings `"first"` and `"second"` +- Actual: One string `"first" "second"` (includes the space and second string) + +### Why This Is Fundamental to PEG + +PEG (Parsing Expression Grammar) uses **ordered choice** with **greedy quantifiers**: +- `*` matches zero or more, as many as possible +- `+` matches one or more, as many as possible +- No built-in backtracking for disambiguation + +This is different from regex where you can use: +- Non-greedy quantifiers: `*?`, `+?` +- Backreferences: `(?P<quotes>"+)(?P=quotes)` + +### Impact + +Cannot create a single universal pattern that correctly parses multiple quoted strings in sequence. + +### Why JavaScript Solution Works + +JavaScript's solution avoids this by: +1. Peeking at input procedurally (not using PEG pattern) +2. Using a **semantic predicate** to determine exact boundaries +3. Using a **consume pattern** that matches exactly N characters + +```javascript +doubleQuotedUniversal = &'"' &{ + // Procedural parsing determines EXACT boundaries + const result = parseQuotedStringAt(input, pos, '"'); + parsedLength = result.length; // Store exact length + return true; +} chars:consumeDouble { return parsedValue; } + +// Consume pattern matches exactly parsedLength characters +consumeDouble = c:. cs:consumeDoubleMore* { ... } +consumeDoubleMore = &{ return parsedLength > 1 && (parsedLength--, true); } c:. +``` + +--- + +## Root Cause 4: Pegasus vs Peggy.js Architectural Differences + +### Description + +The two PEG parser generators have fundamentally different architectures that affect what's possible in grammars. 
+ +### Comparison Table + +| Feature | Peggy.js (JavaScript) | Pegasus (C#) | Impact | +|---------|----------------------|--------------|--------| +| Grammar header | Global scope | `@members` class scope | ✓ Equivalent | +| Global variables | ✓ Accessible everywhere | ✓ Via `@members` | ✓ Equivalent | +| `input` access | ✓ Built-in global | ❌ Not available | **Critical** | +| `offset()` function | ✓ Built-in function | ❌ Not available | **Critical** | +| Semantic predicates | Full JavaScript scope | Limited lambda scope | **Critical** | +| `#parse{}` | N/A | ⚠️ Limited support | **Blocking** | +| Dynamic consumption | ✓ Via semantic predicates | ❌ Cannot implement | **Critical** | + +### Detailed Architectural Differences + +**1. Execution Context** + +Peggy.js: +- Grammar runs in JavaScript's dynamic scope +- All variables and functions are accessible +- `input` and `offset()` are injected globals + +Pegasus: +- Grammar compiles to C# class methods +- Each rule is a separate method +- Predicates are lambdas with limited scope + +**2. Code Blocks** + +Peggy.js: +```javascript +{ + // Initialization block - runs once + let globalVar = null; +} + +rule = &{ + // Full access to globalVar, input, offset(), etc. + return true; +} +``` + +Pegasus: +```csharp +@members { + // Class members + private string _field; +} + +rule = &{ + // Lambda scope - only 'state' parameter available + // Cannot access _field or subject + return true; +} +``` + +**3. 
Parse Result Control** + +Peggy.js: +- Can control parsing via semantic predicates +- Can "consume" exact number of characters dynamically + +Pegasus: +- `#parse{}` allows custom results but has build issues +- No way to dynamically consume exact characters + +### Evidence + +From Pegasus source code analysis: +```csharp +// Semantic predicate compilation in Pegasus +var predicate = new Func<Cursor, bool>(state => + /* user code here - 'state' is only parameter */ +); +``` + +From Peggy.js documentation: +```javascript +// Available in all code blocks: +// - input: the full input string +// - offset(): current position +// - range(): start and end positions +// - location(): line and column info +``` + +### Impact + +The architectural differences mean techniques that work in Peggy.js fundamentally cannot be translated to Pegasus. + +--- + +## Summary + +| Root Cause | Severity | Workaround Available | +|------------|----------|---------------------| +| `#parse{}` build incompatibility | High | No viable workaround | +| No input/cursor in predicates | Critical | No workaround | +| Greedy operator disambiguation | High | Explicit rules per level | +| Architecture differences | Fundamental | Cannot be addressed | + +## Recommendation + +Given these fundamental limitations, the **hybrid approach** is the only viable solution: + +1. **Explicit PEG rules** for common cases (1-5 quotes) + - Provides correct disambiguation + - Works within Pegasus's constraints + +2. 
**Procedural helper method** for unlimited quotes (6+) + - Uses same universal algorithm + - Invoked via capture-then-validate pattern + +This approach: +- Achieves the functional requirement (unlimited N quotes) +- Works reliably with Pegasus +- Uses the same core parsing logic as JavaScript +- Just requires more wrapper code in the grammar diff --git a/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/README.md b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/README.md new file mode 100644 index 0000000..9d84a69 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/README.md @@ -0,0 +1,203 @@ +# Solution 1: `#parse{}` Expression Approach + +## Concept + +Use Pegasus's `#parse{}` expression to implement a fully procedural parser that handles all N-quote strings with a single rule. + +## How It Should Work + +The `#parse{}` expression in Pegasus allows returning a custom `ParseResult`: + +```csharp +rule = #parse{ + // Access cursor position + var pos = startCursor.Location; + + // Access input string + var input = subject; // or some accessor + + // Perform custom parsing + var result = CustomParse(input, pos); + + if (result != null) { + // Return success with new cursor position + return new ParseResult(ref startCursor, result.EndCursor, result.Value); + } + return null; // Parse failure +} +``` + +## Implementation Attempted + +### Grammar (test_parse_expression.peg) + +``` +@namespace CSharpPegTest +@classname UniversalParser +@using System.Linq + +@members +{ + private string _parsedValue; + private int _parsedLength; + + private bool ParseQuotedStringAt(string input, int startPos, char quoteChar) + { + if (startPos >= input.Length || input[startPos] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = startPos; + while (pos < input.Length && input[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new 
string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + _parsedValue = content.ToString(); + _parsedLength = afterClose - startPos; + return true; + } + } + + content.Append(input[pos]); + pos++; + } + return false; + } +} + +document = q:quoted { q } + +// Universal quoted string - handles any N quotes +quoted = doubleQuoted / singleQuoted / backtickQuoted + +doubleQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '"')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} + +singleQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '\'')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} + +backtickQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '`')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} +``` + +### Project File (test_parse_expression.csproj) + +```xml + + + net8.0 + Exe + + + + + + +``` + +## Result + +### Build Error + +``` +$ dotnet build +error PEG0011: Unterminated code section. 
+``` + +### Analysis + +When using `<PegGrammar>` in the project file, Pegasus uses the MSBuild task `CompilePegGrammar` which has different parsing logic that doesn't properly handle multi-line `#parse{}` blocks. + +### Attempted Workarounds + +#### 1. Single-Line Format + +``` +doubleQuoted = #parse{ if (ParseQuotedStringAt(subject, startCursor.Location, '"')) { return new Pegasus.Common.ParseResult<string>(ref startCursor, startCursor.Advance(_parsedLength), _parsedValue); } return null; } +``` + +**Result**: Same error - `PEG0011: Unterminated code section` + +#### 2. Remove `<PegGrammar>` Tag + +Removing the explicit tag and letting Pegasus auto-detect: + +```xml + + + net8.0 + Exe + + + + + + +``` + +**Result**: `#parse{}` works, but: +- Generated class naming issues +- Namespace conflicts +- Build integration unreliable + +## Conclusion + +**Status**: ❌ FAILED + +The `#parse{}` expression approach cannot be used reliably in production code due to the MSBuild task incompatibility. + +## Potential Future Solution + +If Pegasus were to fix the `CompilePegGrammar` task to properly parse `#parse{}` blocks, this approach would be ideal. A GitHub issue could be filed for this. diff --git a/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/test_parse_expression.peg b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/test_parse_expression.peg new file mode 100644 index 0000000..18a625f --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/test_parse_expression.peg @@ -0,0 +1,103 @@ +@namespace CSharpPegTest +@classname UniversalParser +@using System.Linq + +@members +{ + private string _parsedValue; + private int _parsedLength; + + /// + /// Universal parser for N-quote strings. + /// Handles any quote character and any number N of quotes. 
+ /// + private bool ParseQuotedStringAt(string input, int startPos, char quoteChar) + { + if (startPos >= input.Length || input[startPos] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = startPos; + while (pos < input.Length && input[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence (exactly N quotes) + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + _parsedValue = content.ToString(); + _parsedLength = afterClose - startPos; + return true; + } + } + + content.Append(input[pos]); + pos++; + } + return false; + } +} + +document = q:quoted { q } + +// Universal quoted string - handles any N quotes +// THIS DOES NOT WORK with tag due to PEG0011 error +quoted = doubleQuoted / singleQuoted / backtickQuoted + +doubleQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '"')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} + +singleQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '\'')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} + +backtickQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '`')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + 
startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} diff --git a/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/Program.cs b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/Program.cs new file mode 100644 index 0000000..ba8b39c --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/Program.cs @@ -0,0 +1,98 @@ +using System; +using CSharpPegTest; + +/// +/// Test program for the capture-then-validate approach. +/// Demonstrates what works and what fails. +/// +class Program +{ + static void Main() + { + var parser = new CaptureValidateParser(); + + // Test cases that WORK (isolated strings) + var isolatedTests = new[] + { + ("\"hello\"", "hello"), + ("\"\"world\"\"", "world"), + ("\"\"\"foo\"\"\"", "foo"), + ("\"\"\"\"quad\"\"\"\"", "quad"), + ("'text'", "text"), + ("''escaped''", "escaped"), + ("'''triple'''", "triple"), + ("`backtick`", "backtick"), + ("``double``", "double"), + // Escape sequences + ("\"has \"\"escaped\"\" quotes\"", "has \"escaped\" quotes"), + ("''text with '''' inside''", "text with '' inside"), + }; + + Console.WriteLine("=== Isolated String Tests (Should ALL Pass) ===\n"); + int passed = 0; + int failed = 0; + + foreach (var (input, expected) in isolatedTests) + { + try + { + var result = parser.Parse(input); + if (result == expected) + { + Console.WriteLine($"✓ PASS: {input}"); + Console.WriteLine($" → \"{result}\""); + passed++; + } + else + { + Console.WriteLine($"✗ FAIL: {input}"); + Console.WriteLine($" Expected: \"{expected}\""); + Console.WriteLine($" Got: \"{result}\""); + failed++; + } + } + catch (Exception ex) + { + Console.WriteLine($"✗ ERROR: {input}"); + Console.WriteLine($" {ex.Message}"); + failed++; + } + Console.WriteLine(); + } + + Console.WriteLine($"Isolated tests: {passed} passed, {failed} failed\n"); + + // Test cases that FAIL (multiple strings - disambiguation problem) + var multipleTests = new[] + { 
+ "\"first\" \"second\"", + "\"a\" \"b\" \"c\"", + "'one' 'two'", + }; + + Console.WriteLine("=== Multiple String Tests (Expected to FAIL) ===\n"); + + foreach (var input in multipleTests) + { + Console.WriteLine($"Input: {input}"); + try + { + var result = parser.Parse(input); + Console.WriteLine($" Result: \"{result}\""); + Console.WriteLine($" Problem: Should have parsed two separate strings!"); + Console.WriteLine($" Cause: Greedy PEG pattern captured entire input"); + } + catch (Exception ex) + { + Console.WriteLine($" Parse failed: {ex.Message}"); + Console.WriteLine($" Cause: Captured text didn't validate as single string"); + } + Console.WriteLine(); + } + + Console.WriteLine("=== Conclusion ==="); + Console.WriteLine("Capture-then-validate works for ISOLATED strings only."); + Console.WriteLine("It FAILS when multiple quoted strings appear in sequence."); + Console.WriteLine("This is due to PEG's greedy matching behavior."); + } +} diff --git a/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/README.md b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/README.md new file mode 100644 index 0000000..82684ae --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/README.md @@ -0,0 +1,238 @@ +# Solution 2: Capture-then-Validate Approach + +## Concept + +Capture a greedy PEG pattern that matches quoted strings, then use a semantic predicate to validate and parse the captured text procedurally. + +## How It Works + +1. **Capture Phase**: Use greedy PEG patterns to capture text that looks like a quoted string +2. **Validate Phase**: Use a semantic predicate `&{ }` to parse the captured text +3. 
**Return Phase**: Return the parsed value stored in a member field + +## Implementation + +### Grammar (test_capture_validate.peg) + +``` +@namespace CSharpPegTest +@classname CaptureValidateParser +@using System.Linq + +@members +{ + private string _parsedValue; + + /// + /// Parse captured text as an N-quote string. + /// The captured text should include opening and closing quotes. + /// + private bool TryParseQuotedString(string capturedText, char quoteChar) + { + _parsedValue = null; + if (string.IsNullOrEmpty(capturedText) || capturedText[0] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = 0; + while (pos < capturedText.Length && capturedText[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < capturedText.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= capturedText.Length && + capturedText.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence + if (pos + quoteCount <= capturedText.Length && + capturedText.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= capturedText.Length || capturedText[afterClose] != quoteChar) + { + // Valid closing - check if we consumed entire captured text + if (afterClose == capturedText.Length) + { + _parsedValue = content.ToString(); + return true; + } + // Captured more than one quoted string (disambiguation problem) + return false; + } + } + + content.Append(capturedText[pos]); + pos++; + } + return false; + } +} + +document = q:quoted { q } + +// Try to parse quoted strings using capture-then-validate +quoted = doubleQuoted / singleQuoted / backtickQuoted + +// Double quotes: capture greedy pattern, then validate 
+doubleQuoted = raw:doubleQuoteCaptureRaw &{ TryParseQuotedString(raw, '"') } { _parsedValue } + +// Capture pattern for double quotes +// Matches: one or more ", then content, then one or more " +doubleQuoteCaptureRaw = "" ('"'+ doubleQuoteContent* '"'+) +doubleQuoteContent = [^"] / '"'+ &[^"] + +// Single quotes: same pattern +singleQuoted = raw:singleQuoteCaptureRaw &{ TryParseQuotedString(raw, '\'') } { _parsedValue } +singleQuoteCaptureRaw = "" ("'"+ singleQuoteContent* "'"+) +singleQuoteContent = [^'] / "'"+ &[^'] + +// Backticks: same pattern +backtickQuoted = raw:backtickCaptureRaw &{ TryParseQuotedString(raw, '`') } { _parsedValue } +backtickCaptureRaw = "" ('`'+ backtickContent* '`'+) +backtickContent = [^`] / '`'+ &[^`] +``` + +### Test Program (Program.cs) + +```csharp +using System; +using CSharpPegTest; + +class Program +{ + static void Main() + { + var parser = new CaptureValidateParser(); + + // Test cases that WORK (isolated strings) + var testCases = new[] + { + ("\"hello\"", "hello"), + ("\"\"world\"\"", "world"), + ("\"\"\"foo\"\"\"", "foo"), + ("'text'", "text"), + ("''escaped''", "escaped"), + ("`backtick`", "backtick"), + }; + + Console.WriteLine("=== Isolated String Tests ==="); + foreach (var (input, expected) in testCases) + { + var result = parser.Parse(input); + var status = result == expected ? 
"✓" : "✗"; + Console.WriteLine($"{status} {input} → {result} (expected: {expected})"); + } + + // Test case that FAILS (multiple strings) + Console.WriteLine("\n=== Multiple String Tests (Disambiguation) ==="); + try + { + var multiInput = "\"first\" \"second\""; + var result = parser.Parse(multiInput); + Console.WriteLine($"✗ {multiInput} → {result} (should parse two separate strings!)"); + } + catch (Exception ex) + { + Console.WriteLine($"✗ Parse failed: {ex.Message}"); + } + } +} +``` + +## Results + +### Isolated Strings - SUCCESS ✓ + +``` +=== Isolated String Tests === +✓ "hello" → hello (expected: hello) +✓ ""world"" → world (expected: world) +✓ """foo""" → foo (expected: foo) +✓ 'text' → text (expected: text) +✓ ''escaped'' → escaped (expected: escaped) +✓ `backtick` → backtick (expected: backtick) +``` + +### Multiple Strings - FAILURE ✗ + +``` +=== Multiple String Tests (Disambiguation) === +Input: "first" "second" +Expected: Parse two separate strings +Actual: Greedy pattern captures from first " to last " → ONE string +``` + +## Problem Analysis + +### Why Isolated Strings Work + +For input `"hello"`: +1. `'"'+` matches the opening `"` +2. `doubleQuoteContent*` matches `hello` +3. `'"'+` matches the closing `"` +4. `TryParseQuotedString` validates: exactly 1 quote open/close, content is "hello" +5. Success! + +### Why Multiple Strings Fail + +For input `"first" "second"`: +1. `'"'+` matches the first `"` +2. `doubleQuoteContent*` matches `first" "second` (everything until last `"`) +3. `'"'+` matches the final `"` +4. Captured text is `"first" "second"` - the ENTIRE input +5. `TryParseQuotedString` tries to validate, finds that closing quotes don't match +6. Fails! + +### Root Cause: PEG Greedy Operators + +PEG's `+` and `*` operators are **greedy** - they match as much as possible. 
+ +The pattern `'"'+ content* '"'+` will always: +- Start at the first `"` +- End at the LAST `"` +- Include everything in between + +There's no way in PEG to say "match the smallest valid quoted string". + +## Conclusion + +**Status**: ⚠️ PARTIAL SUCCESS + +This approach works for: +- Isolated quoted strings +- Strings at end of input +- Strings followed by non-quote characters + +This approach fails for: +- Multiple quoted strings on the same line +- Quoted strings in complex expressions + +## When This Approach Can Be Used + +If your grammar guarantees that quoted strings are always: +- At the end of a line +- Followed by non-quote characters +- Or isolated + +Then this approach works fine. The current C# implementation uses this for 6+ quote strings (high quotes) where: +1. A lookahead `&('""""""' / "''''''" / '``````')` ensures we're looking at 6+ quotes +2. The captured pattern is then validated +3. Disambiguation with 1-5 quote strings is handled by explicit rules + +## Alternative: Explicit Rules for Disambiguation + +The hybrid approach uses explicit rules for 1-5 quotes which provide proper PEG disambiguation, and only uses capture-then-validate for 6+ quotes (rare case). diff --git a/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/test_capture_validate.peg b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/test_capture_validate.peg new file mode 100644 index 0000000..dd2894e --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/test_capture_validate.peg @@ -0,0 +1,95 @@ +@namespace CSharpPegTest +@classname CaptureValidateParser +@using System.Linq + +@members +{ + private string _parsedValue; + + /// + /// Parse captured text as an N-quote string. + /// The captured text should include opening and closing quotes. 
+ /// + /// The raw captured text including quotes + /// The quote character to parse + /// True if parsing succeeded + private bool TryParseQuotedString(string capturedText, char quoteChar) + { + _parsedValue = null; + if (string.IsNullOrEmpty(capturedText) || capturedText[0] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = 0; + while (pos < capturedText.Length && capturedText[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < capturedText.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= capturedText.Length && + capturedText.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence + if (pos + quoteCount <= capturedText.Length && + capturedText.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= capturedText.Length || capturedText[afterClose] != quoteChar) + { + // Valid closing - check if we consumed entire captured text + if (afterClose == capturedText.Length) + { + _parsedValue = content.ToString(); + return true; + } + // Captured more than one quoted string (disambiguation problem) + return false; + } + } + + content.Append(capturedText[pos]); + pos++; + } + return false; + } +} + +// Entry point: parse a single quoted string +document = q:quoted { q } + +// Try to parse quoted strings using capture-then-validate +// NOTE: This has disambiguation problems with multiple quoted strings +quoted = doubleQuoted / singleQuoted / backtickQuoted + +// Double quotes: capture greedy pattern, then validate +doubleQuoted = raw:doubleQuoteCaptureRaw &{ TryParseQuotedString(raw, '"') } { _parsedValue } + +// Capture pattern for double quotes +// Matches: one or more 
", then content, then one or more " +// WARNING: Greedy - will match from first " to LAST " in input +doubleQuoteCaptureRaw = "" ('"'+ doubleQuoteContent* '"'+) +doubleQuoteContent = [^"] / '"'+ &[^"] + +// Single quotes: same pattern +singleQuoted = raw:singleQuoteCaptureRaw &{ TryParseQuotedString(raw, '\'') } { _parsedValue } +singleQuoteCaptureRaw = "" ("'"+ singleQuoteContent* "'"+) +singleQuoteContent = [^'] / "'"+ &[^'] + +// Backticks: same pattern +backtickQuoted = raw:backtickCaptureRaw &{ TryParseQuotedString(raw, '`') } { _parsedValue } +backtickCaptureRaw = "" ('`'+ backtickContent* '`'+) +backtickContent = [^`] / '`'+ &[^`] diff --git a/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/README.md b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/README.md new file mode 100644 index 0000000..6d15c7c --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/README.md @@ -0,0 +1,206 @@ +# Solution 3: Semantic Predicates with Input Access + +## Concept + +Use semantic predicates `&{ }` to access the input string and cursor position directly, similar to how JavaScript's Peggy.js implements universal parsing. 
+ +## JavaScript Reference (What We Want to Achieve) + +In Peggy.js, this works perfectly: + +```javascript +doubleQuotedUniversal = &'"' &{ + const pos = offset(); // Get current position + const result = parseQuotedStringAt(input, pos, '"'); // Access input string + if (result) { + parsedValue = result.value; + parsedLength = result.length; + return true; + } + return false; +} chars:consumeDouble { return parsedValue; } +``` + +Key Peggy.js features used: +- `input` - built-in variable containing the full input string +- `offset()` - built-in function returning current parse position +- Both accessible in any code block including semantic predicates + +## Attempted C# Implementation + +### Grammar (test_semantic_predicates.peg) + +``` +@namespace CSharpPegTest +@classname SemanticPredicateParser +@using System.Linq + +@members +{ + private string _parsedValue; + private int _parsedLength; + + private bool ParseQuotedStringAt(string input, int startPos, char quoteChar) + { + // Same universal parsing logic + // ... (implementation) + } +} + +// ATTEMPTED: Access cursor and subject in semantic predicate +// This is what we WANT to write: +doubleQuoted = &'"' &{ + // Try to access cursor position and input string + var pos = cursor.Location; // ← Does cursor exist here? + var input = subject; // ← Does subject exist here? 
+ return ParseQuotedStringAt(input, pos, '"'); +} chars:consume { _parsedValue } +``` + +## Investigation Results + +### Attempt 1: Using `Cursor` and `Subject` Directly + +```csharp +&{ ParseQuotedStringAt(Subject, Cursor.Location, '"') } +``` + +**Error**: +``` +error CS0119: 'Cursor' is a type, which is not valid in the given context +error CS0103: The name 'Subject' does not exist in the current context +``` + +### Attempt 2: Using `state` Parameter + +Looking at Pegasus-generated code, semantic predicates become: +```csharp +new Func<Cursor, bool>(state => /* predicate code */) +``` + +So we tried: +```csharp +&{ ParseQuotedStringAt(state.Subject, state.Location, '"') } +``` + +**Error**: +``` +error CS1061: 'Cursor' does not contain a definition for 'Subject' +``` + +The `Cursor` struct only has `Location` (position index), not access to the input string. + +### Attempt 3: Store Input in `@members` + +```csharp +@members +{ + private string _inputString; + + public void SetInput(string input) + { + _inputString = input; + } +} + +// Then in predicate: +&{ ParseQuotedStringAt(_inputString, state.Location, '"') } +``` + +**Problem**: The parser doesn't expose a way to call `SetInput` before parsing. The `Parse()` method receives the input string but doesn't pass it to custom members. + +### Attempt 4: Access `this.subject` in Predicate + +```csharp +&{ ParseQuotedStringAt(this.subject, state.Location, '"') } +``` + +**Error**: +``` +error CS0026: Keyword 'this' is not valid in a static property, static method, or static field initializer +``` + +The predicate lambda doesn't have access to `this` because it's compiled as a delegate. + +## Analysis of Pegasus Architecture + +### How Pegasus Compiles Semantic Predicates + +```csharp +// Generated code structure +private IParseResult<string> SomeRule(ref Cursor cursor) +{ + // ... 
+ var predicateResult = this.CHECK(ref cursor, state => + // Your predicate code here + // 'state' is the only parameter available + // No access to: this, subject, cursor (the ref parameter) + ); + // ... +} +``` + +The predicate code is wrapped in a lambda expression where: +- `state` (type `Cursor`) is the only parameter +- `this` is not accessible (lambda context) +- Instance fields like `this.subject` are not accessible +- The `subject` field exists in the parser class but not in the lambda scope + +### Why JavaScript Works But C# Doesn't + +**JavaScript (Peggy.js)**: +- Code runs in same scope as parser +- `input` and `offset()` are injected as "magic" globals +- No compilation to lambdas + +**C# (Pegasus)**: +- Code is compiled to strongly-typed C# +- Predicates become lambda delegates +- Lambda scope is isolated from parser instance + +## Workaround: Post-Capture Validation + +Since we can't access input in predicates, we use a workaround: + +1. **Capture** text using a PEG pattern +2. **Pass captured text** to predicate for validation + +``` +doubleQuoted = raw:capturePattern &{ ValidateCaptured(raw, '"') } { _parsedValue } +``` + +This is exactly what Solution 2 (Capture-then-Validate) does, with its inherent disambiguation limitations. + +## Conclusion + +**Status**: ❌ FAILED + +Pegasus semantic predicates `&{ }` do not provide access to: +- The input string (`subject`) +- The parser instance (`this`) + +Only the cursor position is available via the `state` parameter, which is insufficient for implementing universal quote parsing. 
+ +## Comparison Table + +| Feature | Peggy.js | Pegasus | +|---------|----------|---------| +| `input` access in `&{ }` | ✓ Yes | ❌ No | +| `offset()` / position | ✓ Yes | ✓ Via `state.Location` | +| Full input string | ✓ Yes | ❌ Not accessible | +| Instance members | ✓ Via scope | ❌ Lambda isolation | + +## Potential Future Solution + +A Pegasus enhancement could provide: +```csharp +// Hypothetical improved predicate syntax +&{ (state, subject) => ParseQuotedStringAt(subject, state.Location, '"') } +``` + +Or a special syntax to access the subject: +``` +&{ ParseQuotedStringAt(@subject, state.Location, '"') } +``` + +This would require changes to the Pegasus code generator. diff --git a/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/test_semantic_predicates.peg b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/test_semantic_predicates.peg new file mode 100644 index 0000000..0506260 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/test_semantic_predicates.peg @@ -0,0 +1,147 @@ +@namespace CSharpPegTest +@classname SemanticPredicateParser +@using System.Linq + +@members +{ + private string _parsedValue; + private int _parsedLength; + + /// + /// Universal parser for N-quote strings. 
+ /// + private bool ParseQuotedStringAt(string input, int startPos, char quoteChar) + { + if (input == null || startPos >= input.Length || input[startPos] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = startPos; + while (pos < input.Length && input[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + _parsedValue = content.ToString(); + _parsedLength = afterClose - startPos; + return true; + } + } + + content.Append(input[pos]); + pos++; + } + return false; + } + + /// + /// Workaround: Validate captured text (since we can't access input directly). 
+ /// + private bool ValidateCapturedQuote(string capturedText, char quoteChar) + { + _parsedValue = null; + if (string.IsNullOrEmpty(capturedText) || capturedText[0] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = 0; + while (pos < capturedText.Length && capturedText[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < capturedText.Length) + { + if (pos + escapeSeq.Length <= capturedText.Length && + capturedText.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + if (pos + quoteCount <= capturedText.Length && + capturedText.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= capturedText.Length || capturedText[afterClose] != quoteChar) + { + if (afterClose == capturedText.Length) + { + _parsedValue = content.ToString(); + return true; + } + return false; + } + } + + content.Append(capturedText[pos]); + pos++; + } + return false; + } +} + +document = q:quoted { q } + +quoted = doubleQuoted / singleQuoted / backtickQuoted + +// ============================================================================= +// WHAT WE WANT TO WRITE (but doesn't work): +// ============================================================================= +// +// doubleQuotedIdeal = &'"' &{ +// // This would be ideal - access input and position directly +// return ParseQuotedStringAt(subject, state.Location, '"'); +// } chars:consume { _parsedValue } +// +// Errors: +// - 'subject' does not exist in current context +// - Lambda doesn't have access to parser instance +// +// ============================================================================= + +// WHAT WE CAN ACTUALLY DO (workaround using capture): +// Capture first, then validate - but 
this has disambiguation problems + +doubleQuoted = raw:doubleCapture &{ ValidateCapturedQuote(raw, '"') } { _parsedValue } +doubleCapture = "" ('"'+ doubleContent* '"'+) +doubleContent = [^"] / '"'+ &[^"] + +singleQuoted = raw:singleCapture &{ ValidateCapturedQuote(raw, '\'') } { _parsedValue } +singleCapture = "" ("'"+ singleContent* "'"+) +singleContent = [^'] / "'"+ &[^'] + +backtickQuoted = raw:backtickCapture &{ ValidateCapturedQuote(raw, '`') } { _parsedValue } +backtickCapture = "" ('`'+ backtickContent* '`'+) +backtickContent = [^`] / '`'+ &[^`] diff --git a/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/README.md b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/README.md new file mode 100644 index 0000000..d271dde --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/README.md @@ -0,0 +1,236 @@ +# Solution 4: Hybrid Approach (Current Implementation) + +## Concept + +Combine **explicit PEG rules** for common cases (1-5 quotes) with **procedural parsing** for unlimited quotes (6+). This achieves the functional requirement while working within Pegasus's constraints. + +## Why This Approach Works + +### Problem Recap + +1. **`#parse{}` expressions** don't work with the `<PegGrammar>` MSBuild tag +2. **Semantic predicates** can't access input string directly +3. 
**Greedy PEG patterns** fail to disambiguate multiple quoted strings + +### Solution + +Use explicit PEG rules for levels 1-5: +- Provides correct disambiguation +- Works with standard PEG semantics +- Handles 99% of real-world use cases + +Use procedural parsing for levels 6+: +- Handles unlimited quote counts +- Uses capture-then-validate pattern +- Lookahead ensures we're at 6+ quotes first + +## Implementation + +### Current C# Grammar Structure + +``` +// Reference can be quoted (any N) or simple unquoted +reference = highQuotedReference + / quintupleQuotedReference + / quadrupleQuotedReference + / tripleQuotedReference + / doubleQuotedReference + / singleQuotedReference + / simpleReference + +// Order matters: try higher quote counts first +``` + +### Level 1-5: Explicit PEG Rules + +Each level has explicit rules with proper disambiguation: + +``` +// Single quotes (1 quote char) +singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 + +doubleQuote1 = '"' r:doubleQuote1Content* '"' { string.Join("", r) } +doubleQuote1Content = '""' { "\"" } / c:[^"] { c.ToString() } + +// Double quotes (2 quote chars) +doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 + +doubleQuote2 = '""' r:doubleQuote2Content* '""' { string.Join("", r) } +doubleQuote2Content = '""""' { "\"\"" } / !'""' c:. { c.ToString() } + +// Triple quotes (3 quote chars) +// ... same pattern ... 
+ +// And so on for 4 and 5 quote chars +``` + +### Level 6+: Procedural Parsing + +For 6+ quotes, use lookahead + capture-then-validate: + +``` +// High quote sequences (6+ quotes) - use procedural parsing +highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { raw } + +// Capture high quote content +highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseHighQuoteString(raw, '"') } { _highQuoteValue } + / raw:highQuoteSingleRaw &{ ParseHighQuoteString(raw, '\'') } { _highQuoteValue } + / raw:highQuoteBacktickRaw &{ ParseHighQuoteString(raw, '`') } { _highQuoteValue } + +// Raw capture patterns +highQuoteDoubleRaw = "" ('"'+ highQuoteDoubleContent* '"'+) +highQuoteSingleRaw = "" ("'"+ highQuoteSingleContent* "'"+) +highQuoteBacktickRaw = "" ('`'+ highQuoteBacktickContent* '`'+) +``` + +### The `ParseHighQuoteString` Helper + +```csharp +@members +{ + private string _highQuoteValue; + + private bool ParseHighQuoteString(string input, char quoteChar) + { + _highQuoteValue = null; + if (string.IsNullOrEmpty(input)) return false; + + // Count opening quotes + int quoteCount = 0; + while (quoteCount < input.Length && input[quoteCount] == quoteChar) + quoteCount++; + + if (quoteCount < 6) return false; // Let regular rules handle 1-5 + + string openClose = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + string escapeVal = new string(quoteChar, quoteCount); + + int pos = quoteCount; + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(escapeVal); + pos += escapeSeq.Length; + continue; + } + + // Check for closing quotes + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == openClose) + { + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != 
quoteChar) + { + if (afterClose == input.Length) + { + _highQuoteValue = content.ToString(); + return true; + } + return false; + } + } + + content.Append(input[pos]); + pos++; + } + return false; + } +} +``` + +## Why Disambiguation Works + +### For Levels 1-5 + +PEG ordered choice (`/`), the not-predicate (`!`), and explicit patterns provide correct disambiguation: + +``` +Input: "first" "second" +``` + +1. Try `doubleQuote1`: `'"' content* '"'` + - Matches `"first"` + - Stops at first closing `"` + - Returns "first" +2. Continue parsing... +3. Try `doubleQuote1` again + - Matches `"second"` + - Returns "second" + +The explicit `'"'` at start and end (not `'"'+`) provides exact boundaries. + +### For Level 6+ + +The lookahead `&('""""""' / "''''''" / '``````')` ensures: +- We only enter this rule when there are 6+ consecutive quotes +- No ambiguity with levels 1-5 (the 6+ rule is tried first due to PEG ordering, and its lookahead rejects shorter quote runs so they fall through to the explicit rules) +- The capture-then-validate works because we know we're in high-quote territory + +## Advantages + +1. **Correct disambiguation**: Levels 1-5 use proper PEG semantics +2. **Unlimited support**: Levels 6+ can be any N +3. **Single parsing logic**: The core algorithm is the same everywhere +4. **Production ready**: Works with standard Pegasus/MSBuild integration +5. **Testable**: All 180+ C# tests pass + +## Disadvantages + +1. **More verbose grammar**: Explicit rules for 5 levels × 3 quote types = 15 rule sets +2. **Repetitive patterns**: Each level follows the same pattern +3. 
**Maintenance overhead**: Changes to parsing logic need replication + +## Comparison with JavaScript + +| Aspect | JavaScript (Peggy.js) | C# (Pegasus) | +|--------|----------------------|--------------| +| Grammar lines | ~70 (universal) | ~130 (hybrid) | +| Rule count | 3 (one per quote type) | 15+ (5 levels × 3 types) | +| Core logic | Single function | Single function | +| Disambiguation | Procedural | PEG ordered choice | +| N support | Unlimited | Unlimited | + +## Test Results + +All tests pass: + +``` +=== C# Test Results === +Total: 180 tests +Passed: 180 ✓ +Failed: 0 + +Coverage: +- Single quotes (1): ✓ +- Double quotes (2): ✓ +- Triple quotes (3): ✓ +- Quadruple quotes (4): ✓ +- Quintuple quotes (5): ✓ +- High quotes (6+): ✓ +- Escape sequences: ✓ +- Mixed quote types: ✓ +- Edge cases: ✓ +``` + +## Conclusion + +**Status**: ✅ WORKING SOLUTION + +The hybrid approach is the recommended solution for C# Pegasus: + +1. It achieves full functionality (any N quotes) +2. It works within Pegasus's constraints +3. It's production-ready and well-tested + +The additional verbosity is an acceptable trade-off for correctness and compatibility. + +## Files + +- `Parser.peg` - The full production grammar (in `csharp/Link.Foundation.Links.Notation/`) +- This README documents the approach and rationale diff --git a/docs/case-studies/csharp-peg-simplification/timeline.md b/docs/case-studies/csharp-peg-simplification/timeline.md new file mode 100644 index 0000000..e3f99e1 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/timeline.md @@ -0,0 +1,268 @@ +# Timeline of C# PEG Parser Simplification Investigation + +This document provides a detailed chronological account of the investigation into simplifying the C# Pegasus PEG parser. 
+ +## Context + +- **Issue**: #142 - Support more quotes options +- **Date**: December 1, 2025 +- **Duration**: ~3.5 hours of investigation + +## Detailed Timeline + +### 15:20 UTC - Initial Unlimited Quotes Request + +**User request**: "We should support any number of quotes in a series, not only 1-5, but any number of N. Is it possible to do with PEG and other parsers?" + +**Status**: Investigation begins + +--- + +### 15:41-15:46 UTC - First Implementation Attempt + +**Action**: Implemented explicit PEG rules for quote levels 1-5 across all languages. + +**Result**: Tests pass but user requests simpler, more universal approach. + +--- + +### 16:10 UTC - Discovery of PEG Greedy Problem + +**Finding**: PEG.js greedy patterns don't correctly disambiguate multiple quoted strings. + +**Example**: +``` +Input: "a" "b" +Pattern: $('"'+ content* '"'+) +Expected: ["a", "b"] (two separate strings) +Actual: Fails - greedy + captures from first " to last " +``` + +**Comment posted to PR**: +> Issue: PEG.js greedy patterns like `$('"'+ content* '"'+)` don't correctly disambiguate multiple quoted strings separated by whitespace. +> +> Solution: Keep explicit PEG rules for 1-5 quotes (which provide proper disambiguation) while using the procedural parser function for 6+ quotes (unlimited support). + +--- + +### 17:27 UTC - User Questions About Variables/Backreferences + +**User request**: "Can we use some kind of variable? So if we start with `"` we match `"` at the end... Like we have in regular expressions: `(?P<quotes>"+)(.*)(?P=quotes)`" + +**Question**: Can this be done in PEG parsers? + +--- + +### 17:38-17:50 UTC - JavaScript Universal Solution Discovered + +**Research findings**: +1. Standard PEG does **not** support backreferences like regex +2. PEG parsers are deterministic and don't backtrack the same way + +**Solution discovered**: Global variables + semantic predicates + +The technique uses: +1. **Global variables** to store parsed result +2. 
**`input` and `offset()`** to peek at input directly +3. **Procedural parsing function** for the actual logic +4. **Consume pattern** to advance parser position + +**Implementation**: Successfully simplified JavaScript grammar from ~256 lines to ~208 lines (-19%). + +**Comment posted to PR**: +> Inspired by heredoc parsing patterns, I implemented a universal approach using: +> - Global variable to store parsed result +> - `input` and `offset()` to peek at the input string directly in a semantic predicate +> - Procedural parsing function that counts opening quotes, parses content, handles escaping + +--- + +### 18:01 UTC - User Requests Universal Approach for All Languages + +**User request**: "So I still see a lot of code changes in grammars, can we do simple universal logic in all languages like you did in [JavaScript]?" + +--- + +### 18:02-18:08 UTC - First C# Attempt with `#parse{}` Expression + +**Approach**: Attempted to use Pegasus's `#parse{}` syntax for universal parsing. + +**Code attempted**: +``` +doubleQuotedUniversal = #parse{ + if (ParseQuotedStringAt(state.Subject, state.Location, '"')) { + return new Pegasus.Common.ParseResult(ref state, state.Advance(_parsedLength), _parsedValue); + } + return null; +} +``` + +**Build command**: +```bash +cd /tmp/gh-issue-solver-1764612152720/csharp && dotnet build Link.Foundation.Links.Notation/Link.Foundation.Links.Notation.csproj +``` + +**Result**: Build error +``` +/tmp/gh-issue-solver-1764612152720/csharp/Link.Foundation.Links.Notation/Parser.peg(106,41): +error PEG0011: Unterminated code section. +``` + +--- + +### 18:08-18:10 UTC - Single-Line Format Attempt + +**Hypothesis**: Maybe Pegasus doesn't handle newlines in `#parse{}` blocks. 
+ +**Code attempted** (single line): +``` +doubleQuotedUniversal = #parse{ if (ParseQuotedStringAt(state.Subject, state.Location, '"')) { return new Pegasus.Common.ParseResult(ref state, state.Advance(_parsedLength), _parsedValue); } return null; } +``` + +**Result**: Same error - `PEG0011: Unterminated code section` + +--- + +### 18:10-18:15 UTC - Investigation of `<PegGrammar>` Tag + +**Discovery**: The `<PegGrammar>` tag in .csproj triggers a different code path in Pegasus. + +**Experiment**: +1. Removed explicit `<PegGrammar>` tag +2. Let Pegasus auto-detect .peg files +3. `#parse{}` syntax works! + +**New problem**: Auto-detection creates issues with generated parser class naming and namespace. + +**Conclusion**: Using the `<PegGrammar>` tag is required for proper project integration, but it doesn't support `#parse{}`. + +--- + +### 18:16 UTC - Investigation Summary Posted to PR + +**Comment posted**: +> ### The Issue +> When the project uses `<PegGrammar>` in the .csproj file, Pegasus uses a different code path that **does not support** the `#parse{}` syntax properly. The error `PEG0011: Unterminated code section` occurs. +> +> Interestingly, when removing the explicit `<PegGrammar>` tag (letting Pegasus auto-detect .peg files), the `#parse{}` syntax works. However, this creates other issues with the generated parser class. + +--- + +### 18:22 UTC - User Asks Directly About Universal C# Parsing + +**User request**: "Can we for example use universal parsing like we do have in JavaScript PEG.js also in C# Pegasus? So it using the same function for all number of quotes (1+)." + +--- + +### 18:23-18:28 UTC - Capture-then-Validate Approach + +**New approach**: Capture a greedy PEG pattern, then validate procedurally. 
+ +**Grammar**: +``` +quoted = raw:doubleQuoteCaptureRaw &{ TryParseQuoteAtStart(raw, '"') } { _parsedValue } +doubleQuoteCaptureRaw = "" ('"'+ quoteContent* '"'+) +quoteContent = [^"] / '"'+ &[^"] +``` + +**Test results for isolated strings** (SUCCESS): +``` +"hello" → hello ✓ +""world"" → world ✓ +"""foo""" → foo ✓ +'text with '' escaped' → text with ' escaped ✓ +`text with `` escaped` → text with ` escaped ✓ +``` + +**Test for multiple strings** (FAILURE): +``` +Input: "first" "second" +Expected: 2 separate strings +Actual: Greedy pattern captures from first " to last " → ONE string +``` + +--- + +### 18:28-18:36 UTC - Semantic Predicates Investigation + +**Attempted**: Access `Cursor` and `Subject` in semantic predicates like JavaScript. + +**Code attempted**: +``` +doubleQuoted = &'"' &{ + if (ParseAtCursor(Cursor, Subject, '"')) { return true; } + return false; +} chars:consume { _parsedValue } +``` + +**Result**: Compilation error +``` +'Cursor' is a type, which is not valid in the given context +``` + +**Finding**: Pegasus does NOT provide direct access to cursor/input in `&{ }` predicates. + +In Pegasus, semantic predicates receive only the `state` parameter which is a `Cursor` struct, but: +- `state.Subject` is not accessible (it's the full input string, accessed differently) +- `state.Location` gives position, but no direct subject access in predicates + +--- + +### 18:36-18:43 UTC - Investigation of Pegasus Generated Code + +**Action**: Analyzed generated `Parser.peg.g.cs` to understand internal structure. 
+ +**Findings**: +```csharp +// Semantic predicates are compiled as: +new Func<Cursor, bool>(state => /* predicate code */) + +// The 'state' parameter is the Cursor, but Subject is accessed via: +// this.subject (in the generated parser class context) +// NOT available inside the lambda expression +``` + +--- + +### 18:43 UTC - Final Conclusion + +**Posted comprehensive investigation summary to PR**: + +> ## Investigation: Can C# Pegasus Use Universal Parsing Like JavaScript? +> +> **No, C# Pegasus cannot use the exact same universal approach as JavaScript** due to fundamental differences in how the parser generators work. +> +> ### What JavaScript Does +> JavaScript (Peggy.js) uses a clean universal approach with semantic predicates accessing `input` and `offset()` directly. +> +> ### What I Tried for C# Pegasus +> 1. **`#parse{}` expression** → PEG0011 error with `<PegGrammar>` tag +> 2. **Capture-then-validate** → Works for isolated strings, fails for disambiguation +> 3. **Access cursor/input in predicates** → Not supported +> +> ### Recommendation +> The hybrid approach (explicit rules for 1-5 + procedural for 6+) is the best solution given Pegasus's constraints. 
+ +--- + +## Summary Statistics + +| Metric | Value | +|--------|-------| +| Total investigation time | ~3.5 hours | +| Approaches attempted | 4 | +| Successful for isolated strings | 1 (capture-then-validate) | +| Successful for full use case | 0 | +| Build errors encountered | 3+ | +| Root causes identified | 4 | + +## Key Timestamps + +| Time (UTC) | Event | +|------------|-------| +| 15:20 | Investigation begins | +| 16:10 | PEG greedy problem discovered | +| 17:50 | JavaScript solution found | +| 18:08 | First C# `#parse{}` attempt fails | +| 18:28 | Capture-then-validate partial success | +| 18:43 | Final conclusion: Universal approach not possible | From 5607787a24489f176b28172a0e721f57c656bb46 Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 19:32:38 +0000 Subject: [PATCH 14/15] Simplify C# grammar: minimize explicit rules to N=1,2 + procedural for N>=3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduced explicit PEG rules from 5 quote levels to just 2: - N=1 (explicit): Required for disambiguation of multiple strings on same line - N=2 (explicit): Required for proper escape sequence handling - N>=3 (procedural): Universal ParseMultiQuoteString() handles unlimited quotes Grammar size reduction: 188 -> 155 lines (-17.5%) Key findings documented in case study: - N=1 explicit rules prevent PEG greedy capture issues with "a" "b" - N=2 explicit rules handle escape sequences like """" in "" strings - N>=3 can use procedural because content pattern '"'+ &[^"] works correctly Added experiments/minimal-peg-rules/ with test harness proving the approach. 
All tests pass: C# (180), JS (188), Python (176), Rust (39) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../Link.Foundation.Links.Notation/Parser.peg | 107 ++++++----------- .../csharp-peg-simplification/README.md | 67 ++++++++--- .../solutions/05-minimized-hybrid/README.md | 52 +++++++++ experiments/minimal-peg-rules/Program.cs | 105 +++++++++++++++++ experiments/minimal-peg-rules/QuoteParser.peg | 94 +++++++++++++++ .../minimal-peg-rules/QuoteParserMinimal.peg | 110 ++++++++++++++++++ .../minimal-peg-rules/TestMinimalRules.csproj | 15 +++ 7 files changed, 462 insertions(+), 88 deletions(-) create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md create mode 100644 experiments/minimal-peg-rules/Program.cs create mode 100644 experiments/minimal-peg-rules/QuoteParser.peg create mode 100644 experiments/minimal-peg-rules/QuoteParserMinimal.peg create mode 100644 experiments/minimal-peg-rules/TestMinimalRules.csproj diff --git a/csharp/Link.Foundation.Links.Notation/Parser.peg b/csharp/Link.Foundation.Links.Notation/Parser.peg index 843680c..a715a99 100644 --- a/csharp/Link.Foundation.Links.Notation/Parser.peg +++ b/csharp/Link.Foundation.Links.Notation/Parser.peg @@ -3,19 +3,20 @@ @using System.Linq @members { - // Field to store parsed high quote value - private string _highQuoteValue; + // Field to store parsed multi-quote value + private string _multiQuoteValue; /// - /// Parse a multi-quote string dynamically for N >= 6 quotes. - /// Stores result in _highQuoteValue field. + /// Parse a multi-quote string dynamically for N >= 3 quotes. + /// Uses a universal procedural algorithm that handles any N. + /// Stores result in _multiQuoteValue field. 
/// /// The raw string including opening and closing quotes /// The quote character (", ', or `) /// True if parsing succeeded and the result matches the input length - private bool ParseHighQuoteString(string input, char quoteChar) + private bool ParseMultiQuoteString(string input, char quoteChar) { - _highQuoteValue = null; + _multiQuoteValue = null; if (string.IsNullOrEmpty(input)) return false; // Count opening quotes @@ -25,7 +26,7 @@ quoteCount++; } - if (quoteCount < 6) return false; // Let the regular rules handle 1-5 quotes + if (quoteCount < 3) return false; // Let explicit rules handle N=1 and N=2 string openClose = new string(quoteChar, quoteCount); string escapeSeq = new string(quoteChar, quoteCount * 2); @@ -56,7 +57,7 @@ // Found valid closing - check if we consumed the entire input if (afterClose == input.Length) { - _highQuoteValue = content.ToString(); + _multiQuoteValue = content.ToString(); return true; } return false; @@ -93,89 +94,55 @@ multiLineValueLink > = "(" v:multiLineValues _ ")" { new Link> = id:(reference) __ ":" eol { new Link(id) } // Reference can be quoted (with any number of quotes) or simple unquoted -// Order matters: try high quotes (6+), then quintuple down to single, then simple -reference = highQuotedReference / quintupleQuotedReference / quadrupleQuotedReference / tripleQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference +// Order: high quotes (3+) first, then double quotes (2), then single quotes (1), then simple +// This ordering ensures proper precedence for quote matching +reference = highQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference simpleReference = "" referenceSymbol+ -// High quote sequences (6+ quotes) - use procedural parsing -// Capture everything that looks like a quoted string and validate -highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { raw } +// High quote references (N >= 3) - use universal procedural parsing 
+// Lookahead for 3+ quotes, then capture and validate with the procedural parser +highQuotedReference = &('"""' / "'''" / '```') raw:highQuoteCapture { raw } -// Capture high quote content - match any characters including embedded quotes -highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseHighQuoteString(raw, '"') } { _highQuoteValue } -/ raw:highQuoteSingleRaw &{ ParseHighQuoteString(raw, '\'') } { _highQuoteValue } -/ raw:highQuoteBacktickRaw &{ ParseHighQuoteString(raw, '`') } { _highQuoteValue } +highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseMultiQuoteString(raw, '"') } { _multiQuoteValue } +/ raw:highQuoteSingleRaw &{ ParseMultiQuoteString(raw, '\'') } { _multiQuoteValue } +/ raw:highQuoteBacktickRaw &{ ParseMultiQuoteString(raw, '`') } { _multiQuoteValue } -// Raw capture patterns - return string directly +// Raw capture for high quotes - greedily match quotes and content highQuoteDoubleRaw = "" ('"'+ highQuoteDoubleContent* '"'+) highQuoteSingleRaw = "" ("'"+ highQuoteSingleContent* "'"+) highQuoteBacktickRaw = "" ('`'+ highQuoteBacktickContent* '`'+) -// Content for high quote strings - match non-quote chars OR quote sequences followed by non-quote +// Content for high quotes: any char OR quote sequences followed by non-quote highQuoteDoubleContent = [^"] / '"'+ &[^"] highQuoteSingleContent = [^'] / "'"+ &[^'] highQuoteBacktickContent = [^`] / '`'+ &[^`] -// Single quotes (1 quote char) with escaping via doubling -singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 +// Double quotes (N=2) - explicit PEG rules for proper escape handling +doubleQuotedReference = doubleDoubleQuote / doubleSingleQuote / doubleBacktickQuote -doubleQuote1 = '"' r:doubleQuote1Content* '"' { string.Join("", r) } -doubleQuote1Content = '""' { "\"" } / c:[^"] { c.ToString() } +doubleDoubleQuote = '""' r:doubleDoubleContent* '""' { string.Join("", r) } +doubleDoubleContent = '""""' { "\"\"" } / !'""' c:. 
{ c.ToString() } -singleQuote1 = "'" r:singleQuote1Content* "'" { string.Join("", r) } -singleQuote1Content = "''" { "'" } / c:[^'] { c.ToString() } +doubleSingleQuote = "''" r:doubleSingleContent* "''" { string.Join("", r) } +doubleSingleContent = "''''" { "''" } / !"''" c:. { c.ToString() } -backtickQuote1 = '`' r:backtickQuote1Content* '`' { string.Join("", r) } -backtickQuote1Content = '``' { "`" } / c:[^`] { c.ToString() } +doubleBacktickQuote = '``' r:doubleBacktickContent* '``' { string.Join("", r) } +doubleBacktickContent = '````' { "``" } / !'``' c:. { c.ToString() } -// Double quotes (2 quote chars) -doubleQuotedReference = doubleQuote2 / singleQuote2 / backtickQuote2 +// Single quotes (N=1) - explicit PEG rules for proper disambiguation +// These are needed because single-quoted strings on the same line must be correctly parsed +singleQuotedReference = singleDoubleQuote / singleSingleQuote / singleBacktickQuote -doubleQuote2 = '""' r:doubleQuote2Content* '""' { string.Join("", r) } -doubleQuote2Content = '""""' { "\"\"" } / !'""' c:. { c.ToString() } +singleDoubleQuote = '"' r:singleDoubleContent* '"' { string.Join("", r) } +singleDoubleContent = '""' { "\"" } / c:[^"] { c.ToString() } -singleQuote2 = "''" r:singleQuote2Content* "''" { string.Join("", r) } -singleQuote2Content = "''''" { "''" } / !"''" c:. { c.ToString() } +singleSingleQuote = "'" r:singleSingleContent* "'" { string.Join("", r) } +singleSingleContent = "''" { "'" } / c:[^'] { c.ToString() } -backtickQuote2 = '``' r:backtickQuote2Content* '``' { string.Join("", r) } -backtickQuote2Content = '````' { "``" } / !'``' c:. 
{ c.ToString() } +singleBacktickQuote = '`' r:singleBacktickContent* '`' { string.Join("", r) } +singleBacktickContent = '``' { "`" } / c:[^`] { c.ToString() } -// Triple quotes (3 quote chars) -tripleQuotedReference = doubleQuote3 / singleQuote3 / backtickQuote3 - -doubleQuote3 = '"""' r:doubleQuote3Content* '"""' { string.Join("", r) } -doubleQuote3Content = '""""""' { "\"\"\"" } / !'"""' c:. { c.ToString() } - -singleQuote3 = "'''" r:singleQuote3Content* "'''" { string.Join("", r) } -singleQuote3Content = "''''''" { "'''" } / !"'''" c:. { c.ToString() } - -backtickQuote3 = '```' r:backtickQuote3Content* '```' { string.Join("", r) } -backtickQuote3Content = '``````' { "```" } / !'```' c:. { c.ToString() } - -// Quadruple quotes (4 quote chars) -quadrupleQuotedReference = doubleQuote4 / singleQuote4 / backtickQuote4 - -doubleQuote4 = '""""' r:doubleQuote4Content* '""""' { string.Join("", r) } -doubleQuote4Content = '""""""""' { "\"\"\"\"" } / !'""""' c:. { c.ToString() } - -singleQuote4 = "''''" r:singleQuote4Content* "''''" { string.Join("", r) } -singleQuote4Content = "''''''''''" { "''''" } / !"''''" c:. { c.ToString() } - -backtickQuote4 = '````' r:backtickQuote4Content* '````' { string.Join("", r) } -backtickQuote4Content = '````````' { "````" } / !'````' c:. { c.ToString() } - -// Quintuple quotes (5 quote chars) -quintupleQuotedReference = doubleQuote5 / singleQuote5 / backtickQuote5 - -doubleQuote5 = '"""""' r:doubleQuote5Content* '"""""' { string.Join("", r) } -doubleQuote5Content = '""""""""""' { "\"\"\"\"\"" } / !'"""""' c:. { c.ToString() } - -singleQuote5 = "'''''" r:singleQuote5Content* "'''''" { string.Join("", r) } -singleQuote5Content = "''''''''''" { "'''''" } / !"'''''" c:. { c.ToString() } - -backtickQuote5 = '`````' r:backtickQuote5Content* '`````' { string.Join("", r) } -backtickQuote5Content = '``````````' { "`````" } / !'`````' c:. 
{ c.ToString() } SET_BASE_INDENTATION = spaces:" "* #{ if ((int)state["BaseIndentation"] == -1) state["BaseIndentation"] = spaces.Count; } PUSH_INDENTATION = spaces:" "* #{ state["NormalizedIndent"] = spaces.Count - ((int)state["BaseIndentation"] == -1 ? 0 : (int)state["BaseIndentation"]); if ((int)state["NormalizedIndent"] < 0) state["NormalizedIndent"] = 0; } &{ (int)state["NormalizedIndent"] > (int)state["IndentationStack"].Peek() } #{ state["IndentationStack"].Push((int)state["NormalizedIndent"]); } POP_INDENTATION = #{ state["IndentationStack"].Pop(); } diff --git a/docs/case-studies/csharp-peg-simplification/README.md b/docs/case-studies/csharp-peg-simplification/README.md index dcd0401..2d199e9 100644 --- a/docs/case-studies/csharp-peg-simplification/README.md +++ b/docs/case-studies/csharp-peg-simplification/README.md @@ -113,19 +113,42 @@ See the `solutions/` subdirectory for detailed experiments: **C# Pegasus cannot use the exact same universal approach as JavaScript** due to fundamental differences in how the parser generators work. -### Recommended Approach: Hybrid +### Recommended Approach: Minimized Hybrid -The current C# implementation uses a **hybrid approach** that achieves the same functionality: +After further investigation, we found that the number of explicit PEG rules can be **minimized to just N=1 and N=2**, with procedural parsing handling N>=3. -1. **Explicit PEG rules for quote levels 1-5** (most common cases) - - Required for Pegasus to correctly disambiguate multiple quoted strings - - Provides proper PEG parsing semantics +#### Why N=1 Explicit Rules Are Required +Multiple single-quoted strings on the same line (e.g., `"a" "b"`) require explicit PEG rules for proper disambiguation. Without explicit rules, greedy PEG operators capture too much. -2. 
**Procedural `ParseHighQuoteString()` method for levels 6+** - - Handles unlimited quote counts - - Uses the same universal parsing algorithm +#### Why N=2 Explicit Rules Are Required +Escape sequences in N=2 strings (e.g., `""text with """" escaped""`) cannot be correctly captured by generic patterns because the content pattern cannot distinguish between escape sequences and closing quotes without knowing N. + +#### Why N>=3 Can Use Procedural Parsing +For N>=3, the content pattern `'"'+ &[^"]` (quote sequences followed by non-quote) works because: +- The raw capture is permissive enough to capture escape sequences +- The procedural validator correctly identifies the exact N from the captured string +- The lookahead `&('"""' / "'''" / '```')` ensures we only try the procedural path for 3+ quotes + +### Grammar Size Comparison + +| Approach | Grammar Lines | Reduction | +|----------|---------------|-----------| +| Original (explicit 1-5, procedural 6+) | 188 | baseline | +| **Optimized (explicit 1-2, procedural 3+)** | 155 | **17.5% smaller** | + +### Current Implementation -The core parsing logic is universal and simple - it's just wrapped in PEG rules that provide correct disambiguation semantics. +The optimized C# implementation uses: + +1. **Explicit PEG rules for N=1** (3 quote types × 2 rules = 6 rules) + - Required for disambiguation of multiple strings on same line + +2. **Explicit PEG rules for N=2** (3 quote types × 2 rules = 6 rules) + - Required for proper escape sequence handling + +3. **Procedural `ParseMultiQuoteString()` method for N>=3** + - Handles unlimited quote counts (3, 4, 5, ... 100, ... 
any N) + - Uses the same universal parsing algorithm ### Code Comparison @@ -143,16 +166,24 @@ doubleQuotedUniversal = &'"' &{ } chars:consumeDouble { return parsedValue; } ``` -**C# (Pegasus) - Hybrid approach:** +**C# (Pegasus) - Optimized hybrid approach:** ``` -// Explicit rules for 1-5 quotes -singleQuotedReference = doubleQuote1 / singleQuote1 / backtickQuote1 -doubleQuote1 = '"' r:doubleQuote1Content* '"' { string.Join("", r) } -// ... (similar for levels 2-5) - -// Procedural for 6+ quotes -highQuotedReference = &('""""""' / "''''''" / '``````') raw:highQuoteCapture { raw } -highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseHighQuoteString(raw, '"') } { _highQuoteValue } +// Order: high quotes (3+) first, then double quotes (2), then single quotes (1), then simple +reference = highQuotedReference / doubleQuotedReference / singleQuotedReference / simpleReference + +// N=1: Explicit PEG rules for disambiguation +singleQuotedReference = singleDoubleQuote / singleSingleQuote / singleBacktickQuote +singleDoubleQuote = '"' r:singleDoubleContent* '"' { string.Join("", r) } +singleDoubleContent = '""' { "\"" } / c:[^"] { c.ToString() } + +// N=2: Explicit PEG rules for escape handling +doubleQuotedReference = doubleDoubleQuote / doubleSingleQuote / doubleBacktickQuote +doubleDoubleQuote = '""' r:doubleDoubleContent* '""' { string.Join("", r) } +doubleDoubleContent = '""""' { "\"\"" } / !'""' c:. 
{ c.ToString() } + +// N>=3: Procedural parsing for unlimited quotes +highQuotedReference = &('"""' / "'''" / '```') raw:highQuoteCapture { raw } +highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseMultiQuoteString(raw, '"') } { _multiQuoteValue } ``` ## Files in This Case Study diff --git a/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md new file mode 100644 index 0000000..9ee87f0 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md @@ -0,0 +1,52 @@ +# Solution 5: Minimized Hybrid Approach (N=1,2 explicit + N>=3 procedural) + +## Status: SUCCESS + +This solution successfully reduces the number of explicit PEG rules while maintaining full functionality. + +## Approach + +Instead of having explicit rules for N=1 through N=5, this approach uses: + +1. **Explicit PEG rules for N=1** - Required for disambiguation +2. **Explicit PEG rules for N=2** - Required for escape handling +3. 
**Procedural parsing for N>=3** - Handles unlimited quotes + +## Key Findings + +### N=1 Must Be Explicit +Multiple single-quoted strings on the same line (e.g., `"a" "b"`) require explicit PEG rules because: +- PEG's greedy operators will capture from first quote to last quote +- Explicit rules with specific opening/closing patterns ensure proper boundaries + +### N=2 Must Be Explicit +Escape sequences in N=2 strings cannot be handled by generic content patterns: +- For `""text with """" escaped""`, the content `""""` (escape) starts with `""` +- A generic pattern like `!'""' c:.` stops at ANY `""`, including escapes +- Explicit rules can use `'""""' { "\"\"" }` to specifically match the escape + +### N>=3 Can Be Procedural +For N>=3, the content pattern `'"'+ &[^"]` works because: +- Quote sequences followed by non-quote are captured as content +- The procedural validator identifies the correct N from the raw capture +- Escape sequences (2×N quotes) are followed by content, so they're captured correctly + +## Grammar Reduction + +| Metric | Original | Optimized | Improvement | +|--------|----------|-----------|-------------| +| Total lines | 188 | 155 | -33 lines | +| Explicit quote rules | 30 rules (5 levels × 3 types × 2 rules) | 12 rules (2 levels × 3 types × 2 rules) | -60% | +| Procedural threshold | N >= 6 | N >= 3 | Covers more cases | + +## Test Results + +All tests pass: +- C#: 180 tests +- JS: 188 tests +- Python: 176 tests +- Rust: 39 tests + +## Code + +See `../../../../../csharp/Link.Foundation.Links.Notation/Parser.peg` for the complete implementation. 
diff --git a/experiments/minimal-peg-rules/Program.cs b/experiments/minimal-peg-rules/Program.cs new file mode 100644 index 0000000..4d5c2b6 --- /dev/null +++ b/experiments/minimal-peg-rules/Program.cs @@ -0,0 +1,105 @@ +using System; +using System.Collections.Generic; +using TestMinimalRules; + +class Program +{ + static void Main(string[] args) + { + Console.WriteLine("=== Testing Minimal PEG Rules (N=1 explicit only) ===\n"); + + var testCases = new List<(string input, string[] expected, string description)> + { + // Single quoted strings (isolated) + ("\"hello\"", new[] { "hello" }, "Simple single double quotes"), + ("'world'", new[] { "world" }, "Simple single single quotes"), + ("`test`", new[] { "test" }, "Simple single backticks"), + + // Multiple quoted strings on same line - THE CRITICAL TEST + ("\"a\" \"b\"", new[] { "a", "b" }, "Two double-quoted strings"), + ("'x' 'y'", new[] { "x", "y" }, "Two single-quoted strings"), + ("`p` `q`", new[] { "p", "q" }, "Two backtick strings"), + + // Multi-quote (2) + ("\"\"hello\"\"", new[] { "hello" }, "Double quotes (2)"), + ("''world''", new[] { "world" }, "Single quotes (2)"), + + // Multi-quote (3) + ("\"\"\"text\"\"\"", new[] { "text" }, "Triple double quotes"), + ("'''text'''", new[] { "text" }, "Triple single quotes"), + + // Multiple multi-quoted strings - This is problematic with minimal rules + ("\"\"a\"\" \"\"b\"\"", new[] { "a", "b" }, "Two double-double quoted strings"), + + // Escaping + ("\"say \"\"hello\"\"\"", new[] { "say \"hello\"" }, "Escape with double quotes"), + ("'it''s'", new[] { "it's" }, "Escape with single quotes"), + + // Mixed quote types + ("\"a\" 'b' `c`", new[] { "a", "b", "c" }, "Mixed quote types"), + + // Higher quote levels + ("\"\"\"\"text\"\"\"\"", new[] { "text" }, "Quadruple double quotes"), + ("'''''text'''''", new[] { "text" }, "Quintuple single quotes"), + ("``````text``````", new[] { "text" }, "Sextuple backticks"), + }; + + // Test both parsers + TestParser("Universal 
Parser (fails disambiguation)", input => new QuoteParser().Parse(input), testCases); + Console.WriteLine("\n" + new string('=', 60) + "\n"); + TestParser("Minimal Rules Parser (N=1 explicit)", input => new QuoteParserMinimal().Parse(input), testCases); + } + + static void TestParser(string parserName, Func> parseFunc, List<(string input, string[] expected, string description)> testCases) + { + Console.WriteLine($"=== {parserName} ===\n"); + + int passed = 0; + int failed = 0; + + foreach (var (input, expected, description) in testCases) + { + Console.WriteLine($"Test: {description}"); + Console.WriteLine($" Input: {input}"); + + try + { + var result = parseFunc(input); + + bool matches = result.Count == expected.Length; + if (matches) + { + for (int i = 0; i < expected.Length; i++) + { + if (result[i] != expected[i]) + { + matches = false; + break; + } + } + } + + if (matches) + { + Console.WriteLine($" Result: [{string.Join(", ", result)}] - PASS"); + passed++; + } + else + { + Console.WriteLine($" Expected: [{string.Join(", ", expected)}]"); + Console.WriteLine($" Got: [{string.Join(", ", result)}] - FAIL"); + failed++; + } + } + catch (Exception ex) + { + Console.WriteLine($" Error: {ex.Message} - FAIL"); + failed++; + } + + Console.WriteLine(); + } + + Console.WriteLine($"=== {parserName}: {passed} passed, {failed} failed ==="); + } +} diff --git a/experiments/minimal-peg-rules/QuoteParser.peg b/experiments/minimal-peg-rules/QuoteParser.peg new file mode 100644 index 0000000..d772115 --- /dev/null +++ b/experiments/minimal-peg-rules/QuoteParser.peg @@ -0,0 +1,94 @@ +@namespace TestMinimalRules +@classname QuoteParser +@using System.Linq +@members +{ + // Universal parser for N-quote strings (any N >= 1) + // Returns the parsed content or null if parsing fails + private string? 
_parsedValue; + + private bool ParseUniversalQuoteString(string input, char quoteChar) + { + _parsedValue = null; + if (string.IsNullOrEmpty(input)) return false; + + // Count opening quotes + int quoteCount = 0; + while (quoteCount < input.Length && input[quoteCount] == quoteChar) + { + quoteCount++; + } + + if (quoteCount < 1) return false; // Must have at least one quote + + string openClose = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + string escapeVal = new string(quoteChar, quoteCount); + + int pos = quoteCount; // Start after opening quotes + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(escapeVal); + pos += escapeSeq.Length; + continue; + } + + // Check for closing quotes (exactly N quotes, not more) + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == openClose) + { + // Make sure it's exactly N quotes (not followed by more of the same quote) + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + // Found valid closing - check if we consumed the entire input + if (afterClose == input.Length) + { + _parsedValue = content.ToString(); + return true; + } + return false; + } + } + + // Take next character + content.Append(input[pos]); + pos++; + } + + // No closing quotes found + return false; + } +} + +// Test: Can we parse multiple quoted strings on the same line with a universal approach? 
+document > = list:quotedString* eof { list } + +quotedString = _ q:(universalQuote) _ { q } + +// Attempt 1: Universal capture with validation +universalQuote = doubleQuoteUniversal / singleQuoteUniversal / backtickQuoteUniversal + +// Capture pattern: quote+ content* quote+ and validate +doubleQuoteUniversal = raw:doubleQuoteCapture &{ ParseUniversalQuoteString(raw, '"') } { _parsedValue } +singleQuoteUniversal = raw:singleQuoteCapture &{ ParseUniversalQuoteString(raw, '\'') } { _parsedValue } +backtickQuoteUniversal = raw:backtickQuoteCapture &{ ParseUniversalQuoteString(raw, '`') } { _parsedValue } + +// Raw capture - greedily match quotes and content +doubleQuoteCapture = "" ('"'+ doubleQuoteContent* '"'+) +singleQuoteCapture = "" ("'"+ singleQuoteContent* "'"+) +backtickQuoteCapture = "" ('`'+ backtickQuoteContent* '`'+) + +// Content - non-quote chars or quote sequences followed by non-quote +doubleQuoteContent = [^"] / '"'+ &[^"] +singleQuoteContent = [^'] / "'"+ &[^'] +backtickQuoteContent = [^`] / '`'+ &[^`] + +_ = [ \t]* +eof = !. 
diff --git a/experiments/minimal-peg-rules/QuoteParserMinimal.peg b/experiments/minimal-peg-rules/QuoteParserMinimal.peg new file mode 100644 index 0000000..0100836 --- /dev/null +++ b/experiments/minimal-peg-rules/QuoteParserMinimal.peg @@ -0,0 +1,110 @@ +@namespace TestMinimalRules +@classname QuoteParserMinimal +@using System.Linq +@members +{ + // Universal parser for N-quote strings (any N >= 2) + // Returns the parsed content or null if parsing fails + private string _parsedValue; + + private bool ParseMultiQuoteString(string input, char quoteChar) + { + _parsedValue = null; + if (string.IsNullOrEmpty(input)) return false; + + // Count opening quotes + int quoteCount = 0; + while (quoteCount < input.Length && input[quoteCount] == quoteChar) + { + quoteCount++; + } + + if (quoteCount < 2) return false; // Let single quote rules handle N=1 + + string openClose = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + string escapeVal = new string(quoteChar, quoteCount); + + int pos = quoteCount; // Start after opening quotes + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(escapeVal); + pos += escapeSeq.Length; + continue; + } + + // Check for closing quotes (exactly N quotes, not more) + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == openClose) + { + // Make sure it's exactly N quotes (not followed by more of the same quote) + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + // Found valid closing - check if we consumed the entire input + if (afterClose == input.Length) + { + _parsedValue = content.ToString(); + return true; + } + return false; + } + } + + // Take next character + content.Append(input[pos]); + pos++; + } + + // No 
closing quotes found + return false; + } +} + +// Test: Minimal explicit rules (N=1 only) + procedural for N>=2 +document > = list:quotedString* eof { list } + +quotedString = _ q:(quotedRef) _ { q } + +// Order: try multi-quote (N>=2) first, then single quote (N=1) +// This is because "" should match as double-quote-empty, not two single-quote-empty +quotedRef = multiQuotedRef / singleQuotedRef + +// Multi-quote references (N >= 2) - use procedural parsing +// Lookahead for 2+ quotes, then capture and validate +multiQuotedRef = &('""' / "''" / '``') raw:multiQuoteCapture { raw } + +multiQuoteCapture = raw:multiQuoteDoubleRaw &{ ParseMultiQuoteString(raw, '"') } { _parsedValue } +/ raw:multiQuoteSingleRaw &{ ParseMultiQuoteString(raw, '\'') } { _parsedValue } +/ raw:multiQuoteBacktickRaw &{ ParseMultiQuoteString(raw, '`') } { _parsedValue } + +// Raw capture for multi-quotes - match 2+ quotes +multiQuoteDoubleRaw = "" ('""' '"'* multiQuoteDoubleContent* '""' '"'*) +multiQuoteSingleRaw = "" ("''" "'"* multiQuoteSingleContent* "''" "'"*) +multiQuoteBacktickRaw = "" ('``' '`'* multiQuoteBacktickContent* '``' '`'*) + +// Content for multi-quote strings +multiQuoteDoubleContent = [^"] / '"' !'"' +multiQuoteSingleContent = [^'] / "'" !"'" +multiQuoteBacktickContent = [^`] / '`' !'`' + +// Single quote references (N = 1) - explicit PEG rules for disambiguation +singleQuotedRef = singleDoubleQuote / singleSingleQuote / singleBacktickQuote + +singleDoubleQuote = '"' r:singleDoubleContent* '"' { string.Join("", r) } +singleDoubleContent = '""' { "\"" } / c:[^"] { c.ToString() } + +singleSingleQuote = "'" r:singleSingleContent* "'" { string.Join("", r) } +singleSingleContent = "''" { "'" } / c:[^'] { c.ToString() } + +singleBacktickQuote = '`' r:singleBacktickContent* '`' { string.Join("", r) } +singleBacktickContent = '``' { "`" } / c:[^`] { c.ToString() } + +_ = [ \t]* +eof = !. 
diff --git a/experiments/minimal-peg-rules/TestMinimalRules.csproj b/experiments/minimal-peg-rules/TestMinimalRules.csproj new file mode 100644 index 0000000..86a51fb --- /dev/null +++ b/experiments/minimal-peg-rules/TestMinimalRules.csproj @@ -0,0 +1,15 @@ + + + Exe + net8.0 + enable + enable + + + + + + + + + From b51f60882303199b035c0245c59ba45473c70a25 Mon Sep 17 00:00:00 2001 From: konard Date: Mon, 1 Dec 2025 19:43:47 +0000 Subject: [PATCH 15/15] Add standalone test projects for all C# PEG simplification solutions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds runnable test projects for each solution approach documented in the case study. Each project demonstrates the specific behavior (success, partial success, or failure) of that approach: - Solution 01: #parse{} expression - demonstrates PEG0011 build error - Solution 02: capture-validate - shows disambiguation failure - Solution 03: semantic predicates - shows input access limitation - Solution 04: hybrid approach - full working solution - Solution 05: minimized hybrid - optimized production implementation Each project can be run independently with: cd solutions//project && dotnet build && dotnet run 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../csharp-peg-simplification/README.md | 37 ++++- .../01-parse-expression/project/Program.cs | 30 ++++ .../project/QuoteParser.peg | 107 +++++++++++++ .../project/TestParseExpression.csproj | 13 ++ .../02-capture-validate/project/Program.cs | 84 +++++++++++ .../project/QuoteParser.peg | 92 ++++++++++++ .../project/TestCaptureValidate.csproj | 12 ++ .../03-semantic-predicates/project/Program.cs | 66 +++++++++ .../project/QuoteParser.peg | 94 ++++++++++++ .../project/TestSemanticPredicates.csproj | 12 ++ .../04-hybrid-approach/project/Program.cs | 124 ++++++++++++++++ .../project/QuoteParser.peg | 116 +++++++++++++++ .../project/TestHybrid.csproj | 12 ++ 
.../solutions/05-minimized-hybrid/README.md | 24 ++- .../05-minimized-hybrid/project/Program.cs | 140 ++++++++++++++++++ .../project/QuoteParser.peg | 122 +++++++++++++++ .../project/TestMinimizedHybrid.csproj | 12 ++ 17 files changed, 1087 insertions(+), 10 deletions(-) create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/Program.cs create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/QuoteParser.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/TestParseExpression.csproj create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/Program.cs create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/QuoteParser.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/TestCaptureValidate.csproj create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/Program.cs create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/QuoteParser.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/TestSemanticPredicates.csproj create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/Program.cs create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/QuoteParser.peg create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/TestHybrid.csproj create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/Program.cs create mode 100644 docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/QuoteParser.peg create mode 100644 
docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/TestMinimizedHybrid.csproj diff --git a/docs/case-studies/csharp-peg-simplification/README.md b/docs/case-studies/csharp-peg-simplification/README.md index 2d199e9..f8f8f73 100644 --- a/docs/case-studies/csharp-peg-simplification/README.md +++ b/docs/case-studies/csharp-peg-simplification/README.md @@ -192,15 +192,38 @@ highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseMultiQuoteString(raw, docs/case-studies/csharp-peg-simplification/ ├── README.md # This file ├── timeline.md # Detailed timeline with timestamps -├── root-causes.md # Deep dive into each root cause -├── solutions/ # Experimental solutions -│ ├── 01-parse-expression/ # #parse{} approach -│ ├── 02-capture-validate/ # Capture-then-validate approach -│ ├── 03-semantic-predicates/ # Semantic predicates approach -│ └── 04-other-approaches/ # Other attempted solutions -└── experiments/ # Standalone experiment files +├── root-causes.md # Deep dive into each root cause +└── solutions/ # All attempted solutions with runnable test projects + ├── 01-parse-expression/ # #parse{} approach (FAILED - PEG0011 error) + │ ├── README.md + │ └── project/ # Runnable test project demonstrating the error + ├── 02-capture-validate/ # Capture-then-validate (PARTIAL - disambiguation fails) + │ ├── README.md + │ └── project/ # Runnable test project + ├── 03-semantic-predicates/ # Semantic predicates (FAILED - no input access) + │ ├── README.md + │ └── project/ # Runnable test project + ├── 04-hybrid-approach/ # Hybrid N=1-5 explicit + N>=6 procedural (SUCCESS) + │ ├── README.md + │ └── project/ # Runnable test project + └── 05-minimized-hybrid/ # CURRENT: N=1,2 explicit + N>=3 procedural (SUCCESS) + ├── README.md + └── project/ # Runnable test project ``` +### Running the Test Projects + +Each solution has a standalone test project. 
To run: + +```bash +cd solutions/<solution-name>/project +dotnet build +dotnet run +``` + +Solution 01 will fail to build (demonstrating the PEG0011 error). +Solutions 02-05 will build and run, showing their respective behaviors. + ## References - [Peggy.js Documentation](https://peggyjs.org/documentation.html) diff --git a/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/Program.cs b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/Program.cs new file mode 100644 index 0000000..542627c --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/Program.cs @@ -0,0 +1,30 @@ +// This program demonstrates the #parse{} approach failure +// When you run 'dotnet build', you will see error PEG0011 + +using System; + +namespace TestParseExpression +{ + class Program + { + static void Main(string[] args) + { + Console.WriteLine("=== Test: #parse{} Expression Approach ==="); + Console.WriteLine(); + Console.WriteLine("This test demonstrates that #parse{} expressions"); + Console.WriteLine("do NOT work with the MSBuild tag."); + Console.WriteLine(); + Console.WriteLine("Expected build error:"); + Console.WriteLine(" error PEG0011: Unterminated code section."); + Console.WriteLine(); + Console.WriteLine("If you see this message, the grammar compiled"); + Console.WriteLine("successfully, which means the bug may have been fixed!"); + Console.WriteLine(); + + // This code won't execute because the project won't compile + // var parser = new QuoteParser(); + // var result = parser.Parse("\"hello\""); + // Console.WriteLine($"Parsed: {result}"); + } + } +} diff --git a/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/QuoteParser.peg b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/QuoteParser.peg new file mode 100644 index 0000000..3586b6e --- /dev/null +++ 
b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/QuoteParser.peg @@ -0,0 +1,107 @@ +@namespace TestParseExpression +@classname QuoteParser +@using System.Linq + +@members +{ + private string _parsedValue = ""; + private int _parsedLength; + + /// + /// Universal parser for N-quote strings. + /// Handles any quote character and any number N of quotes. + /// + private bool ParseQuotedStringAt(string input, int startPos, char quoteChar) + { + if (startPos >= input.Length || input[startPos] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = startPos; + while (pos < input.Length && input[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < input.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= input.Length && + input.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence (exactly N quotes) + if (pos + quoteCount <= input.Length && + input.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= input.Length || input[afterClose] != quoteChar) + { + _parsedValue = content.ToString(); + _parsedLength = afterClose - startPos; + return true; + } + } + + content.Append(input[pos]); + pos++; + } + return false; + } +} + +document = q:quoted { q } + +// Universal quoted string - handles any N quotes +// THIS DOES NOT WORK with tag due to PEG0011 error +quoted = doubleQuoted / singleQuoted / backtickQuoted + +// THESE RULES USE #parse{} WHICH CAUSES PEG0011 ERROR +// The #parse{} syntax allows custom procedural parsing but is not +// properly supported when using the MSBuild tag. 
+ +doubleQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '"')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} + +singleQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '\'')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} + +backtickQuoted = #parse{ + if (ParseQuotedStringAt(subject, startCursor.Location, '`')) + { + return new Pegasus.Common.ParseResult( + ref startCursor, + startCursor.Advance(_parsedLength), + _parsedValue + ); + } + return null; +} diff --git a/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/TestParseExpression.csproj b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/TestParseExpression.csproj new file mode 100644 index 0000000..cc0bd12 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/01-parse-expression/project/TestParseExpression.csproj @@ -0,0 +1,13 @@ + + + net8.0 + Exe + enable + enable + + + + + + + diff --git a/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/Program.cs b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/Program.cs new file mode 100644 index 0000000..ea1f69d --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/Program.cs @@ -0,0 +1,84 @@ +// This program demonstrates the capture-then-validate approach +// It shows both SUCCESS (isolated strings) and FAILURE (disambiguation) + +using System; + +namespace TestCaptureValidate +{ + class Program + { + static void Main(string[] args) + { + Console.WriteLine("=== Test: Capture-then-Validate Approach ==="); + Console.WriteLine(); + + var parser = new QuoteParser(); + + // Test cases that WORK (isolated strings) + var successCases = 
new (string input, string expected)[] + { + ("\"hello\"", "hello"), + ("\"\"world\"\"", "world"), + ("\"\"\"foo\"\"\"", "foo"), + ("'text'", "text"), + ("''escaped''", "escaped"), + ("`backtick`", "backtick"), + ("\"\"with \"\"\"\" escape\"\"", "with \"\" escape"), + }; + + Console.WriteLine("=== Isolated String Tests (Expected: SUCCESS) ==="); + int passed = 0, failed = 0; + foreach (var (input, expected) in successCases) + { + try + { + var result = parser.Parse(input); + if (result == expected) + { + Console.WriteLine($"✓ {input} → \"{result}\""); + passed++; + } + else + { + Console.WriteLine($"✗ {input} → \"{result}\" (expected: \"{expected}\")"); + failed++; + } + } + catch (Exception ex) + { + Console.WriteLine($"✗ {input} → Error: {ex.Message}"); + failed++; + } + } + + Console.WriteLine(); + Console.WriteLine("=== Multiple String Tests (Expected: FAILURE) ==="); + Console.WriteLine("These tests demonstrate the disambiguation problem:"); + Console.WriteLine(); + + // Test case that FAILS due to greedy disambiguation + var multiInput = "\"first\" \"second\""; + try + { + var result = parser.Parse(multiInput); + Console.WriteLine($"Input: {multiInput}"); + Console.WriteLine($"Result: \"{result}\""); + Console.WriteLine("PROBLEM: Greedy pattern captured from first \" to last \""); + Console.WriteLine("Expected: Two separate strings \"first\" and \"second\""); + } + catch (Exception ex) + { + Console.WriteLine($"Input: {multiInput}"); + Console.WriteLine($"Error: {ex.Message}"); + Console.WriteLine("This failure is expected - greedy patterns can't disambiguate"); + } + + Console.WriteLine(); + Console.WriteLine($"=== Summary ==="); + Console.WriteLine($"Isolated strings: {passed} passed, {failed} failed"); + Console.WriteLine(); + Console.WriteLine("CONCLUSION: Capture-then-validate works for isolated strings"); + Console.WriteLine("but FAILS for disambiguation of multiple quoted strings."); + } + } +} diff --git 
a/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/QuoteParser.peg b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/QuoteParser.peg new file mode 100644 index 0000000..8e8fe87 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/QuoteParser.peg @@ -0,0 +1,92 @@ +@namespace TestCaptureValidate +@classname QuoteParser +@using System.Linq + +@members +{ + private string _parsedValue = ""; + + /// + /// Parse captured text as an N-quote string. + /// The captured text should include opening and closing quotes. + /// + private bool TryParseQuotedString(string capturedText, char quoteChar) + { + _parsedValue = ""; + if (string.IsNullOrEmpty(capturedText) || capturedText[0] != quoteChar) + return false; + + // Count opening quotes + int quoteCount = 0; + int pos = 0; + while (pos < capturedText.Length && capturedText[pos] == quoteChar) + { + quoteCount++; + pos++; + } + + string closeSeq = new string(quoteChar, quoteCount); + string escapeSeq = new string(quoteChar, quoteCount * 2); + var content = new System.Text.StringBuilder(); + + while (pos < capturedText.Length) + { + // Check for escape sequence (2*N quotes) + if (pos + escapeSeq.Length <= capturedText.Length && + capturedText.Substring(pos, escapeSeq.Length) == escapeSeq) + { + content.Append(closeSeq); + pos += escapeSeq.Length; + continue; + } + + // Check for closing sequence + if (pos + quoteCount <= capturedText.Length && + capturedText.Substring(pos, quoteCount) == closeSeq) + { + int afterClose = pos + quoteCount; + if (afterClose >= capturedText.Length || capturedText[afterClose] != quoteChar) + { + // Valid closing - check if we consumed entire captured text + if (afterClose == capturedText.Length) + { + _parsedValue = content.ToString(); + return true; + } + // Captured more than one quoted string (disambiguation problem) + return false; + } + } + + content.Append(capturedText[pos]); + 
pos++; + } + return false; + } +} + +// Entry point: parse a single quoted string +document = q:quoted { q } + +// Try to parse quoted strings using capture-then-validate +// NOTE: This has disambiguation problems with multiple quoted strings +quoted = doubleQuoted / singleQuoted / backtickQuoted + +// Double quotes: capture greedy pattern, then validate +doubleQuoted = raw:doubleQuoteCaptureRaw &{ TryParseQuotedString(raw, '"') } { _parsedValue } + +// Capture pattern for double quotes +// Matches: one or more ", then content, then one or more " +// WARNING: Greedy - will match from first " to LAST " in input +doubleQuoteCaptureRaw = "" ('"'+ doubleQuoteContent* '"'+) +doubleQuoteContent = [^"] / '"'+ &[^"] + +// Single quotes: same pattern +singleQuoted = raw:singleQuoteCaptureRaw &{ TryParseQuotedString(raw, '\'') } { _parsedValue } +singleQuoteCaptureRaw = "" ("'"+ singleQuoteContent* "'"+) +singleQuoteContent = [^'] / "'"+ &[^'] + +// Backticks: same pattern +backtickQuoted = raw:backtickCaptureRaw &{ TryParseQuotedString(raw, '`') } { _parsedValue } +backtickCaptureRaw = "" ('`'+ backtickContent* '`'+) +backtickContent = [^`] / '`'+ &[^`] diff --git a/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/TestCaptureValidate.csproj b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/TestCaptureValidate.csproj new file mode 100644 index 0000000..54b7c6a --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/02-capture-validate/project/TestCaptureValidate.csproj @@ -0,0 +1,12 @@ + + + net8.0 + Exe + enable + enable + + + + + + diff --git a/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/Program.cs b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/Program.cs new file mode 100644 index 0000000..c2e6218 --- /dev/null +++ 
b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/Program.cs @@ -0,0 +1,66 @@ +// This program demonstrates the semantic predicate limitation +// We show what we WANT to do vs what we CAN do + +using System; + +namespace TestSemanticPredicates +{ + class Program + { + static void Main(string[] args) + { + Console.WriteLine("=== Test: Semantic Predicates Limitation ==="); + Console.WriteLine(); + Console.WriteLine("In JavaScript (Peggy.js), we can write:"); + Console.WriteLine(" doubleQuoted = &'\"' &{"); + Console.WriteLine(" const pos = offset();"); + Console.WriteLine(" const result = parseQuotedStringAt(input, pos, '\"');"); + Console.WriteLine(" return result != null;"); + Console.WriteLine(" }"); + Console.WriteLine(); + Console.WriteLine("In C# (Pegasus), we WANT to write:"); + Console.WriteLine(" doubleQuoted = &'\"' &{ ParseQuotedStringAt(subject, state.Location, '\"') }"); + Console.WriteLine(); + Console.WriteLine("But this FAILS because:"); + Console.WriteLine(" - 'subject' is not accessible in semantic predicates"); + Console.WriteLine(" - Predicates only receive 'state' (Cursor) with 'Location'"); + Console.WriteLine(" - There's no way to access the input string"); + Console.WriteLine(); + Console.WriteLine("Compilation errors we would get:"); + Console.WriteLine(" error CS0103: The name 'subject' does not exist in the current context"); + Console.WriteLine(" error CS0119: 'Cursor' is a type, which is not valid in the given context"); + Console.WriteLine(); + + // The grammar compiles, but only because we use fallback explicit rules + var parser = new QuoteParser(); + + Console.WriteLine("=== Running with Fallback Explicit Rules ==="); + var testCases = new (string input, string expected)[] + { + ("\"hello\"", "hello"), + ("'text'", "text"), + ("`backtick`", "backtick"), + ("\"with \"\" escape\"", "with \" escape"), + }; + + foreach (var (input, expected) in testCases) + { + try + { + var result = 
parser.Parse(input); + var status = result == expected ? "✓" : "✗"; + Console.WriteLine($"{status} {input} → \"{result}\""); + } + catch (Exception ex) + { + Console.WriteLine($"✗ {input} → Error: {ex.Message}"); + } + } + + Console.WriteLine(); + Console.WriteLine("CONCLUSION: Semantic predicates in Pegasus cannot access"); + Console.WriteLine("the input string (subject), so universal parsing like"); + Console.WriteLine("JavaScript's Peggy.js is NOT possible."); + } + } +} diff --git a/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/QuoteParser.peg b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/QuoteParser.peg new file mode 100644 index 0000000..c03acf5 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/QuoteParser.peg @@ -0,0 +1,94 @@ +@namespace TestSemanticPredicates +@classname QuoteParser +@using System.Linq + +@members +{ + private string _parsedValue = ""; + private int _parsedLength; + + /// + /// Universal parser for N-quote strings. + /// This method CANNOT be used in semantic predicates because + /// they don't have access to the input string (subject). 
+    /// Returns true and fills _parsedValue/_parsedLength on success; false when no closed N-quote string starts at startPos.
+    private bool ParseQuotedStringAt(string input, int startPos, char quoteChar)
+    {
+        if (startPos < 0 || startPos >= input.Length || input[startPos] != quoteChar)
+            return false;
+
+        // Count opening quotes: the run length N defines the closing (N) and escape (2*N) sequences
+        int quoteCount = 0;
+        int pos = startPos;
+        while (pos < input.Length && input[pos] == quoteChar)
+        {
+            quoteCount++;
+            pos++;
+        }
+
+        string closeSeq = new string(quoteChar, quoteCount);
+        string escapeSeq = new string(quoteChar, quoteCount * 2);
+        var content = new System.Text.StringBuilder();
+
+        while (pos < input.Length)
+        {
+            // Check for escape sequence (2*N quotes); compared in place, no Substring allocation per position
+            if (pos + escapeSeq.Length <= input.Length &&
+                string.CompareOrdinal(input, pos, escapeSeq, 0, escapeSeq.Length) == 0)
+            {
+                content.Append(closeSeq);
+                pos += escapeSeq.Length;
+                continue;
+            }
+
+            // Check for closing sequence (exactly N quotes, not followed by another quote)
+            if (pos + quoteCount <= input.Length &&
+                string.CompareOrdinal(input, pos, closeSeq, 0, quoteCount) == 0)
+            {
+                int afterClose = pos + quoteCount;
+                if (afterClose >= input.Length || input[afterClose] != quoteChar)
+                {
+                    _parsedValue = content.ToString();
+                    _parsedLength = afterClose - startPos;
+                    return true;
+                }
+            }
+
+            content.Append(input[pos]);
+            pos++;
+        }
+        return false;
+    }
+}
+
+// This grammar demonstrates what we WANT to do but CANNOT
+// because semantic predicates don't have access to the input string.
+//
+// In JavaScript (Peggy.js), we can write:
+// doubleQuoted = &'"' &{
+// const pos = offset();
+// const result = parseQuotedStringAt(input, pos, '"');
+// return result != null;
+// }
+//
+// In C# (Pegasus), we would want to write:
+// doubleQuoted = &'"' &{ ParseQuotedStringAt(subject, state.Location, '"') }
+//
+// But 'subject' is not accessible in semantic predicates.
+// The predicate receives only 'state' (Cursor) which has 'Location' but not 'Subject'.
+ +document = q:quoted { q } + +// Fallback: We have to use explicit PEG rules instead +// Because we can't access 'subject' in &{ } predicates +quoted = doubleQuote1 / singleQuote1 / backtickQuote1 + +// Simple N=1 explicit rules (no access to input needed) +doubleQuote1 = '"' r:doubleQuote1Content* '"' { string.Join("", r) } +doubleQuote1Content = '""' { "\"" } / c:[^"] { c.ToString() } + +singleQuote1 = "'" r:singleQuote1Content* "'" { string.Join("", r) } +singleQuote1Content = "''" { "'" } / c:[^'] { c.ToString() } + +backtickQuote1 = '`' r:backtickQuote1Content* '`' { string.Join("", r) } +backtickQuote1Content = '``' { "`" } / c:[^`] { c.ToString() } diff --git a/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/TestSemanticPredicates.csproj b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/TestSemanticPredicates.csproj new file mode 100644 index 0000000..54b7c6a --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/03-semantic-predicates/project/TestSemanticPredicates.csproj @@ -0,0 +1,12 @@ + + + net8.0 + Exe + enable + enable + + + + + + diff --git a/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/Program.cs b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/Program.cs new file mode 100644 index 0000000..28e47c7 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/Program.cs @@ -0,0 +1,124 @@ +// This program demonstrates the SUCCESSFUL hybrid approach +// Explicit PEG rules for N=1,2 + procedural for N>=3 + +using System; + +namespace TestHybrid +{ + class Program + { + static void Main(string[] args) + { + Console.WriteLine("=== Test: Hybrid Approach (WORKING SOLUTION) ==="); + Console.WriteLine(); + Console.WriteLine("Strategy:"); + Console.WriteLine(" - N=1 (single quotes): Explicit PEG rules for disambiguation"); + Console.WriteLine(" - N=2 (double 
quotes): Explicit PEG rules for escape handling"); + Console.WriteLine(" - N>=3 (triple+): Procedural parsing for unlimited support"); + Console.WriteLine(); + + var parser = new QuoteParser(); + + // Test cases for all quote levels + var testCases = new (string input, string[] expected)[] + { + // N=1 (single quote) + ("\"hello\"", new[] { "hello" }), + ("'world'", new[] { "world" }), + ("`backtick`", new[] { "backtick" }), + + // N=1 with escape + ("\"with \"\" escape\"", new[] { "with \" escape" }), + + // N=2 (double quote) + ("\"\"double\"\"", new[] { "double" }), + ("''single''", new[] { "single" }), + ("``tick``", new[] { "tick" }), + + // N=2 with escape + ("\"\"with \"\"\"\" escape\"\"", new[] { "with \"\" escape" }), + + // N=3 (triple quote) - procedural + ("\"\"\"triple\"\"\"", new[] { "triple" }), + ("'''triple'''", new[] { "triple" }), + ("```triple```", new[] { "triple" }), + + // N=3 with escape + ("\"\"\"with \"\"\"\"\"\" escape\"\"\"", new[] { "with \"\"\" escape" }), + + // N=4 (quadruple) - procedural + ("\"\"\"\"quad\"\"\"\"", new[] { "quad" }), + + // N=5 (quintuple) - procedural + ("\"\"\"\"\"quint\"\"\"\"\"", new[] { "quint" }), + + // Multiple strings on same line (disambiguation test) + ("\"first\" \"second\"", new[] { "first", "second" }), + ("\"\"a\"\" \"\"b\"\"", new[] { "a", "b" }), + + // Mixed quote types + ("\"double\" 'single' `backtick`", new[] { "double", "single", "backtick" }), + + // High quotes with content + ("\"\"\"JSON: {\"key\": \"value\"}\"\"\"", new[] { "JSON: {\"key\": \"value\"}" }), + }; + + int passed = 0, failed = 0; + foreach (var (input, expected) in testCases) + { + try + { + var result = parser.Parse(input); + if (result.Count == expected.Length) + { + bool match = true; + for (int i = 0; i < expected.Length; i++) + { + if (result[i] != expected[i]) + { + match = false; + break; + } + } + if (match) + { + var display = string.Join(", ", result.Select(s => $"\"{s}\"")); + Console.WriteLine($"✓ {input}"); + 
Console.WriteLine($" → [{display}]"); + passed++; + continue; + } + } + var actualDisplay = string.Join(", ", result.Select(s => $"\"{s}\"")); + var expectedDisplay = string.Join(", ", expected.Select(s => $"\"{s}\"")); + Console.WriteLine($"✗ {input}"); + Console.WriteLine($" Got: [{actualDisplay}]"); + Console.WriteLine($" Expected: [{expectedDisplay}]"); + failed++; + } + catch (Exception ex) + { + Console.WriteLine($"✗ {input}"); + Console.WriteLine($" Error: {ex.Message}"); + failed++; + } + } + + Console.WriteLine(); + Console.WriteLine($"=== Summary ==="); + Console.WriteLine($"Passed: {passed}"); + Console.WriteLine($"Failed: {failed}"); + Console.WriteLine(); + if (failed == 0) + { + Console.WriteLine("✓ All tests passed!"); + Console.WriteLine(); + Console.WriteLine("CONCLUSION: The hybrid approach successfully handles:"); + Console.WriteLine(" - All three quote types (\", ', `)"); + Console.WriteLine(" - Any number of quotes (N = 1, 2, 3, ... unlimited)"); + Console.WriteLine(" - Proper escape sequences (2×N quotes → N quotes)"); + Console.WriteLine(" - Multiple quoted strings on the same line"); + } + } + } +} diff --git a/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/QuoteParser.peg b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/QuoteParser.peg new file mode 100644 index 0000000..72c8741 --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/QuoteParser.peg @@ -0,0 +1,116 @@ +@namespace TestHybrid +@classname QuoteParser +@using System.Linq + +@members +{ + private string _multiQuoteValue = ""; + + /// + /// Parse a multi-quote string dynamically for N >= 3 quotes. + /// Uses a universal procedural algorithm that handles any N. 
+    /// Returns true and stores the unescaped content in _multiQuoteValue only when input is exactly one N-quote string (N >= 3).
+    private bool ParseMultiQuoteString(string input, char quoteChar)
+    {
+        _multiQuoteValue = "";
+        if (string.IsNullOrEmpty(input)) return false;
+
+        // Count opening quotes: the run length N defines the closing (N) and escape (2*N) sequences
+        int quoteCount = 0;
+        while (quoteCount < input.Length && input[quoteCount] == quoteChar)
+        {
+            quoteCount++;
+        }
+
+        if (quoteCount < 3) return false; // Let explicit rules handle N=1 and N=2
+
+        string openClose = new string(quoteChar, quoteCount);
+        string escapeSeq = new string(quoteChar, quoteCount * 2);
+        string escapeVal = new string(quoteChar, quoteCount);
+
+        int pos = quoteCount;
+        var content = new System.Text.StringBuilder();
+
+        while (pos < input.Length)
+        {
+            // Check for escape sequence (2*N quotes); compared in place, no Substring allocation per position
+            if (pos + escapeSeq.Length <= input.Length &&
+                string.CompareOrdinal(input, pos, escapeSeq, 0, escapeSeq.Length) == 0)
+            {
+                content.Append(escapeVal);
+                pos += escapeSeq.Length;
+                continue;
+            }
+
+            // Check for closing quotes (N quotes not followed by another quote)
+            if (pos + quoteCount <= input.Length &&
+                string.CompareOrdinal(input, pos, openClose, 0, quoteCount) == 0)
+            {
+                int afterClose = pos + quoteCount;
+                if (afterClose >= input.Length || input[afterClose] != quoteChar)
+                {
+                    if (afterClose == input.Length)
+                    {
+                        _multiQuoteValue = content.ToString();
+                        return true;
+                    }
+                    return false;
+                }
+            }
+
+            content.Append(input[pos]);
+            pos++;
+        }
+        return false;
+    }
+}
+
+// Entry: parse a list of quoted strings
+document <IList<string>> = list:quotedString* eof { list }
+
+quotedString = _ q:quoted _ { q }
+
+// Order: high quotes (3+) first, then double (2), then single (1)
+quoted = highQuoted / doubleQuoted / singleQuoted
+
+// === HIGH QUOTES (N >= 3) - Procedural parsing ===
+highQuoted = &('"""' / "'''" / '```') raw:highQuoteCapture { raw }
+
+highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseMultiQuoteString(raw, '"') } { _multiQuoteValue }
+/ raw:highQuoteSingleRaw &{ ParseMultiQuoteString(raw, '\'') } { _multiQuoteValue }
+/ raw:highQuoteBacktickRaw &{ ParseMultiQuoteString(raw, '`') } { _multiQuoteValue }
+
+highQuoteDoubleRaw = "" ('"'+
highQuoteDoubleContent* '"'+) +highQuoteSingleRaw = "" ("'"+ highQuoteSingleContent* "'"+) +highQuoteBacktickRaw = "" ('`'+ highQuoteBacktickContent* '`'+) + +highQuoteDoubleContent = [^"] / '"'+ &[^"] +highQuoteSingleContent = [^'] / "'"+ &[^'] +highQuoteBacktickContent = [^`] / '`'+ &[^`] + +// === DOUBLE QUOTES (N = 2) - Explicit PEG rules === +doubleQuoted = doubleDouble / doubleSingle / doubleBacktick + +doubleDouble = '""' r:doubleDoubleContent* '""' { string.Join("", r) } +doubleDoubleContent = '""""' { "\"\"" } / !'""' c:. { c.ToString() } + +doubleSingle = "''" r:doubleSingleContent* "''" { string.Join("", r) } +doubleSingleContent = "''''" { "''" } / !"''" c:. { c.ToString() } + +doubleBacktick = '``' r:doubleBacktickContent* '``' { string.Join("", r) } +doubleBacktickContent = '````' { "``" } / !'``' c:. { c.ToString() } + +// === SINGLE QUOTES (N = 1) - Explicit PEG rules for disambiguation === +singleQuoted = singleDouble / singleSingle / singleBacktick + +singleDouble = '"' r:singleDoubleContent* '"' { string.Join("", r) } +singleDoubleContent = '""' { "\"" } / c:[^"] { c.ToString() } + +singleSingle = "'" r:singleSingleContent* "'" { string.Join("", r) } +singleSingleContent = "''" { "'" } / c:[^'] { c.ToString() } + +singleBacktick = '`' r:singleBacktickContent* '`' { string.Join("", r) } +singleBacktickContent = '``' { "`" } / c:[^`] { c.ToString() } + +_ = [ \t\r\n]* +eof = !. 
diff --git a/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/TestHybrid.csproj b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/TestHybrid.csproj new file mode 100644 index 0000000..54b7c6a --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/04-hybrid-approach/project/TestHybrid.csproj @@ -0,0 +1,12 @@ + + + net8.0 + Exe + enable + enable + + + + + + diff --git a/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md index 9ee87f0..488e418 100644 --- a/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md +++ b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/README.md @@ -1,8 +1,8 @@ # Solution 5: Minimized Hybrid Approach (N=1,2 explicit + N>=3 procedural) -## Status: SUCCESS +## Status: ✅ SUCCESS (Current Production Implementation) -This solution successfully reduces the number of explicit PEG rules while maintaining full functionality. +This solution successfully reduces the number of explicit PEG rules while maintaining full functionality. This is the **current production implementation** used in `Parser.peg`. ## Approach @@ -47,6 +47,24 @@ All tests pass: - Python: 176 tests - Rust: 39 tests +## Runnable Test Project + +A complete standalone test project is available in the `project/` subdirectory: + +```bash +cd project +dotnet build +dotnet run +``` + +The test demonstrates: +- All three quote types (", ', `) +- Quote levels N=1 through N=10+ +- Escape sequences at all levels +- Multiple quoted strings on the same line (disambiguation) +- Real-world use cases (JSON, code blocks) + ## Code -See `../../csharp/Link.Foundation.Links.Notation/Parser.peg` for the complete implementation. 
+- **Test Project**: `./project/` - Standalone demonstration +- **Production**: `../../../../../csharp/Link.Foundation.Links.Notation/Parser.peg` - Full implementation diff --git a/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/Program.cs b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/Program.cs new file mode 100644 index 0000000..3461c8b --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/Program.cs @@ -0,0 +1,140 @@ +// This program demonstrates the MINIMIZED hybrid approach +// Only N=1,2 explicit + N>=3 procedural (current production implementation) + +using System; + +namespace TestMinimizedHybrid +{ + class Program + { + static void Main(string[] args) + { + Console.WriteLine("=== Test: Minimized Hybrid Approach (PRODUCTION) ==="); + Console.WriteLine(); + Console.WriteLine("This is the CURRENT production implementation."); + Console.WriteLine(); + Console.WriteLine("Compared to Solution 04 (explicit 1-5 + procedural 6+):"); + Console.WriteLine(" - Solution 04: 30 explicit rules (5 levels × 3 types × 2)"); + Console.WriteLine(" - Solution 05: 12 explicit rules (2 levels × 3 types × 2)"); + Console.WriteLine(" - Reduction: 60% fewer explicit rules!"); + Console.WriteLine(); + Console.WriteLine("Why this works:"); + Console.WriteLine(" - N=1: Must be explicit for disambiguation (\"a\" \"b\")"); + Console.WriteLine(" - N=2: Must be explicit for escape handling (\"\"\"\"=\"\"\")"); + Console.WriteLine(" - N>=3: Content pattern can handle escapes correctly"); + Console.WriteLine(); + + var parser = new QuoteParser(); + + // Comprehensive test cases + var testCases = new (string input, string[] expected)[] + { + // N=1 basic + ("\"hello\"", new[] { "hello" }), + ("'world'", new[] { "world" }), + ("`backtick`", new[] { "backtick" }), + + // N=1 with escape + ("\"with \"\" escape\"", new[] { "with \" escape" }), + ("'with '' escape'", new[] { "with '
escape" }), + ("`with `` escape`", new[] { "with ` escape" }), + + // N=1 disambiguation (critical test) + ("\"a\" \"b\"", new[] { "a", "b" }), + ("'x' 'y' 'z'", new[] { "x", "y", "z" }), + + // N=2 basic + ("\"\"double\"\"", new[] { "double" }), + ("''single''", new[] { "single" }), + ("``tick``", new[] { "tick" }), + + // N=2 with escape + ("\"\"with \"\"\"\" escape\"\"", new[] { "with \"\" escape" }), + ("''with '''' escape''", new[] { "with '' escape" }), + + // N=3 (procedural) + ("\"\"\"triple\"\"\"", new[] { "triple" }), + ("'''triple'''", new[] { "triple" }), + ("```triple```", new[] { "triple" }), + + // N=3 with escape + ("\"\"\"with \"\"\"\"\"\" escape\"\"\"", new[] { "with \"\"\" escape" }), + + // N=4, N=5, N=6 (all procedural) + ("\"\"\"\"quad\"\"\"\"", new[] { "quad" }), + ("\"\"\"\"\"quint\"\"\"\"\"", new[] { "quint" }), + ("\"\"\"\"\"\"sext\"\"\"\"\"\"", new[] { "sext" }), + + // N=10 (high quote - procedural) + ("\"\"\"\"\"\"\"\"\"\"ten\"\"\"\"\"\"\"\"\"\"", new[] { "ten" }), + + // Mixed quote types + ("\"double\" 'single' `tick`", new[] { "double", "single", "tick" }), + + // Real-world use case: JSON in triple quotes + ("\"\"\"{ \"key\": \"value\" }\"\"\"", new[] { "{ \"key\": \"value\" }" }), + + // Real-world use case: Code in triple backticks + ("```console.log(\"hello\");```", new[] { "console.log(\"hello\");" }), + }; + + int passed = 0, failed = 0; + foreach (var (input, expected) in testCases) + { + try + { + var result = parser.Parse(input); + if (result.Count == expected.Length) + { + bool match = true; + for (int i = 0; i < expected.Length; i++) + { + if (result[i] != expected[i]) + { + match = false; + break; + } + } + if (match) + { + Console.WriteLine($"✓ {Truncate(input, 50)}"); + passed++; + continue; + } + } + Console.WriteLine($"✗ {Truncate(input, 50)}"); + Console.WriteLine($" Got: {Format(result)}"); + Console.WriteLine($" Expected: {Format(expected)}"); + failed++; + } + catch (Exception ex) + { + Console.WriteLine($"✗ 
{Truncate(input, 50)}");
+                    Console.WriteLine($" Error: {ex.Message}");
+                    failed++;
+                }
+            }
+
+            Console.WriteLine();
+            Console.WriteLine($"=== Summary ===");
+            Console.WriteLine($"Passed: {passed}");
+            Console.WriteLine($"Failed: {failed}");
+            Console.WriteLine();
+            if (failed == 0)
+            {
+                Console.WriteLine("✓ All tests passed!");
+                Console.WriteLine();
+                Console.WriteLine("The minimized hybrid approach is the OPTIMAL solution:");
+                Console.WriteLine(" - Minimal explicit rules (only N=1 and N=2)");
+                Console.WriteLine(" - Universal procedural parsing for N>=3");
+                Console.WriteLine(" - Full feature support with reduced grammar size");
+            }
+        }
+
+        static string Truncate(string s, int max) => // shortens s to max chars, ending in "..." when cut
+            s.Length <= max ? s : s.Substring(0, max - 3) + "...";
+
+        static string Format(IEnumerable<string> items) => // renders items as ["a", "b"]; generic type is required for Select
+            "[" + string.Join(", ", items.Select(i => $"\"{i}\"")) + "]";
+    }
+}
diff --git a/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/QuoteParser.peg b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/QuoteParser.peg
new file mode 100644
index 0000000..4cf3c18
--- /dev/null
+++ b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/QuoteParser.peg
@@ -0,0 +1,122 @@
+@namespace TestMinimizedHybrid
+@classname QuoteParser
+@using System.Linq
+
+@members
+{
+    private string _multiQuoteValue = "";
+
+    ///
+    /// Parse a multi-quote string dynamically for N >= 3 quotes.
+    /// This is the MINIMIZED version that handles N>=3 procedurally.
+    /// Returns true and stores the unescaped content in _multiQuoteValue only when input is exactly one N-quote string (N >= 3).
+    private bool ParseMultiQuoteString(string input, char quoteChar)
+    {
+        _multiQuoteValue = "";
+        if (string.IsNullOrEmpty(input)) return false;
+
+        // Count opening quotes: the run length N defines the closing (N) and escape (2*N) sequences
+        int quoteCount = 0;
+        while (quoteCount < input.Length && input[quoteCount] == quoteChar)
+        {
+            quoteCount++;
+        }
+
+        if (quoteCount < 3) return false; // N=1,2 handled by explicit rules
+
+        string openClose = new string(quoteChar, quoteCount);
+        string escapeSeq = new string(quoteChar, quoteCount * 2);
+        string escapeVal = new string(quoteChar, quoteCount);
+
+        int pos = quoteCount;
+        var content = new System.Text.StringBuilder();
+
+        while (pos < input.Length)
+        {
+            // Check for escape sequence (2*N quotes); compared in place, no Substring allocation per position
+            if (pos + escapeSeq.Length <= input.Length &&
+                string.CompareOrdinal(input, pos, escapeSeq, 0, escapeSeq.Length) == 0)
+            {
+                content.Append(escapeVal);
+                pos += escapeSeq.Length;
+                continue;
+            }
+
+            // Check for closing quotes (N quotes not followed by another quote)
+            if (pos + quoteCount <= input.Length &&
+                string.CompareOrdinal(input, pos, openClose, 0, quoteCount) == 0)
+            {
+                int afterClose = pos + quoteCount;
+                if (afterClose >= input.Length || input[afterClose] != quoteChar)
+                {
+                    if (afterClose == input.Length)
+                    {
+                        _multiQuoteValue = content.ToString();
+                        return true;
+                    }
+                    return false;
+                }
+            }
+
+            content.Append(input[pos]);
+            pos++;
+        }
+        return false;
+    }
+}
+
+// MINIMIZED HYBRID: Only N=1,2 explicit + N>=3 procedural
+// This is the CURRENT PRODUCTION implementation in Parser.peg
+
+document <IList<string>> = list:quotedString* eof { list }
+
+quotedString = _ q:quoted _ { q }
+
+// Order: high quotes (3+) first, then double (2), then single (1)
+quoted = highQuoted / doubleQuoted / singleQuoted
+
+// === HIGH QUOTES (N >= 3) - Procedural parsing ===
+// Lookahead for 3+ quotes, then capture and validate
+highQuoted = &('"""' / "'''" / '```') raw:highQuoteCapture { raw }
+
+highQuoteCapture = raw:highQuoteDoubleRaw &{ ParseMultiQuoteString(raw, '"') } { _multiQuoteValue }
+/ raw:highQuoteSingleRaw &{ ParseMultiQuoteString(raw, '\'') } { _multiQuoteValue }
+/ raw:highQuoteBacktickRaw &{ ParseMultiQuoteString(raw, '`') } { _multiQuoteValue } + +// Raw capture for 3+ quotes +highQuoteDoubleRaw = "" ('"'+ highQuoteDoubleContent* '"'+) +highQuoteSingleRaw = "" ("'"+ highQuoteSingleContent* "'"+) +highQuoteBacktickRaw = "" ('`'+ highQuoteBacktickContent* '`'+) + +highQuoteDoubleContent = [^"] / '"'+ &[^"] +highQuoteSingleContent = [^'] / "'"+ &[^'] +highQuoteBacktickContent = [^`] / '`'+ &[^`] + +// === DOUBLE QUOTES (N = 2) - Explicit PEG rules === +// Required for proper escape handling +doubleQuoted = doubleDouble / doubleSingle / doubleBacktick + +doubleDouble = '""' r:doubleDoubleContent* '""' { string.Join("", r) } +doubleDoubleContent = '""""' { "\"\"" } / !'""' c:. { c.ToString() } + +doubleSingle = "''" r:doubleSingleContent* "''" { string.Join("", r) } +doubleSingleContent = "''''" { "''" } / !"''" c:. { c.ToString() } + +doubleBacktick = '``' r:doubleBacktickContent* '``' { string.Join("", r) } +doubleBacktickContent = '````' { "``" } / !'``' c:. { c.ToString() } + +// === SINGLE QUOTES (N = 1) - Explicit PEG rules === +// Required for disambiguation of multiple strings +singleQuoted = singleDouble / singleSingle / singleBacktick + +singleDouble = '"' r:singleDoubleContent* '"' { string.Join("", r) } +singleDoubleContent = '""' { "\"" } / c:[^"] { c.ToString() } + +singleSingle = "'" r:singleSingleContent* "'" { string.Join("", r) } +singleSingleContent = "''" { "'" } / c:[^'] { c.ToString() } + +singleBacktick = '`' r:singleBacktickContent* '`' { string.Join("", r) } +singleBacktickContent = '``' { "`" } / c:[^`] { c.ToString() } + +_ = [ \t\r\n]* +eof = !. 
diff --git a/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/TestMinimizedHybrid.csproj b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/TestMinimizedHybrid.csproj new file mode 100644 index 0000000..54b7c6a --- /dev/null +++ b/docs/case-studies/csharp-peg-simplification/solutions/05-minimized-hybrid/project/TestMinimizedHybrid.csproj @@ -0,0 +1,12 @@ + + + net8.0 + Exe + enable + enable + + + + + +