Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix table parsing #43

Merged
merged 4 commits into from Jan 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,17 @@
## MMark 0.0.5.1

* The parser can now recover from block-level parse errors in tables and
continue parsing.

* Pipes in code spans in table cells are no longer treated as table cell
delimiters.

* The table sub-parser now rejects inputs that do not look like a table more
quickly, which improves overall performance.

* Better handling of the cases when a block can be interpreted as a list and
as a table at the same time.

## MMark 0.0.5.0

* Documentation improvements.
Expand Down
44 changes: 38 additions & 6 deletions Text/MMark/Parser.hs
Expand Up @@ -170,11 +170,11 @@ pBlock = do
[ Just <$> pThematicBreak
, Just <$> pAtxHeading
, Just <$> pFencedCodeBlock
, Just <$> pTable
, Just <$> pUnorderedList
, Just <$> pOrderedList
, Just <$> pBlockquote
, pReferenceDef
, Just <$> pTable
, Just <$> pParagraph ]
_ ->
Just <$> pIndentedCodeBlock
Expand Down Expand Up @@ -441,25 +441,31 @@ pReferenceDef = do
pTable :: BParser (Block Isp)
pTable = do
(n, headerRow) <- try $ do
pos <- L.indentLevel
option False (T.any (== '|') <$> lookAhead nonEmptyLine) >>= guard
let pipe' = option False (True <$ pipe)
l <- pipe'
headerRow <- NE.sepBy1 cell (try (pipe <* notFollowedBy eol))
r <- pipe'
let n = NE.length headerRow
guard (n > 1 || l || r)
eol <* sc'
L.indentLevel >>= \i -> guard (i == pos || i == (pos <> pos1))
lookAhead nonEmptyLine >>= guard . isHeaderLike
return (n, headerRow)
caligns <- rowWrapper (NE.fromList <$> sepByCount n calign pipe)
otherRows <- many $ do
lookAhead (option True (isBlank <$> nonEmptyLine)) >>= guard . not
rowWrapper (NE.fromList <$> sepByCount n cell pipe)
Table caligns (headerRow :| otherRows) <$ sc
withRecovery recover $ do
sc'
caligns <- rowWrapper (NE.fromList <$> sepByCount n calign pipe)
otherRows <- many $ do
endOfTable >>= guard . not
rowWrapper (NE.fromList <$> sepByCount n cell pipe)
Table caligns (headerRow :| otherRows) <$ sc
where
cell = do
startPos <- getPosition
txt <- fmap (T.stripEnd . T.pack) . foldMany' . choice $
[ (++) . T.unpack <$> hidden (string "\\|")
, (++) . T.unpack <$> pCodeSpanB
, (:) <$> label "inline content" (satisfy cellChar) ]
return (IspSpan startPos txt)
cellChar x = x /= '|' && notNewline x
Expand Down Expand Up @@ -488,6 +494,13 @@ pTable = do
8 % 10
isHeaderConstituent x =
isSpace x || x == '|' || x == '-' || x == ':'
endOfTable =
lookAhead (option True (isBlank <$> nonEmptyLine))
recover err =
Naked (IspError (replaceEof "end of table block" err)) <$
manyTill
(optional nonEmptyLine)
(endOfTable >>= guard) <* sc

-- | Parse a paragraph or naked text (in some cases).

Expand Down Expand Up @@ -529,6 +542,23 @@ pParagraph = do
(if allowNaked then toBlock else Paragraph)
(IspSpan startPos (assembleParagraph (l:ls []))) <$ sc

----------------------------------------------------------------------------
-- Auxiliary block-level parsers

-- | A block-level counterpart of 'pCodeSpan': recognise a code span and
-- return the exact input it covers (via 'match') rather than a parsed
-- inline. The span opens with a run of backticks and closes with a run of
-- the same length that is not followed by another backtick. Content is
-- made of backticks or characters accepted by @notNewline@, so the span is
-- presumably limited to a single line.

pCodeSpanB :: BParser Text
pCodeSpanB = fst <$> match (hidden codeSpan)
  where
    codeSpan = do
      fenceLen <- try (length <$> some backtick)
      -- Closing fence: @fenceLen@ backticks, not followed by one more.
      let closing = try (count fenceLen backtick *> notFollowedBy backtick)
      skipManyTill content closing
    backtick = char '`'
    content = label "code span content" $
      takeWhile1P Nothing (== '`') <|>
      takeWhile1P Nothing (\x -> x /= '`' && notNewline x)

----------------------------------------------------------------------------
-- Inline parser

Expand Down Expand Up @@ -581,6 +611,8 @@ pInlines = do
else pPlain

-- | Parse a code span.
--
-- See also: 'pCodeSpanB'.

pCodeSpan :: IParser Inline
pCodeSpan = do
Expand Down
35 changes: 33 additions & 2 deletions tests/Text/MMarkSpec.hs
Expand Up @@ -1805,7 +1805,7 @@ spec = parallel $ do
let o = "<table>\n<thead>\n<tr><th>Foo</th></tr>\n</thead>\n<tbody>\n<tr><td>foo</td></tr>\n</tbody>\n</table>\n"
"|Foo\n---\nfoo" ==-> o
"Foo|\n---\nfoo" ==-> o
"| Foo |\n --- \n foo " ==-> o
"| Foo |\n --- \n foo " ==-> o
"| Foo |\n| --- |\n| foo |" ==-> o
it "reports correct parse errors when parsing the header line" $
(let s = "Foo | Bar\na-- | ---"
Expand All @@ -1824,7 +1824,7 @@ spec = parallel $ do
"<p>Foo | Bar\nab- | ---</p>\n"
it "demands that number of columns in rows match number of columns in header" $
(let s = "Foo | Bar | Baz\n--- | --- | ---\nfoo | bar"
in s ~-> err (posN 41 s) (ueof <> etok '|' <> eic))
in s ~-> err (posN 41 s) (ulabel "end of table block" <> etok '|' <> eic))
>>
(let s = "Foo | Bar | Baz\n--- | --- | ---\nfoo | bar\n\nHere it goes."
in s ~-> err (posN 41 s) (utok '\n' <> etok '|' <> eic))
Expand All @@ -1837,6 +1837,21 @@ spec = parallel $ do
it "escaped pipes do not fool position tracking" $
let s = "Foo | Bar\n--- | ---\n\\| *fo | bar"
in s ~-> err (posN 26 s) (ueib <> etok '*' <> elabel "inline content")
it "pipes in code spans in headers do not fool the parser" $
"`|Foo|` | `|Bar|`\n--- | ---\nfoo | bar" ==->
"<table>\n<thead>\n<tr><th><code>|Foo|</code></th><th><code>|Bar|</code></th></tr>\n</thead>\n<tbody>\n<tr><td>foo</td><td>bar</td></tr>\n</tbody>\n</table>\n"
it "pipes in code spans in cells do not fool the parser" $
"Foo | Bar\n--- | ---\n`|foo|` | `|bar|`" ==->
"<table>\n<thead>\n<tr><th>Foo</th><th>Bar</th></tr>\n</thead>\n<tbody>\n<tr><td><code>|foo|</code></td><td><code>|bar|</code></td></tr>\n</tbody>\n</table>\n"
it "multi-line code spans are disallowed in table headers" $
"`Foo\nBar` | Bar\n--- | ---\nfoo | bar" ==->
"<p><code>Foo Bar</code> | Bar\n--- | ---\nfoo | bar</p>\n"
it "multi-line code spans are disallowed in table cells" $
let s = "Foo | Bar\n--- | ---\n`foo\nbar` | bar"
in s ~~->
[ err (posN 24 s) (utok '\n' <> etok '`' <> elabel "code span content")
, err (posN 35 s) (ueib <> etok '`' <> elabel "code span content")
]
it "parses tables with just header row" $
"Foo | Bar\n--- | ---" ==->
"<table>\n<thead>\n<tr><th>Foo</th><th>Bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n"
Expand All @@ -1849,6 +1864,22 @@ spec = parallel $ do
[ err (posN 10 s) (ueib <> etok '*' <> eic)
, err (posN 26 s) (ueib <> etok '_' <> eic)
, errFancy (posN 32 s) (nonFlanking "_") ]
it "tables have higher precedence than unordered lists" $ do
"+ foo | bar\n------|----\n" ==->
"<table>\n<thead>\n<tr><th>+ foo</th><th>bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n"
"+ foo | bar\n -----|----\n" ==->
"<table>\n<thead>\n<tr><th>+ foo</th><th>bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n"
it "tables have higher precedence than ordered lists" $ do
"1. foo | bar\n-------|----\n" ==->
"<table>\n<thead>\n<tr><th>1. foo</th><th>bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n"
"1. foo | bar\n ------|----\n" ==->
"<table>\n<thead>\n<tr><th>1. foo</th><th>bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n"
it "if table is indented inside unordered list, it's put there" $
"+ foo | bar\n ----|----\n" ==->
"<ul>\n<li>\n<table>\n<thead>\n<tr><th>foo</th><th>bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n</li>\n</ul>\n"
it "if table is indented inside ordered list, it's put there" $
"1. foo | bar\n ----|----\n" ==->
"<ol>\n<li>\n<table>\n<thead>\n<tr><th>foo</th><th>bar</th></tr>\n</thead>\n<tbody>\n</tbody>\n</table>\n</li>\n</ol>\n"
it "renders a comprehensive table correctly" $
withFiles "data/table.md" "data/table.html"
context "multiple parse errors" $ do
Expand Down