diff --git a/README.md b/README.md index 36acc93..ab5150b 100644 --- a/README.md +++ b/README.md @@ -166,10 +166,10 @@ Other differences/incompatibilities with Common Mark specification include * Link destination cannot contain unescaped parentheses even if they form a balanced pair. If you want them there, escape them like other punctuation characters. -* Image description is parsed as plain text. Common Mark suggests that we - parse it as inlines and then extract from them plain text content, I think - it's easier to treat image description as plain text from the beginning. -* Separate declaration of image's source and title is not supported. +* Nesting images into description of other images is not allowed (similarly + to the situation with links). +* Empty image descriptions are not allowed. +* Separate declaration of image's source and title is not (yet) supported. * Inline autolinks are not supported yet. * Blockquotes are not supported yet. * Lists (unordered and ordered) are not supported yet. diff --git a/Text/MMark/Extension.hs b/Text/MMark/Extension.hs index 7247ec6..f19d565 100644 --- a/Text/MMark/Extension.hs +++ b/Text/MMark/Extension.hs @@ -29,7 +29,9 @@ module Text.MMark.Extension , inlineRender -- * Scanner construction , Scanner - , scanner ) + , scanner + -- * Utils + , asPlainText ) where import Data.List.NonEmpty (NonEmpty (..)) diff --git a/Text/MMark/Internal.hs b/Text/MMark/Internal.hs index 644d088..85903fc 100644 --- a/Text/MMark/Internal.hs +++ b/Text/MMark/Internal.hs @@ -32,7 +32,8 @@ module Text.MMark.Internal , Inline (..) , Render (..) , defaultBlockRender - , defaultInlineRender ) + , defaultInlineRender + , asPlainText ) where import Control.DeepSeq @@ -237,7 +238,7 @@ data Inline -- ^ Code span | Link (NonEmpty Inline) Text (Maybe Text) -- ^ Link with text, destination, and optionally title - | Image Text Text (Maybe Text) + | Image (NonEmpty Inline) Text (Maybe Text) -- ^ Image with description, URL, and optionally title deriving (Show, Eq, Ord, Data, Typeable, Generic) @@ -328,11 +329,30 @@ defaultInlineRender = \case Link inner dest mtitle -> let title = maybe [] (pure . title_) mtitle in a_ (href_ dest : title) (mapM_ defaultInlineRender inner) - Image alt src mtitle -> + Image desc src mtitle -> let title = maybe [] (pure . title_) mtitle - in img_ (alt_ alt : src_ src : title) + in img_ (alt_ (asPlainText desc) : src_ src : title) -- | HTML containing a newline. newline :: Html () newline = "\n" + +---------------------------------------------------------------------------- +-- Utils + +-- | Convert a non-empty collection of 'Inline's into their plain text +-- representation. This is used e.g. to render image descriptions. + +asPlainText :: NonEmpty Inline -> Text +asPlainText = foldMap $ \case + Plain txt -> txt + LineBreak -> "\n" + Emphasis xs -> asPlainText xs + Strong xs -> asPlainText xs + Strikeout xs -> asPlainText xs + Subscript xs -> asPlainText xs + Superscript xs -> asPlainText xs + CodeSpan txt -> txt + Link xs _ _ -> asPlainText xs + Image xs _ _ -> asPlainText xs diff --git a/Text/MMark/Parser.hs b/Text/MMark/Parser.hs index 4df64d8..e301f97 100644 --- a/Text/MMark/Parser.hs +++ b/Text/MMark/Parser.hs @@ -18,6 +18,7 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeFamilies #-} module Text.MMark.Parser @@ -29,6 +30,7 @@ import Control.Applicative import Control.Monad import Control.Monad.State.Strict import Data.Data (Data) +import Data.Default.Class import Data.List.NonEmpty (NonEmpty (..), (<|)) import Data.Maybe (isJust) import Data.Semigroup ((<>)) @@ -101,6 +103,24 @@ data InlineState | DoubleFrame InlineFrame InlineFrame deriving (Eq, Ord, Show) +-- | Configuration in inline parser. + +data InlineConfig = InlineConfig + { iconfigAllowEmpty :: !Bool + -- ^ Whether to accept empty inline blocks + , iconfigAllowLinks :: !Bool + -- ^ Whether to parse links + , iconfigAllowImages :: !Bool + -- ^ Whether to parse images + } + +instance Default InlineConfig where + def = InlineConfig + { iconfigAllowEmpty = True + , iconfigAllowLinks = True + , iconfigAllowImages = True + } + ---------------------------------------------------------------------------- -- Block parser @@ -121,7 +141,7 @@ parse file input = -- level cannot be parsed, which should not normally happen. Left err -> Left (nes err) Right blocks -> - let parsed = fmap (runIsp (pInlines True True <* eof)) <$> blocks + let parsed = fmap (runIsp (pInlines def <* eof)) <$> blocks getErrs (Left e) es = replaceEof e : es getErrs _ es = es fromRight (Right x) = x @@ -276,17 +296,17 @@ runIsp p (Isp startPos input) = -- | Parse a stream of 'Inline's. -pInlines :: Bool -> Bool -> IParser (NonEmpty Inline) -pInlines allowEmpty allowLinks = - if allowEmpty +pInlines :: InlineConfig -> IParser (NonEmpty Inline) +pInlines InlineConfig {..} = + if iconfigAllowEmpty then nes (Plain "") <$ eof <|> stuff else stuff where stuff = NE.some . label "inline content" . choice $ [ pCodeSpan ] <> - [ pInlineLink | allowLinks ] <> - [ pImage - , pEnclosedInline + [ pInlineLink | iconfigAllowLinks ] <> + [ pImage | iconfigAllowImages ] <> + [ pEnclosedInline , try pHardLineBreak , pPlain ] @@ -306,7 +326,8 @@ pCodeSpan = do pInlineLink :: IParser Inline pInlineLink = do - xs <- between (char '[') (char ']') (pInlines True False) + xs <- between (char '[') (char ']') $ + pInlines def { iconfigAllowLinks = False } void (char '(') <* sc dest <- pUrl mtitle <- optional (sc1 *> pTitle) @@ -317,7 +338,7 @@ pImage :: IParser Inline pImage = do void (char '!') alt <- between (char '[') (char ']') $ - manyEscapedWith (\x -> x /= '[' && x /= ']') "unescaped character" + pInlines def { iconfigAllowImages = False } void (char '(') <* sc src <- pUrl mtitle <- optional (sc1 *> pTitle) @@ -344,6 +365,7 @@ pTitle = choice pEnclosedInline :: IParser Inline pEnclosedInline = do + let noEmpty = def { iconfigAllowEmpty = False } st <- choice [ pLfdr (DoubleFrame StrongFrame StrongFrame) , pLfdr (DoubleFrame StrongFrame EmphasisFrame) @@ -360,16 +382,16 @@ pEnclosedInline = do , pLfdr (SingleFrame SuperscriptFrame) ] case st of SingleFrame x -> - liftFrame x <$> pInlines False True <* pRfdr x + liftFrame x <$> pInlines noEmpty <* pRfdr x DoubleFrame x y -> do - inlines0 <- pInlines False True + inlines0 <- pInlines noEmpty thisFrame <- pRfdr x <|> pRfdr y let thatFrame = if x == thisFrame then y else x immediate <- True <$ pRfdr thatFrame <|> pure False if immediate then (return . liftFrame thatFrame . nes . liftFrame thisFrame) inlines0 else do - inlines1 <- pInlines False True + inlines1 <- pInlines noEmpty void (pRfdr thatFrame) return . liftFrame thatFrame $ liftFrame thisFrame inlines0 <| inlines1 @@ -432,12 +454,13 @@ pPlain = Plain . T.pack <$> some pNewline = hidden . try $ '\n' <$ sc' <* eol <* sc' <* put SpaceChar pNonEscapedChar = label "unescaped non-markup character" . choice $ - [ try (char '\\' <* notFollowedBy eol) <* put OtherChar - , spaceChar <* put SpaceChar - , try (char '!' <* notFollowedBy (char '[')) <* put SpaceChar - , satisfy isTransparentPunctuation <* put SpaceChar - , satisfy isOther <* put OtherChar ] - isOther x = not (isMarkupChar x) && x /= '\\' + [ try (char '\\' <* notFollowedBy eol) <* put OtherChar + , try (char '!' <* notFollowedBy (char '[')) <* put SpaceChar + , spaceChar <* put SpaceChar + , satisfy isTrans <* put SpaceChar + , satisfy isOther <* put OtherChar ] + isTrans x = isTransparentPunctuation x && x /= '!' + isOther x = not (isMarkupChar x) && x /= '\\' && x /= '!' ---------------------------------------------------------------------------- -- Parsing helpers diff --git a/mmark.cabal b/mmark.cabal index c1d5030..ed19382 100644 --- a/mmark.cabal +++ b/mmark.cabal @@ -28,6 +28,7 @@ library build-depends: aeson >= 0.11 && < 1.3 , base >= 4.8 && < 5.0 , containers >= 0.5 && < 0.6 + , data-default-class , deepseq >= 1.3 && < 1.5 , lucid >= 2.6 && < 3.0 , megaparsec >= 6.1 && < 7.0 diff --git a/tests/Text/MMarkSpec.hs b/tests/Text/MMarkSpec.hs index f5b5905..54b4a46 100644 --- a/tests/Text/MMarkSpec.hs +++ b/tests/Text/MMarkSpec.hs @@ -856,8 +856,7 @@ spec = parallel $ do in s ~-> err (posN 11 s) (utok '[' <> etok ']' <> eic <> eric) it "CM489" $ let s = "![[[foo](uri1)](uri2)](uri3)" - in s ~-> err (posN 2 s) - (utok '[' <> etok ']' <> elabel "escaped character" <> elabel "unescaped character") + in s ~-> err (posN 3 s) (utoks "[foo" <> eeib <> eic) it "CM490" $ let s = "*[foo*](/uri)\n" in s ~-> err (posN 5 s) (utok '*' <> etok ']' <> eric) @@ -882,15 +881,13 @@ spec = parallel $ do "

\"foo\"

\n" it "CM542" $ "![foo *bar*](train.jpg \"train & tracks\")" ==-> - "

\"foo

\n" + "

\"foo

\n" it "CM543" $ let s = "![foo ![bar](/url)](/url2)\n" - in s ~-> err (posN 7 s) - (utok '[' <> etok ']' <> elabel "escaped character" <> elabel "unescaped character") + in s ~-> err (posN 6 s) (utok '!' <> etok ']') it "CM544" $ - let s = "![foo [bar](/url)](/url2)\n" - in s ~-> err (posN 6 s) - (utok '[' <> etok ']' <> elabel "escaped character" <> elabel "unescaped character") + "![foo [bar](/url)](/url2)" ==-> + "

\"foo

\n" it "CM545" pending it "CM546" pending it "CM547" $ @@ -898,7 +895,13 @@ spec = parallel $ do "

\"foo\"

\n" it "CM548" $ "My ![foo bar](/path/to/train.jpg \"title\" )" ==-> - "

My \"foo

\n" + "

My \"foo

\n" + it "CM549" $ + "![foo]()" ==-> + "

\"foo\"

\n" + it "CM550" $ + let s = "![](/url)" + in s ~-> err (posN 2 s) (utoks "](/u" <> eeib <> eic) context "6.9 Hard line breaks" $ do -- NOTE We currently do not support hard line breaks represented in -- markup as space before newline.