Skip to content

Commit

Permalink
Implement autolinks (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrkkrp committed Nov 9, 2017
1 parent 5e00ab8 commit e64489e
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 59 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,11 @@ Other differences/incompatibilities with Common Mark specification include
* Nesting images into description of other images is not allowed (similarly
to the situation with links).
* Separate declaration of image's source and title is not (yet) supported.
* Inline autolinks are not supported yet.
* All URI references (in links, images, autolinks, etc.) are parsed as per
RFC 3986, no special escaping is supported. In addition to that, when an
URI reference in not enclosed with `<` and `>`, then closing parenthesis
character `)` is not considered part of URI (use `<uri>` syntax if you
want closing parenthesis as part of a URI).
* Blockquotes are not supported yet.
* Lists (unordered and ordered) are not supported yet.
* Setext headings are not supported yet.
Expand Down
10 changes: 6 additions & 4 deletions Text/MMark/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ import Data.Text (Text)
import Data.Typeable (Typeable)
import GHC.Generics
import Lucid
import Text.URI (URI)
import qualified Control.Foldl as L
import qualified Data.Text as T
import qualified Text.URI as URI

-- | Representation of complete markdown document. You can't look inside of
-- 'MMark' on purpose. The only way to influence an 'MMark' document you
Expand Down Expand Up @@ -219,9 +221,9 @@ data Inline
-- ^ Superscript
| CodeSpan Text
-- ^ Code span
| Link (NonEmpty Inline) Text (Maybe Text)
| Link (NonEmpty Inline) URI (Maybe Text)
-- ^ Link with text, destination, and optionally title
| Image (NonEmpty Inline) Text (Maybe Text)
| Image (NonEmpty Inline) URI (Maybe Text)
-- ^ Image with description, URL, and optionally title
deriving (Show, Eq, Ord, Data, Typeable, Generic)

Expand Down Expand Up @@ -311,10 +313,10 @@ defaultInlineRender = \case
code_ (toHtmlRaw txt)
Link inner dest mtitle ->
let title = maybe [] (pure . title_) mtitle
in a_ (href_ dest : title) (mapM_ defaultInlineRender inner)
in a_ (href_ (URI.render dest) : title) (mapM_ defaultInlineRender inner)
Image desc src mtitle ->
let title = maybe [] (pure . title_) mtitle
in img_ (alt_ (asPlainText desc) : src_ src : title)
in img_ (alt_ (asPlainText desc) : src_ (URI.render src) : title)

-- | HTML containing a newline.

Expand Down
87 changes: 71 additions & 16 deletions Text/MMark/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
Expand All @@ -32,20 +33,24 @@ import Control.Monad.State.Strict
import Data.Data (Data)
import Data.Default.Class
import Data.List.NonEmpty (NonEmpty (..), (<|))
import Data.Maybe (isJust)
import Data.Maybe (isNothing, isJust, fromJust)
import Data.Semigroup ((<>))
import Data.Text (Text)
import Data.Typeable (Typeable)
import GHC.Generics
import Text.MMark.Internal
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (eol)
import Text.URI (URI)
import qualified Control.Applicative.Combinators.NonEmpty as NE
import qualified Data.Char as Char
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as E
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Text.Email.Validate as Email
import qualified Text.Megaparsec.Char.Lexer as L
import qualified Text.URI as URI

----------------------------------------------------------------------------
-- Data types
Expand Down Expand Up @@ -142,7 +147,7 @@ parse file input =
Left err -> Left (nes err)
Right blocks ->
let parsed = fmap (runIsp (pInlines def <* eof)) <$> blocks
getErrs (Left e) es = replaceEof e : es
getErrs (Left e) es = replaceEof "end of inline block" e : es
getErrs _ es = es
fromRight (Right x) = x
fromRight _ =
Expand Down Expand Up @@ -306,9 +311,11 @@ pInlines InlineConfig {..} =
[ pCodeSpan ] <>
[ pInlineLink | iconfigAllowLinks ] <>
[ pImage | iconfigAllowImages ] <>
[ pEnclosedInline
[ try (angel pAutolink)
, pEnclosedInline
, try pHardLineBreak
, pPlain ]
angel = between (char '<') (char '>')

pCodeSpan :: IParser Inline
pCodeSpan = do
Expand All @@ -329,9 +336,10 @@ pInlineLink = do
xs <- between (char '[') (char ']') $
pInlines def { iconfigAllowLinks = False }
void (char '(') <* sc
dest <- pUrl
dest <- pUri
mtitle <- optional (sc1 *> pTitle)
sc <* char ')'
put OtherChar
return (Link xs dest mtitle)

pImage :: IParser Inline
Expand All @@ -340,19 +348,37 @@ pImage = do
(pInlines def { iconfigAllowImages = False })
alt <- nes (Plain "") <$ string "![]" <|> nonEmptyDesc
void (char '(') <* sc
src <- pUrl
src <- pUri
mtitle <- optional (sc1 *> pTitle)
sc <* char ')'
put OtherChar
return (Image alt src mtitle)

pUrl :: IParser Text
pUrl = enclosedLink <|> normalLink
pUri :: IParser URI
pUri = do
uri <- between (char '<') (char '>') URI.parser <|> naked
put OtherChar
return uri
where
enclosedLink = between (char '<') (char '>') $
manyEscapedWith (linkChar '<' '>') "unescaped link character"
normalLink =
manyEscapedWith (linkChar '(' ')') "unescaped link character"
linkChar x y ch = not (isSpaceN ch) && ch /= x && ch /= y
naked = do
startPos <- getPosition
input <- takeWhileP Nothing $ \x ->
not (isSpaceN x || x == ')')
let pst = State
{ stateInput = input
, statePos = nes startPos
, stateTokensProcessed = 0
, stateTabWidth = mkPos 4 }
case snd (runParser' (URI.parser <* eof) pst) of
Left err' ->
case replaceEof "end of URI literal" err' of
TrivialError pos us es -> do
setPosition (NE.head pos)
failure us es
FancyError pos xs -> do
setPosition (NE.head pos)
fancyFailure xs
Right x -> return x

pTitle :: IParser Text
pTitle = choice
Expand All @@ -363,6 +389,20 @@ pTitle = choice
p start end = between (char start) (char end) $
manyEscapedWith (/= end) "unescaped character"

pAutolink :: IParser Inline
pAutolink = do
notFollowedBy (char '>') -- empty links don't make sense
uri <- URI.parser
put OtherChar
return $ case isEmailUri uri of
Nothing ->
let txt = (nes . Plain . URI.render) uri
in Link txt uri Nothing
Just email ->
let txt = nes (Plain email)
uri' = URI.makeAbsolute mailtoScheme uri
in Link txt uri' Nothing

pEnclosedInline :: IParser Inline
pEnclosedInline = do
let noEmpty = def { iconfigAllowEmpty = False }
Expand Down Expand Up @@ -456,11 +496,12 @@ pPlain = Plain . T.pack <$> some
pNonEscapedChar = label "unescaped non-markup character" . choice $
[ try (char '\\' <* notFollowedBy eol) <* put OtherChar
, try (char '!' <* notFollowedBy (char '[')) <* put SpaceChar
, try (char '<' <* notFollowedBy (pAutolink <* char '>')) <* put OtherChar
, spaceChar <* put SpaceChar
, satisfy isTrans <* put SpaceChar
, satisfy isOther <* put OtherChar ]
isTrans x = isTransparentPunctuation x && x /= '!'
isOther x = not (isMarkupChar x) && x /= '\\' && x /= '!'
isOther x = not (isMarkupChar x) && x /= '\\' && x /= '!' && x /= '<'

----------------------------------------------------------------------------
-- Parsing helpers
Expand Down Expand Up @@ -662,16 +703,30 @@ liftFrame = \case
SubscriptFrame -> Subscript
SuperscriptFrame -> Superscript

replaceEof :: ParseError Char e -> ParseError Char e
replaceEof = \case
replaceEof :: String -> ParseError Char e -> ParseError Char e
replaceEof altLabel = \case
TrivialError pos us es -> TrivialError pos (f <$> us) (E.map f es)
FancyError pos xs -> FancyError pos xs
where
f EndOfInput = Label (NE.fromList "end of inline block")
f EndOfInput = Label (NE.fromList altLabel)
f x = x

mmarkErr :: MonadParsec MMarkErr s m => MMarkErr -> m a
mmarkErr = fancyFailure . E.singleton . ErrorCustom

toNesTokens :: Text -> NonEmpty Char
toNesTokens = NE.fromList . T.unpack

isEmailUri :: URI -> Maybe Text
isEmailUri uri =
case URI.unRText <$> URI.uriPath uri of
[x] ->
if Email.isValid (TE.encodeUtf8 x) &&
(isNothing (URI.uriScheme uri) ||
URI.uriScheme uri == Just mailtoScheme)
then Just x
else Nothing
_ -> Nothing

mailtoScheme :: URI.RText 'URI.Scheme
mailtoScheme = fromJust (URI.mkScheme "mailto")
2 changes: 2 additions & 0 deletions mmark.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ library
, containers >= 0.5 && < 0.6
, data-default-class
, deepseq >= 1.3 && < 1.5
, email-validate >= 2.2 && < 2.4
, foldl >= 1.2 && < 1.4
, lucid >= 2.6 && < 3.0
, megaparsec >= 6.1 && < 7.0
, modern-uri >= 0.1.1 && < 0.2
, mtl >= 2.0 && < 3.0
, parser-combinators >= 0.2 && < 1.0
, text >= 0.2 && < 1.3
Expand Down
2 changes: 2 additions & 0 deletions stack.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
resolver: nightly-2017-10-11
packages:
- '.'
extra-deps:
- modern-uri-0.1.1.0

0 comments on commit e64489e

Please sign in to comment.