/
Parser.hs
984 lines (873 loc) · 30 KB
/
Parser.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
-- |
-- Module : Text.MMark.Parser
-- Copyright : © 2017 Mark Karpov
-- License : BSD 3 clause
--
-- Maintainer : Mark Karpov <markkarpov92@gmail.com>
-- Stability : experimental
-- Portability : portable
--
-- MMark markdown parser.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
module Text.MMark.Parser
( MMarkErr (..)
, parse )
where
import Control.Applicative
import Control.DeepSeq
import Control.Monad
import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bifunctor (Bifunctor (..))
import Data.Data (Data)
import Data.Default.Class
import Data.List.NonEmpty (NonEmpty (..), (<|))
import Data.Maybe (isNothing, isJust, fromJust, fromMaybe)
import Data.Monoid (Any (..))
import Data.Semigroup (Semigroup (..))
import Data.Text (Text)
import Data.Typeable (Typeable)
import Data.Void
import GHC.Generics
import Text.MMark.Internal
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (eol)
import Text.URI (URI)
import qualified Control.Applicative.Combinators.NonEmpty as NE
import qualified Data.Char as Char
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as E
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Yaml as Yaml
import qualified Text.Email.Validate as Email
import qualified Text.Megaparsec.Char.Lexer as L
import qualified Text.URI as URI
----------------------------------------------------------------------------
-- Data types
-- | Block-level parser type. The 'Reader' monad inside allows to access
-- current reference level: 1 column for top-level of document, column where
-- content starts for block quotes and lists.
type BParser = ParsecT MMarkErr Text (Reader BlockEnv)
-- | Block-level parser environment.
data BlockEnv = BlockEnv
{ benvAllowNaked :: !Bool
-- ^ Should we consider a single paragraph naked?
, benvRefLevel :: !Pos
-- ^ Reference level
}
instance Default BlockEnv where
def = BlockEnv
{ benvAllowNaked = False
, benvRefLevel = pos1
}
-- | Block-level parsing mode.
data BlockMode
= UnorderedListMode Char
-- ^ We're currently parsing an unordered list with this bullet
| OrderedListMode Char
-- ^ We're currently parsing an ordered list with this delimiter
| NormalMode
-- ^ We're currently parsing something else
deriving (Eq, Ord, Show)
instance Default BlockMode where
def = NormalMode
-- | MMark custom parse errors.
data MMarkErr
= YamlParseError String
-- ^ YAML error that occurred during parsing of a YAML block
| ListStartIndexTooBig Word
-- ^ Ordered list start numbers must be nine digits or less
| ListIndexOutOfOrder Word Word
-- ^ The index in an ordered list is out of order, first number is the
-- actual index we ran into, the second number is the expected index
| NonFlankingDelimiterRun (NonEmpty Char)
-- ^ This delimiter run should be in left- or right- flanking position
deriving (Eq, Ord, Show, Read, Generic, Typeable, Data)
instance ShowErrorComponent MMarkErr where
showErrorComponent = \case
YamlParseError str ->
"YAML parse error: " ++ str
ListStartIndexTooBig n ->
"Ordered list start numbers must be nine digits or less, " ++ show n
++ " is too big"
ListIndexOutOfOrder actual expected ->
"List index out of order: " ++ show actual ++ ", expected " ++ show expected
NonFlankingDelimiterRun dels ->
showTokens dels ++ " should be in left- or right- flanking position"
instance NFData MMarkErr
-- | Inline-level parser type. We store type of the last consumed character
-- in the state.
type IParser = StateT CharType (Parsec MMarkErr Text)
-- | 'Inline' source pending parsing.
data Isp
= IspSpan SourcePos Text
-- ^ We have an inline source pending parsnig
| IspError (ParseError Char MMarkErr)
-- ^ We should just return this parse error
deriving (Eq, Show)
-- | Type of last seen character.
data CharType
= SpaceChar -- ^ White space or a transparent character
| LeftFlankingDel -- ^ Left flanking delimiter
| RightFlankingDel -- ^ Right flaking delimiter
| OtherChar -- ^ Other character
deriving (Eq, Ord, Show)
-- | Frame that describes where we are in parsing inlines.
data InlineFrame
= EmphasisFrame -- ^ Emphasis with asterisk @*@
| EmphasisFrame_ -- ^ Emphasis with underscore @_@
| StrongFrame -- ^ Strong emphasis with asterisk @**@
| StrongFrame_ -- ^ Strong emphasis with underscore @__@
| StrikeoutFrame -- ^ Strikeout
| SubscriptFrame -- ^ Subscript
| SuperscriptFrame -- ^ Superscript
deriving (Eq, Ord, Show)
-- | State of inline parsing that specifies whether we expect to close one
-- frame or there is a possibility to close one of two alternatives.
data InlineState
= SingleFrame InlineFrame -- ^ One frame to be closed
| DoubleFrame InlineFrame InlineFrame -- ^ Two frames to be closed
deriving (Eq, Ord, Show)
-- | Configuration of inline parser.
data InlineConfig = InlineConfig
{ iconfigAllowEmpty :: !Bool
-- ^ Whether to accept empty inline blocks
, iconfigAllowLinks :: !Bool
-- ^ Whether to parse links
, iconfigAllowImages :: !Bool
-- ^ Whether to parse images
}
instance Default InlineConfig where
def = InlineConfig
{ iconfigAllowEmpty = True
, iconfigAllowLinks = True
, iconfigAllowImages = True
}
-- | A shortcut type synonym for collection of parse errors.
type Errs = NonEmpty (ParseError Char MMarkErr)
-- | An auxiliary type for collapsing levels of 'Either's.
data Pair s a
= PairL s
| PairR ([a] -> [a])
instance Semigroup s => Semigroup (Pair s a) where
(PairL l) <> (PairL r) = PairL (l <> r)
(PairL l) <> (PairR _) = PairL l
(PairR _) <> (PairL r) = PairL r
(PairR l) <> (PairR r) = PairR (l . r)
instance Semigroup s => Monoid (Pair s a) where
mempty = PairR id
mappend = (<>)
-- | Convert @'Either' a b@ to @'Pair' a b@.
e2p :: Either a b -> Pair a b
e2p = \case
Left a -> PairL a
Right b -> PairR (b:)
----------------------------------------------------------------------------
-- Block parser
-- | Parse a markdown document in the form of a strict 'Text' value and
-- either report parse errors or return a 'MMark' document. Note that the
-- parser has the ability to report multiple parse errors at once.
parse
:: String
-- ^ File name (only to be used in error messages), may be empty
-> Text
-- ^ Input to parse
-> Either (NonEmpty (ParseError Char MMarkErr)) MMark
-- ^ Parse errors or parsed document
parse file input =
case runReader (runParserT p file input) def of
-- NOTE This parse error only happens when document structure on block
-- level cannot be parsed even with recovery, which should not normally
-- happen.
Left err -> Left (nes err)
Right (myaml, rawBlocks) ->
let parsed :: [Block (Either Errs (NonEmpty Inline))]
parsed = doInline <$> rawBlocks
doInline :: Block Isp -> Block (Either Errs (NonEmpty Inline))
doInline = fmap
$ first (nes . replaceEof "end of inline block")
. runIsp (pInlines def <* eof)
g block =
case foldMap e2p block of
PairL errs -> PairL errs
PairR _ -> PairR (fmap fromRight block :)
in case foldMap g parsed of
PairL errs -> Left errs
PairR blocks -> Right MMark
{ mmarkYaml = myaml
, mmarkBlocks = blocks []
, mmarkExtension = mempty }
where
p = (,) <$> optional pYamlBlock
<*> between (setTabWidth (mkPos 4)) eof pBlocks
pYamlBlock :: BParser Yaml.Value
pYamlBlock = do
dpos <- getPosition
string "---" *> sc' *> eol
let go = do
l <- takeWhileP Nothing notNewline
void (optional eol)
e <- atEnd
if e || T.stripEnd l == "---"
then return []
else (l :) <$> go
ls <- go
case (Yaml.decodeEither . TE.encodeUtf8 . T.intercalate "\n") ls of
Left err' -> do
let (apos, err) = splitYamlError (sourceName dpos) err'
setPosition (fromMaybe dpos apos)
(fancyFailure . E.singleton . ErrorCustom . YamlParseError) err
Right v ->
return v
pBlocks :: BParser [Block Isp]
pBlocks = many pBlock
pBlock :: BParser (Block Isp)
pBlock = do
sc
rlevel <- asks benvRefLevel
alevel <- L.indentLevel
done <- atEnd
if done || alevel < rlevel then empty else
case compare alevel (ilevel rlevel) of
LT -> choice
[ pThematicBreak
, pAtxHeading
, pFencedCodeBlock
, pUnorderedList
, pOrderedList
, pBlockquote
, pParagraph ]
_ ->
pIndentedCodeBlock
pThematicBreak :: BParser (Block Isp)
pThematicBreak = do
l' <- lookAhead nonEmptyLine
clevel <- ilevel <$> asks benvRefLevel
let l = T.filter (not . isSpace) l'
if T.length l >= 3 &&
indentLevel l' < clevel &&
(T.all (== '*') l ||
T.all (== '-') l ||
T.all (== '_') l)
then ThematicBreak <$ nonEmptyLine <* sc
else empty
pAtxHeading :: BParser (Block Isp)
pAtxHeading = do
(void . lookAhead . try) start
withRecovery recover $ do
hlevel <- length <$> start
sc1'
ispPos <- getPosition
r <- someTill (satisfy notNewline <?> "heading character") . try $
optional (sc1' *> some (char '#') *> sc') *> (eof <|> eol)
let toBlock = case hlevel of
1 -> Heading1
2 -> Heading2
3 -> Heading3
4 -> Heading4
5 -> Heading5
_ -> Heading6
toBlock (IspSpan ispPos (T.strip (T.pack r))) <$ sc
where
start = count' 1 6 (char '#')
recover err =
Heading1 (IspError err) <$ takeWhileP Nothing notNewline <* sc
pFencedCodeBlock :: BParser (Block Isp)
pFencedCodeBlock = do
let p ch = try $ do
void $ count 3 (char ch)
n <- (+ 3) . length <$> many (char ch)
ml <- optional (T.strip <$> someEscapedWith notNewline <?> "info string")
guard (maybe True (not . T.any (== '`')) ml)
return
(ch, n,
case ml of
Nothing -> Nothing
Just l ->
if T.null l
then Nothing
else Just l)
alevel <- L.indentLevel
(ch, n, infoString) <- (p '`' <|> p '~') <* eol
let content = label "code block content" (option "" nonEmptyLine <* eol)
closingFence = try . label "closing code fence" $ do
rlevel <- asks benvRefLevel
void $ L.indentGuard sc' LT (ilevel rlevel)
void $ count n (char ch)
(void . many . char) ch
sc'
eof <|> eol
ls <- manyTill content closingFence
CodeBlock infoString (assembleCodeBlock alevel ls) <$ sc
pIndentedCodeBlock :: BParser (Block Isp)
pIndentedCodeBlock = do
initialIndent <- L.indentLevel
clevel <- ilevel <$> asks benvRefLevel
let go ls = do
immediate <- lookAhead $
(>= clevel) <$> (sc' *> L.indentLevel)
eventual <- lookAhead $
(>= clevel) <$> (sc *> L.indentLevel)
if not immediate && not eventual
then return ls
else do
l <- option "" nonEmptyLine
continue <- eol'
if continue
then go (l:ls)
else return (l:ls)
-- NOTE This is a bit unfortunate, but it's difficult to guarantee
-- that preceding space is not yet consumed when we get to
-- interpreting input as an indented code block, so we need to restore
-- the space this way.
f x = T.replicate (unPos initialIndent - 1) " " <> x
g [] = []
g (x:xs) = f x : xs
ls <- g . reverse . dropWhile isBlank <$> go []
CodeBlock Nothing (assembleCodeBlock clevel ls) <$ sc
pUnorderedList :: BParser (Block Isp)
pUnorderedList = do
alevel <- (<> mkPos 2) <$> L.indentLevel
(bullet, startBulletPos) <- try $ do
p <- getPosition
b <- char '-' <|> char '+' <|> char '*'
eof <|> sc1
return (b, p)
level' <- L.indentLevel
let innerBlocks bulletPos = do
p <- getPosition
let tooFar = sourceLine p > sourceLine bulletPos <> pos1
if tooFar || sourceColumn p < alevel
then return [if tooFar then emptyParagraph else emptyNaked]
else subEnv True (slevel alevel level') pBlocks
x <- innerBlocks startBulletPos
xs <- many $ do
bulletPos <- try $ do
p <- getPosition
guard (sourceColumn p >= sourceColumn startBulletPos)
void (char bullet)
eof <|> sc1
return p
innerBlocks bulletPos
return (UnorderedList (normalizeListItems (x:|xs)))
pOrderedList :: BParser (Block Isp)
pOrderedList = do
pos' <- getPosition
(start, del, alevel, startIndexPos) <- try $ do
p <- getPosition
start <- L.decimal
del <- char '.' <|> char ')'
alevel <- (<> pos1) <$> L.indentLevel
eof <|> sc1
return (start, del, alevel, p)
level' <- L.indentLevel
let innerBlocks l indexPos = do
p <- getPosition
let tooFar = sourceLine p > sourceLine indexPos <> pos1
if tooFar || sourceColumn p < l
then return [if tooFar then emptyParagraph else emptyNaked]
else subEnv True (slevel alevel level') pBlocks
x <- innerBlocks alevel startIndexPos
xs <- manyIndexed (start + 1) $ \expected -> do
pos <- getPosition
(actual, alevel', indexPos) <- try $ do
p <- getPosition
guard (sourceColumn p >= sourceColumn startIndexPos)
i <- L.decimal
void (char del)
alevel' <- (<> pos1) <$> L.indentLevel
eof <|> sc1
return (i, alevel', p)
let sie = Naked . IspError $ FancyError
(nes pos)
(E.singleton . ErrorCustom $ ListIndexOutOfOrder actual expected)
f items =
if actual == expected
then items
else sie:items
f <$> innerBlocks alevel' indexPos
let sie = Naked . IspError $ FancyError
(nes pos')
(E.singleton . ErrorCustom $ ListStartIndexTooBig start)
x' = if start <= 999999999 then x else sie:x
return (OrderedList start (normalizeListItems (x':|xs)))
-- TODO Still not sure about the block quote syntax. Apparently (see e.g.
-- CM197), it makes some cases ambiguous and not user-friendly. We probably
-- should bite the bullet and implement the original markdown block quote
-- syntax. Also restore some tests to their original form, because I have
-- altered some of them already.
pBlockquote :: BParser (Block Isp)
pBlockquote = do
alevel <- (<> mkPos 2) <$> L.indentLevel
try $ do
void (char '>')
sc
l <- L.indentLevel
guard (l >= alevel)
level' <- L.indentLevel
xs <- subEnv False (slevel alevel level') pBlocks
return (Blockquote xs)
pParagraph :: BParser (Block Isp)
pParagraph = do
startPos <- getPosition
allowNaked <- asks benvAllowNaked
rlevel <- asks benvRefLevel
let go ls = do
l <- lookAhead (option "" nonEmptyLine)
case (isBlank l, isParagraphBroken rlevel l) of
(True, _) -> return (reverse ls, Paragraph)
(_, True) -> return (reverse ls, Naked)
(_, False) -> do
void nonEmptyLine
continue <- eol'
if continue
then go (l:ls)
else return (reverse (l:ls), Naked)
l <- nonEmptyLine
continue <- eol'
(ls, toBlock) <-
if continue
then go []
else return ([], Naked)
(if allowNaked then toBlock else Paragraph)
(IspSpan startPos (assembleParagraph (l:ls))) <$ sc
----------------------------------------------------------------------------
-- Inline parser
-- | Run a given parser on 'Isp'.
runIsp
:: IParser a -- ^ The parser to run
-> Isp -- ^ Input for the parser
-> Either (ParseError Char MMarkErr) a -- ^ Result of parsing
runIsp _ (IspError err) = Left err
runIsp p (IspSpan startPos input) =
snd (runParser' (evalStateT p SpaceChar) pst)
where
pst = State
{ stateInput = input
, statePos = nes startPos
, stateTokensProcessed = 0
, stateTabWidth = mkPos 4 }
pInlines :: InlineConfig -> IParser (NonEmpty Inline)
pInlines InlineConfig {..} =
if iconfigAllowEmpty
then nes (Plain "") <$ eof <|> stuff
else stuff
where
stuff = NE.some . label "inline content" . choice $
[ pCodeSpan ] <>
[ pInlineLink | iconfigAllowLinks ] <>
[ pImage | iconfigAllowImages ] <>
[ try (angel pAutolink) | iconfigAllowLinks ] <>
[ pEnclosedInline
, try pHardLineBreak
, pPlain ]
angel = between (char '<') (char '>')
pCodeSpan :: IParser Inline
pCodeSpan = do
n <- try (length <$> some (char '`'))
let finalizer = try $ do
void $ count n (char '`')
notFollowedBy (char '`')
r <- CodeSpan . collapseWhiteSpace . T.concat <$>
manyTill (label "code span content" $
takeWhile1P Nothing (== '`') <|>
takeWhile1P Nothing (/= '`'))
finalizer
put OtherChar
return r
pInlineLink :: IParser Inline
pInlineLink = do
xs <- between (char '[') (char ']') $
pInlines def { iconfigAllowLinks = False }
void (char '(') <* sc
dest <- pUri
mtitle <- optional (sc1 *> pTitle)
sc <* char ')'
put OtherChar
return (Link xs dest mtitle)
pImage :: IParser Inline
pImage = do
let nonEmptyDesc = char '!' *> between (char '[') (char ']')
(pInlines def { iconfigAllowImages = False })
alt <- nes (Plain "") <$ string "![]" <|> nonEmptyDesc
void (char '(') <* sc
src <- pUri
mtitle <- optional (sc1 *> pTitle)
sc <* char ')'
put OtherChar
return (Image alt src mtitle)
pUri :: IParser URI
pUri = do
uri <- between (char '<') (char '>') URI.parser <|> naked
put OtherChar
return uri
where
naked = do
startPos <- getPosition
input <- takeWhileP Nothing $ \x ->
not (isSpaceN x || x == ')')
let pst = State
{ stateInput = input
, statePos = nes startPos
, stateTokensProcessed = 0
, stateTabWidth = mkPos 4 }
case snd (runParser' (URI.parser <* eof) pst) of
Left err' ->
case replaceEof "end of URI literal" err' of
TrivialError pos us es -> do
setPosition (NE.head pos)
failure us es
FancyError pos xs -> do
setPosition (NE.head pos)
fancyFailure xs
Right x -> return x
pTitle :: IParser Text
pTitle = choice
[ p '\"' '\"'
, p '\'' '\''
, p '(' ')' ]
where
p start end = between (char start) (char end) $
manyEscapedWith (/= end) "unescaped character"
pAutolink :: IParser Inline
pAutolink = do
notFollowedBy (char '>') -- empty links don't make sense
uri <- URI.parser
put OtherChar
return $ case isEmailUri uri of
Nothing ->
let txt = (nes . Plain . URI.render) uri
in Link txt uri Nothing
Just email ->
let txt = nes (Plain email)
uri' = URI.makeAbsolute mailtoScheme uri
in Link txt uri' Nothing
pEnclosedInline :: IParser Inline
pEnclosedInline = do
let noEmpty = def { iconfigAllowEmpty = False }
st <- choice
[ pLfdr (DoubleFrame StrongFrame StrongFrame)
, pLfdr (DoubleFrame StrongFrame EmphasisFrame)
, pLfdr (SingleFrame StrongFrame)
, pLfdr (SingleFrame EmphasisFrame)
, pLfdr (DoubleFrame StrongFrame_ StrongFrame_)
, pLfdr (DoubleFrame StrongFrame_ EmphasisFrame_)
, pLfdr (SingleFrame StrongFrame_)
, pLfdr (SingleFrame EmphasisFrame_)
, pLfdr (DoubleFrame StrikeoutFrame StrikeoutFrame)
, pLfdr (DoubleFrame StrikeoutFrame SubscriptFrame)
, pLfdr (SingleFrame StrikeoutFrame)
, pLfdr (SingleFrame SubscriptFrame)
, pLfdr (SingleFrame SuperscriptFrame) ]
case st of
SingleFrame x ->
liftFrame x <$> pInlines noEmpty <* pRfdr x
DoubleFrame x y -> do
inlines0 <- pInlines noEmpty
thisFrame <- pRfdr x <|> pRfdr y
let thatFrame = if x == thisFrame then y else x
immediate <- True <$ pRfdr thatFrame <|> pure False
if immediate
then (return . liftFrame thatFrame . nes . liftFrame thisFrame) inlines0
else do
inlines1 <- pInlines noEmpty
void (pRfdr thatFrame)
return . liftFrame thatFrame $
liftFrame thisFrame inlines0 <| inlines1
pLfdr :: InlineState -> IParser InlineState
pLfdr st = try $ do
let dels = inlineStateDel st
pos <- getPosition
void (string dels)
leftChar <- get
mrightChar <- lookAhead (optional anyChar)
let failNow = do
setPosition pos
(mmarkErr . NonFlankingDelimiterRun . toNesTokens) dels
case (leftChar, isTransparent <$> mrightChar) of
(_, Nothing) -> failNow
(_, Just True) -> failNow
(RightFlankingDel, _) -> failNow
(OtherChar, _) -> failNow
(SpaceChar, _) -> return ()
(LeftFlankingDel, _) -> return ()
put LeftFlankingDel
return st
pRfdr :: InlineFrame -> IParser InlineFrame
pRfdr frame = try $ do
let dels = inlineFrameDel frame
pos <- getPosition
void (string dels)
leftChar <- get
mrightChar <- lookAhead (optional anyChar)
let failNow = do
setPosition pos
(mmarkErr . NonFlankingDelimiterRun . toNesTokens) dels
case (leftChar, mrightChar) of
(SpaceChar, _) -> failNow
(LeftFlankingDel, _) -> failNow
(_, Nothing) -> return ()
(_, Just rightChar) ->
if | isTransparent rightChar -> return ()
| isMarkupChar rightChar -> return ()
| otherwise -> failNow
put RightFlankingDel
return frame
pHardLineBreak :: IParser Inline
pHardLineBreak = do
void (char '\\')
eol
notFollowedBy eof
sc'
put SpaceChar
return LineBreak
pPlain :: IParser Inline
pPlain = Plain . T.pack <$> some
(pEscapedChar <|> pNewline <|> pNonEscapedChar)
where
pEscapedChar = escapedChar <* put OtherChar
pNewline = hidden . try $
'\n' <$ sc' <* eol <* sc' <* put SpaceChar
pNonEscapedChar = label "unescaped non-markup character" . choice $
[ try (char '\\' <* notFollowedBy eol) <* put OtherChar
, try (char '!' <* notFollowedBy (char '[')) <* put SpaceChar
, try (char '<' <* notFollowedBy (pAutolink <* char '>')) <* put OtherChar
, spaceChar <* put SpaceChar
, satisfy isTrans <* put SpaceChar
, satisfy isOther <* put OtherChar ]
isTrans x = isTransparentPunctuation x && x /= '!'
isOther x = not (isMarkupChar x) && x /= '\\' && x /= '!' && x /= '<'
----------------------------------------------------------------------------
-- Parsing helpers
nonEmptyLine :: BParser Text
nonEmptyLine = takeWhile1P Nothing notNewline
manyEscapedWith :: MonadParsec e Text m => (Char -> Bool) -> String -> m Text
manyEscapedWith f l = T.pack <$> many (escapedChar <|> (satisfy f <?> l))
someEscapedWith :: MonadParsec e Text m => (Char -> Bool) -> m Text
someEscapedWith f = T.pack <$> some (escapedChar <|> satisfy f)
escapedChar :: MonadParsec e Text m => m Char
escapedChar = try (char '\\' *> satisfy isAsciiPunctuation)
<?> "escaped character"
sc :: MonadParsec e Text m => m ()
sc = void $ takeWhileP (Just "white space") isSpaceN
sc1 :: MonadParsec e Text m => m ()
sc1 = void $ takeWhile1P (Just "white space") isSpaceN
sc' :: MonadParsec e Text m => m ()
sc' = void $ takeWhileP (Just "white space") isSpace
sc1' :: MonadParsec e Text m => m ()
sc1' = void $ takeWhile1P (Just "white space") isSpace
eol :: MonadParsec e Text m => m ()
eol = void . label "newline" $ choice
[ string "\n"
, string "\r\n"
, string "\r" ]
eol' :: MonadParsec e Text m => m Bool
eol' = option False (True <$ eol)
subEnv :: Bool -> Pos -> BParser a -> BParser a
subEnv benvAllowNaked benvRefLevel = local (const BlockEnv {..})
slevel :: Pos -> Pos -> Pos
slevel a l = if l >= ilevel a then a else l
ilevel :: Pos -> Pos
ilevel = (<> mkPos 4)
----------------------------------------------------------------------------
-- Other helpers
isSpace :: Char -> Bool
isSpace x = x == ' ' || x == '\t'
isSpaceN :: Char -> Bool
isSpaceN x = isSpace x || x == '\n' || x == '\r'
notNewline :: Char -> Bool
notNewline x = x /= '\n' && x /= '\r'
isBlank :: Text -> Bool
isBlank = T.all isSpace
isMarkupChar :: Char -> Bool
isMarkupChar = \case
'*' -> True
'~' -> True
'_' -> True
'`' -> True
'^' -> True
'[' -> True
']' -> True
_ -> False
isAsciiPunctuation :: Char -> Bool
isAsciiPunctuation x =
(x >= '!' && x <= '/') ||
(x >= ':' && x <= '@') ||
(x >= '[' && x <= '`') ||
(x >= '{' && x <= '~')
isTransparentPunctuation :: Char -> Bool
isTransparentPunctuation = \case
'!' -> True
'"' -> True
'(' -> True
')' -> True
',' -> True
'-' -> True
'.' -> True
':' -> True
';' -> True
'?' -> True
'{' -> True
'}' -> True
'–' -> True
'—' -> True
_ -> False
isTransparent :: Char -> Bool
isTransparent x = Char.isSpace x || isTransparentPunctuation x
nes :: a -> NonEmpty a
nes a = a :| []
assembleCodeBlock :: Pos -> [Text] -> Text
assembleCodeBlock indent ls = T.unlines (stripIndent indent <$> ls)
isParagraphBroken :: Pos -> Text -> Bool
isParagraphBroken rlevel = isJust . parseMaybe (p <* takeRest)
where
p :: Parsec Void Text ()
p = do
setTabWidth (mkPos 4)
sc
alevel <- L.indentLevel
guard (alevel < ilevel rlevel)
unless (alevel < rlevel) . choice $
[ void (char '>')
, try $ choice (char <$> ("-+*#" :: String)) *> (eof <|> sc1')
, try $ (L.decimal :: Parsec Void Text Integer) *>
(char '.' <|> char ')') *> (eof <|> sc1')
]
assembleParagraph :: [Text] -> Text
assembleParagraph = go
where
go [] = ""
go [x] = T.dropWhileEnd isSpace x
go (x:xs) = x <> "\n" <> go xs
indentLevel :: Text -> Pos
indentLevel = T.foldl' f pos1 . T.takeWhile isSpace
where
f n ch
| ch == ' ' = n <> pos1
| ch == '\t' = n <> mkPos 4
| otherwise = n
stripIndent :: Pos -> Text -> Text
stripIndent indent txt = T.drop m txt
where
m = snd $ T.foldl' f (0, 0) (T.takeWhile p txt)
p x = isSpace x || x == '>'
f (!j, !n) ch
| j >= i = (j, n)
| ch == ' ' = (j + 1, n + 1)
| ch == '\t' = (j + 4, n + 1)
| otherwise = (j, n)
i = unPos indent - 1
collapseWhiteSpace :: Text -> Text
collapseWhiteSpace =
T.stripEnd . T.filter (/= '\0') . snd . T.mapAccumL f True
where
f seenSpace ch =
case (seenSpace, g ch) of
(False, False) -> (False, ch)
(True, False) -> (False, ch)
(False, True) -> (True, ' ')
(True, True) -> (True, '\0')
g ' ' = True
g '\t' = True
g '\n' = True
g _ = False
inlineFrameDel :: InlineFrame -> Text
inlineFrameDel = \case
EmphasisFrame -> "*"
EmphasisFrame_ -> "_"
StrongFrame -> "**"
StrongFrame_ -> "__"
StrikeoutFrame -> "~~"
SubscriptFrame -> "~"
SuperscriptFrame -> "^"
inlineStateDel :: InlineState -> Text
inlineStateDel = \case
SingleFrame x -> inlineFrameDel x
DoubleFrame x y -> inlineFrameDel x <> inlineFrameDel y
liftFrame :: InlineFrame -> NonEmpty Inline -> Inline
liftFrame = \case
StrongFrame -> Strong
EmphasisFrame -> Emphasis
StrongFrame_ -> Strong
EmphasisFrame_ -> Emphasis
StrikeoutFrame -> Strikeout
SubscriptFrame -> Subscript
SuperscriptFrame -> Superscript
replaceEof :: String -> ParseError Char e -> ParseError Char e
replaceEof altLabel = \case
TrivialError pos us es -> TrivialError pos (f <$> us) (E.map f es)
FancyError pos xs -> FancyError pos xs
where
f EndOfInput = Label (NE.fromList altLabel)
f x = x
mmarkErr :: MonadParsec MMarkErr s m => MMarkErr -> m a
mmarkErr = fancyFailure . E.singleton . ErrorCustom
toNesTokens :: Text -> NonEmpty Char
toNesTokens = NE.fromList . T.unpack
isEmailUri :: URI -> Maybe Text
isEmailUri uri =
case URI.unRText <$> URI.uriPath uri of
[x] ->
if Email.isValid (TE.encodeUtf8 x) &&
(isNothing (URI.uriScheme uri) ||
URI.uriScheme uri == Just mailtoScheme)
then Just x
else Nothing
_ -> Nothing
mailtoScheme :: URI.RText 'URI.Scheme
mailtoScheme = fromJust (URI.mkScheme "mailto")
splitYamlError :: FilePath -> String -> (Maybe SourcePos, String)
splitYamlError file str = maybe (Nothing, str) (first pure) (parseMaybe p str)
where
p :: Parsec Void String (SourcePos, String)
p = do
void (string "YAML parse exception at line ")
l <- mkPos . (+ 2) <$> L.decimal
void (string ", column ")
c <- mkPos . (+ 1) <$> L.decimal
void (string ":\n")
r <- takeRest
return (SourcePos file l c, r)
fromRight :: Either a b -> b
fromRight (Right x) = x
fromRight _ =
error "Text.MMark.Parser.fromRight: the impossible happened"
manyIndexed :: (Alternative m, Num n) => n -> (n -> m a) -> m [a]
manyIndexed n' m = go n'
where
go !n = liftA2 (:) (m n) (go (n + 1)) <|> pure []
emptyParagraph :: Block Isp
emptyParagraph = Paragraph (IspSpan (initialPos "") "")
emptyNaked :: Block Isp
emptyNaked = Naked (IspSpan (initialPos "") "")
normalizeListItems :: NonEmpty [Block Isp] -> NonEmpty [Block Isp]
normalizeListItems xs' =
if getAny $ foldMap (foldMap (Any . isParagraph)) (drop 1 x :| xs)
then fmap toParagraph <$> xs'
else case x of
[] -> xs'
(y:ys) -> r $ (toNaked y : ys) :| xs
where
(x:|xs) = r xs'
r = NE.reverse . fmap reverse
isParagraph = \case
OrderedList _ _ -> False
UnorderedList _ -> False
Naked _ -> False
_ -> True
toParagraph (Naked inner) = Paragraph inner
toParagraph other = other
toNaked (Paragraph inner) = Naked inner
toNaked other = other