Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Adjusted parsing to properly handle unbalanced empahasis operators. I…

…t now requires a closing one, no longer going all the way to the end of the paragraph, and better outputs combined em + strong. There is one other thing still left to deal with, though...
  • Loading branch information...
commit 179aacc0d906c637c17c06b6fd7a90a61f8b473b 1 parent 221c426
David E. Wheeler authored

Showing 3 changed files with 140 additions and 44 deletions. Show diff stats Hide diff stats

  1. +49 21 lib/Text/Markover.pm
  2. +58 19 t/basic.t
  3. +33 4 t/lex.t
70 lib/Text/Markover.pm
@@ -25,14 +25,22 @@ my $stem_re = qr{
25 25 | [*_](?:[*]{2}|[_]{2})
26 26 }x;
27 27
28   -my $stem_split = sub {
  28 +my $lop_split = sub {
29 29 my $l = shift;
30 30 my @c = split //, shift;
31 31 return $c[0] eq $c[1]
32   - ? ( [ $l => "$c[0]$c[1]"], [ $l => $c[2]] )
  32 + ? ( [ $l => "$c[0]$c[1]"], [ $l => $c[2]] )
33 33 : ( [ $l => $c[0] ], [ $l => "$c[1]$c[2]" ] );
34 34 };
35 35
  36 +my $rop_split = sub {
  37 + my $l = shift;
  38 + my @c = split //, shift;
  39 + return $c[1] eq $c[2]
  40 + ? ( [ $l => $c[0] ], [ $l => "$c[1]$c[2]" ] )
  41 + : ( [ $l => "$c[0]$c[1]"], [ $l => $c[2]] );
  42 +};
  43 +
36 44 sub lexer {
37 45 my ($self, $iter) = @_;
38 46 HOP::Lexer::make_lexer(
@@ -64,11 +72,11 @@ sub lexer {
64 72 my $u = eval { URI::URL->new($url) };
65 73 return $@ && !defined $u ? $url : [ $l => $u ];
66 74 } ],
67   -# [ BULLET => qr/^([ ]*)([-*+])[ \t]+/ms, sub { (shift, $2, length $1) } ],
  75 +# [ BULLET => qr/^[ ]*[-*+][ \t]+(?=\S)/ms, sub { (shift, $2, length $1) } ],
68 76
69   - [ EMMOP => qr/(?<=[^\s_*])$stem_re(?=[^\s_*])/, $stem_split ],
70   - [ EMLOP => qr/$stem_re(?=[^\s_*])/, $stem_split ],
71   - [ EMROP => qr/(?<=[^\s_*])$stem_re/, $stem_split ],
  77 + [ EMMOP => qr/(?<=[^\s_*])$stem_re(?=[^\s_*])/, $lop_split ],
  78 + [ EMLOP => qr/$stem_re(?=[^\s_*])/, $lop_split ],
  79 + [ EMROP => qr/(?<=[^\s_*])$stem_re/, $rop_split ],
72 80
73 81 [ EMMOP => qr/(?<=[^\s*_])(?:[*]{1,2})(?=[^\s*_])|(?<=[^\s*_])(?:[_]{1,2})(?=[^\s*_])/ ],
74 82 [ EMLOP => qr/[_]{1,2}(?=[^\s*_])|[*]{1,2}(?=[^\s*_])/ ],
@@ -153,7 +161,6 @@ my $eof = T(
153 161
154 162 # eob ::= BLANK | eof
155 163 my $eob = alternate( $blank, $eof );
156   -my $eob_ahead = error(lookahead($eob));
157 164
158 165 sub entitize($) {
159 166 local $_ = shift;
@@ -212,8 +219,8 @@ my $automail = lookfor( AUTOMAIL => sub {
212 219 return qq{<a href="$scheme:$email">$email</a>}
213 220 } );
214 221
215   -# emphasis ::= (lstar | mstar) not_em (rstar | mstar | lookahead(eob))
216   -# | (lline | mline) not_em (rline | mline | lookahead(eob))
  222 +# emphasis ::= (lstar | mstar) not_em (rstar | mstar)
  223 +# | (lline | mline) not_em (rline | mline)
217 224
218 225 my $lstar = match EMLOP => '*';
219 226 my $rstar = match EMROP => '*';
@@ -229,19 +236,29 @@ my $emphasis = T(
229 236 concatenate(
230 237 alternate($lstar, $mstar),
231 238 $Not_em,
232   - alternate($rstar, $mstar, $eob_ahead)
  239 + alternate($rstar, $mstar)
233 240 ),
234 241 concatenate(
235 242 alternate($lline, $mline),
236 243 $Not_em,
237   - alternate($rline, $mline, $eob_ahead)
  244 + alternate($rline, $mline)
238 245 ),
239 246 ),
240 247 sub { $html_for{em}->( @_[1,0] ) }
241 248 );
242 249
243   -# emphasis ::= (ldstar | mdstar) not_em (rdstar | mdstar | lookahead(eob))
244   -# | (ldline | mdline) not_em (rdline | mdline | lookahead(eob))
  250 +# emor := emphasis | lstar | rstar | mstar | lline | rline | mline | not_em
  251 +my $emor = T(
  252 + alternate(
  253 + $emphasis,
  254 + $lstar, $rstar, $mstar,
  255 + $lline, $rline, $mline,
  256 + ),
  257 + $joiner
  258 +);
  259 +
  260 +# strong ::= (ldstar | mdstar) not_em (rdstar | mdstar)
  261 +# | (ldline | mdline) not_em (rdline | mdline)
245 262 my $ldstar = match EMLOP => '**';
246 263 my $rdstar = match EMROP => '**';
247 264 my $mdstar = match EMMOP => '**';
@@ -256,29 +273,40 @@ my $strong = T(
256 273 concatenate(
257 274 alternate($ldstar, $mdstar),
258 275 $Not_strong,
259   - alternate($rdstar, $mdstar, $eob_ahead)
  276 + alternate($rdstar, $mdstar)
260 277 ),
261 278 concatenate(
262 279 alternate($ldline, $mdline),
263 280 $Not_strong,
264   - alternate($rdline, $mdline, $eob_ahead)
  281 + alternate($rdline, $mdline)
265 282 ),
266 283 ),
267 284 sub { $html_for{strong}->( @_[1,0] ) }
268 285 );
269 286
270   -# spans ::= (text | code | autolink | automail | emphasis | strong)+
271   -my @spans = ($text, $code, $autolink, $automail, $strong, $emphasis);
  287 +# strongor := emphasis | ldstar | rdstar | mdstar | ldline | rdline
  288 +# | mdline | not_strong
  289 +my $strongor = T(
  290 + alternate(
  291 + $strong,
  292 + $ldstar, $rdstar, $mdstar,
  293 + $ldline, $rdline, $mdline,
  294 + ),
  295 + $joiner
  296 +);
  297 +
  298 +# spans ::= (text | code | autolink | automail | emor | strongor)+
  299 +my @spans = ($text, $code, $autolink, $automail, $emor, $strongor);
272 300 $spans = T(plus( T( alternate( @spans ), $joiner, ) ), $joiner);
273 301
274   -# not_em ::= (text | code | autolink | automail | strong)+
  302 +# not_em ::= (text | code | autolink | automail | strongor)+
275 303 $not_em = T(plus( T( alternate( grep {
276   - $_ ne $emphasis
  304 + $_ ne $emor
277 305 } @spans ), $joiner, ) ), $joiner);
278 306
279   -# not_strong ::= (text | code | autolink | automail | strong)+
  307 +# not_strong ::= (text | code | autolink | automail | emor)+
280 308 $not_strong = T(plus( T( alternate( grep {
281   - $_ ne $strong
  309 + $_ ne $strongor
282 310 } @spans ), $joiner, ) ), $joiner);
283 311
284 312 # para ::= spans eob
77 t/basic.t
@@ -2,7 +2,7 @@
2 2
3 3 use strict;
4 4 use warnings;
5   -use Test::More tests => 47;
  5 +use Test::More tests => 55;
6 6 #use Test::More 'no_plan';
7 7
8 8 BEGIN { use_ok 'Text::Markover' or die; }
@@ -12,7 +12,7 @@ ok my $m = Text::Markover->new, 'Contruct Markover object';
12 12 for my $spec (
13 13
14 14 # Paragraphs.
15   - [ "Foo\n\nBar", "<p>Foo</p>\n\n<p>Bar</p>", ' with paras' ],
  15 + [ "Foo\n\nBar", "<p>Foo</p>\n\n<p>Bar</p>", 'with paras' ],
16 16 [ "Foo\n\nBar\n", "<p>Foo</p>\n\n<p>Bar</p>\n", 'with trailing newline' ],
17 17 [ "Foo\nBar\n", "<p>Foo\nBar</p>\n", 'with inline newline' ],
18 18
@@ -45,32 +45,34 @@ for my $spec (
45 45 '_this\_that_', '<p><em>this_that</em></p>',
46 46 'with simple _ emphasis and escape'
47 47 ],
48   - [ "*this", "<p><em>this</em></p>", 'with simple * emphasis and eof' ],
  48 + [ "*this", "<p>*this</p>", 'with lone * and eof' ],
49 49 [
50   - "*this\n\n", "<p><em>this</em></p>\n\n",
51   - 'with simple * emphasis and eob'
  50 + "*this\n\n", "<p>*this</p>\n\n",
  51 + 'with lone * and eob'
52 52 ],
53 53 [
54 54 "*this\n\nfoo",
55   - "<p><em>this</em></p>\n\n<p>foo</p>",
56   - 'with simple * emphasis and eob + para'
  55 + "<p>*this</p>\n\n<p>foo</p>",
  56 + 'with lone * and eob + para'
57 57 ],
58 58 [
59   - 'un*frigging*believable', '<p>un<em>frigging</em>believable</p>',
  59 + 'un*frigging*believable',
  60 + '<p>un<em>frigging</em>believable</p>',
60 61 'with mid-word * emphasis'
61 62 ],
62 63 [
63   - 'un_frigging_believable', '<p>un<em>frigging</em>believable</p>',
  64 + 'un_frigging_believable',
  65 + '<p>un<em>frigging</em>believable</p>',
64 66 'with mid-word _ emphasis'
65 67 ],
66 68 [
67 69 '*this* and *that',
68   - '<p><em>this</em> and <em>that</em></p>',
  70 + '<p><em>this</em> and *that</p>',
69 71 'two *, one hanging'
70 72 ],
71 73 [
72 74 '_this_ and _that',
73   - '<p><em>this</em> and <em>that</em></p>',
  75 + '<p><em>this</em> and _that</p>',
74 76 'two _, one hanging'
75 77 ],
76 78
@@ -88,17 +90,17 @@ for my $spec (
88 90 'with simple __ strong and escape'
89 91 ],
90 92 [
91   - "**this", "<p><strong>this</strong></p>",
92   - 'with simple ** strong and eof'
  93 + "**this", "<p>**this</p>",
  94 + 'with lone ** and eof'
93 95 ],
94 96 [
95   - "**this\n\n", "<p><strong>this</strong></p>\n\n",
96   - 'with simple ** strong and eob'
  97 + "**this\n\n", "<p>**this</p>\n\n",
  98 + 'with lone ** and eob'
97 99 ],
98 100 [
99 101 "**this\n\nfoo",
100   - "<p><strong>this</strong></p>\n\n<p>foo</p>",
101   - 'with simple ** strong and eob + para'
  102 + "<p>**this</p>\n\n<p>foo</p>",
  103 + 'with lone ** and eob + para'
102 104 ],
103 105 [
104 106 'un**frigging**believable',
@@ -112,12 +114,12 @@ for my $spec (
112 114 ],
113 115 [
114 116 '**this** and **that',
115   - '<p><strong>this</strong> and <strong>that</strong></p>',
  117 + '<p><strong>this</strong> and **that</p>',
116 118 'two **, one hanging'
117 119 ],
118 120 [
119 121 '__this__ and __that',
120   - '<p><strong>this</strong> and <strong>that</strong></p>',
  122 + '<p><strong>this</strong> and __that</p>',
121 123 'two __, one hanging'
122 124 ],
123 125
@@ -148,6 +150,37 @@ for my $spec (
148 150 ],
149 151
150 152 [
  153 + 'un*__frigging__*believable',
  154 + '<p>un<em><strong>frigging</strong></em>believable</p>',
  155 + 'with mid-word *__ emphasis'
  156 + ],
  157 + [
  158 + 'un__*frigging*__believable',
  159 + '<p>un<strong><em>frigging</em></strong>believable</p>',
  160 + 'with mid-word __* emphasis'
  161 + ],
  162 + [
  163 + 'un_**frigging**_believable',
  164 + '<p>un<em><strong>frigging</strong></em>believable</p>',
  165 + 'with mid-word _** emphasis'
  166 + ],
  167 + [
  168 + 'un**_frigging_**believable',
  169 + '<p>un<strong><em>frigging</em></strong>believable</p>',
  170 + 'with mid-word **_ emphasis'
  171 + ],
  172 +# [
  173 +# 'un***frigging***believable',
  174 +# '<p>un<strong><em>frigging</em></strong>believable</p>',
  175 +# 'with mid-word *** emphasis'
  176 +# ],
  177 +# [
  178 +# 'un___frigging___believable',
  179 +# '<p>un<strong><em>frigging</em></strong>believable</p>',
  180 +# 'with mid-word ___ emphasis'
  181 +# ],
  182 +
  183 + [
151 184 '*this **and** that*',
152 185 '<p><em>this <strong>and</strong> that</em></p>',
153 186 'mixed em * and srong **'
@@ -158,6 +191,12 @@ for my $spec (
158 191 'mixed em * and srong __'
159 192 ],
160 193
  194 + # Unbalanced emphasis.
  195 + [ '*this *that!', '<p>*this *that!</p>', '2 hangling left *s' ],
  196 + [ '_this _that!', '<p>_this _that!</p>', '2 hangling left _s' ],
  197 + [ '**this **that!', '<p>**this **that!</p>', '2 hangling left **s' ],
  198 + [ '__this __that!', '<p>__this __that!</p>', '2 hangling left __s' ],
  199 +
161 200 # Not Strong or Emphasis.
162 201 [ '* not em *', '<p>* not em *</p>', 'not em *' ],
163 202 [ '** not strong **', '<p>** not strong **</p>', 'not strong **' ],
37 t/lex.t
@@ -2,8 +2,7 @@
2 2
3 3 use strict;
4 4 use warnings;
5   -use Test::More tests => 103;
6   -
  5 +use Test::More tests => 106;
7 6 #use Test::More 'no_plan';
8 7 use Data::Dumper;
9 8 use HOP::Stream;
@@ -320,6 +319,36 @@ for my $spec (
320 319 'a mid and right __'
321 320 ],
322 321
  322 + # Unbalanced empahasis characters.
  323 + [
  324 + '*this and *that!' => [
  325 + [ EMLOP => '*' ],
  326 + [ STRING => 'this and ' ],
  327 + [ EMLOP => '*' ],
  328 + [ STRING => 'that!' ],
  329 + ],
  330 + 'two LOPs'
  331 + ],
  332 + [
  333 + 'this* and that!*' => [
  334 + [ STRING => 'this' ],
  335 + [ EMROP => '*' ],
  336 + [ STRING => ' and that!' ],
  337 + [ EMROP => '*' ],
  338 + ],
  339 + 'two ROPs'
  340 + ],
  341 + [
  342 + 'this* and *that!' => [
  343 + [ STRING => 'this' ],
  344 + [ EMROP => '*' ],
  345 + [ STRING => ' and ' ],
  346 + [ EMLOP => '*' ],
  347 + [ STRING => 'that!' ],
  348 + ],
  349 + 'ROP + LOP'
  350 + ],
  351 +
323 352 # Combining emphasis characters.
324 353 [ '*__', [ [ STRING => '*__' ] ], '*__' ],
325 354 [ '__*', [ [ STRING => '__*' ] ], '__*' ],
@@ -377,8 +406,8 @@ for my $spec (
377 406 [ EMLOP => '__' ],
378 407 [ EMLOP => '_' ],
379 408 [ STRING => 'this' ],
  409 + [ EMROP => '_' ],
380 410 [ EMROP => '__' ],
381   - [ EMROP => '_' ]
382 411 ],
383 412 'a ___word___'
384 413 ],
@@ -388,8 +417,8 @@ for my $spec (
388 417 [ EMLOP => '**' ],
389 418 [ EMLOP => '*' ],
390 419 [ STRING => 'this' ],
  420 + [ EMROP => '*' ],
391 421 [ EMROP => '**' ],
392   - [ EMROP => '*' ]
393 422 ],
394 423 'a ***word***'
395 424 ],

0 comments on commit 179aacc

Please sign in to comment.
Something went wrong with that request. Please try again.