Permalink
Browse files

Rewrote the emphasis and strong emphasis parser.

  • Loading branch information...
1 parent 4ba5b32 commit f6a8c76f17b74541711ef491596d903829b21bfb @michelf committed Jun 7, 2008
Showing with 144 additions and 38 deletions.
  1. +3 −0 PHP Markdown Readme.text
  2. +141 −38 markdown.php
@@ -204,6 +204,9 @@ Current:
* Lists can now have empty items.
+* Rewrote the emphasis and strong emphasis parser to fix some issues
+ with oddly placed and overlong markers.
+
1.0.1l:
View
@@ -1101,45 +1101,148 @@ function makeCodeSpan($code) {
}
+ var $em_relist = array( # Patterns for one-char (em) markers, indexed by the em marker currently open ('' when none is open).
+ '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)', # Opener: a lone * or _ followed by non-whitespace, but not by one of . , : ; plus whitespace.
+ '*' => '(?<=\S)(?<!\*)\*(?!\*)', # Closer: a lone * preceded by non-whitespace.
+ '_' => '(?<=\S)(?<!_)_(?!_)', # Closer: a lone _ preceded by non-whitespace.
+ );
+ var $strong_relist = array( # Patterns for two-char (strong) markers, indexed by the strong marker currently open ('' when none is open).
+ '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)', # Opener: exactly ** or __ followed by non-whitespace, but not by one of . , : ; plus whitespace.
+ '**' => '(?<=\S)(?<!\*)\*\*(?!\*)', # Closer: exactly ** preceded by non-whitespace.
+ '__' => '(?<=\S)(?<!_)__(?!_)', # Closer: exactly __ preceded by non-whitespace.
+ );
+ var $em_strong_relist = array( # Patterns for three-char markers, indexed by "$em$strong"; only consulted when that key exists (nothing open, or em and strong both open with the same char).
+ '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)', # Opener: exactly *** or ___ followed by non-whitespace, but not by one of . , : ; plus whitespace.
+ '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)', # Closer: exactly *** preceded by non-whitespace.
+ '___' => '(?<=\S)(?<!_)___(?!_)', # Closer: exactly ___ preceded by non-whitespace.
+ );
+
function doItalicsAndBold($text) {
- # <strong> must go first:
- $text = preg_replace_callback('{
- ( # $1: Marker
- (?<!\*\*) \* | # (not preceded by two chars of
- (?<!__) _ # the same marker)
- )
- \1
- (?=\S) # Not followed by whitespace
- (?!\1\1) # or two others marker chars.
- ( # $2: Content
- (?>
- [^*_]+? # Anthing not em markers.
- |
- # Balence any regular emphasis inside.
- \1 (?=\S) .+? (?<=\S) \1
- |
- . # Allow unbalenced * and _.
- )+?
- )
- (?<=\S) \1\1 # End mark not preceded by whitespace.
- }sx',
- array(&$this, '_doItalicAndBold_strong_callback'), $text);
- # Then <em>:
- $text = preg_replace_callback(
- '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
- array(&$this, '_doItalicAndBold_em_callback'), $text);
-
- return $text;
- }
- function _doItalicAndBold_em_callback($matches) {
- $text = $matches[2];
- $text = $this->runSpanGamut($text);
- return $this->hashPart("<em>$text</em>");
- }
- function _doItalicAndBold_strong_callback($matches) {
- $text = $matches[2];
- $text = $this->runSpanGamut($text);
- return $this->hashPart("<strong>$text</strong>");
+     #
+     # Scan $text one emphasis token at a time (*, _, **, __, ***, ___)
+     # and wrap matched pairs in <em> and/or <strong>. The content of each
+     # closed span is passed through runSpanGamut() and the resulting tag
+     # through hashPart(). Markers left unmatched at the end of the text
+     # are put back as literal characters.
+     #
+     # Returns the processed text.
+     #
+     # State:
+     #   $token_stack / $text_stack  parallel stacks, innermost span at
+     #                               index 0; the bottom entry ('' token)
+     #                               accumulates the final output.
+     #   $em, $strong                currently open em / strong marker,
+     #                               or '' when none is open.
+     #   $three_char_em              true right after a *** or ___ opener,
+     #                               until the first closing marker.
+     #
+     $token_stack = array('');
+     $text_stack = array('');
+     $em = '';
+     $strong = '';
+     $three_char_em = false;
+
+     while (1) {
+         #
+         # Create regular expression for searching emphasis tokens
+         # in current context.
+         #
+         $token_relist = array();
+         if (isset($this->em_strong_relist["$em$strong"])) {
+             $token_relist[] = $this->em_strong_relist["$em$strong"];
+         }
+         $token_relist[] = $this->strong_relist[$strong];
+         $token_relist[] = $this->em_relist[$em];
+         $token_re = '{('. implode('|', $token_relist) .')}';
+
+         #
+         # Each loop iteration searches for the next emphasis token and
+         # updates the stacks according to the current state.
+         #
+         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
+         $text_stack[0] .= $parts[0];
+         # Bound by reference so that a missing index (no token found)
+         # reads as null instead of raising an undefined-offset notice.
+         $token =& $parts[1];
+         $text =& $parts[2];
+
+         if (empty($token)) {
+             # Reached end of text span: empty stack without emitting
+             # any more emphasis.
+             while ($token_stack[0]) {
+                 $text_stack[1] .= array_shift($token_stack);
+                 $text_stack[0] .= array_shift($text_stack);
+             }
+             break;
+         }
+
+         $token_len = strlen($token);
+         if ($three_char_em) {
+             # Reached closing marker while inside a three-char emphasis.
+             if ($token_len == 3) {
+                 # Three-char closing marker, close em and strong.
+                 array_shift($token_stack);
+                 $span = array_shift($text_stack);
+                 $span = $this->runSpanGamut($span);
+                 $span = "<strong><em>$span</em></strong>";
+                 $text_stack[0] .= $this->hashPart($span);
+                 $em = '';
+                 $strong = '';
+             } else {
+                 # Other closing marker: close one em or strong and
+                 # change current token state to match the other.
+                 # ($token[0] rather than $token{0}: the curly-brace
+                 # offset syntax is not accepted by PHP 8.)
+                 $token_stack[0] = str_repeat($token[0], 3-$token_len);
+                 $tag = $token_len == 2 ? "strong" : "em";
+                 $span = $text_stack[0];
+                 $span = $this->runSpanGamut($span);
+                 $span = "<$tag>$span</$tag>";
+                 $text_stack[0] = $this->hashPart($span);
+                 $$tag = ''; # $$tag stands for $em or $strong
+             }
+             $three_char_em = false;
+         } else if ($token_len == 3) {
+             if ($em) {
+                 # Reached closing marker for both em and strong: close
+                 # the two innermost spans, whichever was opened last
+                 # first.
+                 for ($i = 0; $i < 2; ++$i) {
+                     $shifted_token = array_shift($token_stack);
+                     $tag = strlen($shifted_token) == 2 ? "strong" : "em";
+                     $span = array_shift($text_stack);
+                     $span = $this->runSpanGamut($span);
+                     $span = "<$tag>$span</$tag>";
+                     $text_stack[0] .= $this->hashPart($span);
+                     $$tag = ''; # $$tag stands for $em or $strong
+                 }
+             } else {
+                 # Reached opening three-char emphasis marker. Push on token
+                 # stack; will be handled by the special condition above.
+                 $em = $token[0];
+                 $strong = "$em$em";
+                 array_unshift($token_stack, $token);
+                 array_unshift($text_stack, '');
+                 $three_char_em = true;
+             }
+         } else if ($token_len == 2) {
+             if ($strong) {
+                 # Unwind any dangling emphasis marker: put it back as
+                 # literal text and forget that an em was open, so later
+                 # single markers can open a new em again.
+                 if (strlen($token_stack[0]) == 1) {
+                     $text_stack[1] .= array_shift($token_stack);
+                     $text_stack[0] .= array_shift($text_stack);
+                     $em = '';
+                 }
+                 # Closing strong marker:
+                 array_shift($token_stack);
+                 $span = array_shift($text_stack);
+                 $span = $this->runSpanGamut($span);
+                 $span = "<strong>$span</strong>";
+                 $text_stack[0] .= $this->hashPart($span);
+                 $strong = '';
+             } else {
+                 # Opening strong marker:
+                 array_unshift($token_stack, $token);
+                 array_unshift($text_stack, '');
+                 $strong = $token;
+             }
+         } else {
+             # Here $token_len == 1
+             if ($em) {
+                 if (strlen($token_stack[0]) == 1) {
+                     # Closing emphasis marker:
+                     array_shift($token_stack);
+                     $span = array_shift($text_stack);
+                     $span = $this->runSpanGamut($span);
+                     $span = "<em>$span</em>";
+                     $text_stack[0] .= $this->hashPart($span);
+                     # The em span is now closed; any enclosing strong
+                     # stays open.
+                     $em = '';
+                 } else {
+                     # The innermost open span is not the em (a strong
+                     # was opened after it): keep the marker as text.
+                     $text_stack[0] .= $token;
+                 }
+             } else {
+                 # Opening emphasis marker:
+                 array_unshift($token_stack, $token);
+                 array_unshift($text_stack, '');
+                 $em = $token;
+             }
+         }
+     }
+     return $text_stack[0];
}

0 comments on commit f6a8c76

Please sign in to comment.