Skip to content

Commit

Permalink
Simplify emphasis pattern.
Browse files Browse the repository at this point in the history
  • Loading branch information
lepture committed Dec 23, 2018
1 parent 8e5b69a commit ca1e7b5
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 116 deletions.
46 changes: 22 additions & 24 deletions mistune/inlines.py
Expand Up @@ -58,26 +58,18 @@ class InlineParser(ScannerParser):
#: [an example]: https://example.com "optional title"
#: Matches the `[label]` form of a reference link (optionally prefixed
#: with `!` for an image); the label may contain escapes and is capped
#: at 1000 characters to bound regex backtracking.
REF_LINK2 = r'!?\[((?:[^\\\[\]]|' + ESCAPE + '){0,1000})\]'

# NOTE(review): the diff view interleaved the pre-commit EMPHASIS/STRONG
# patterns with the post-commit ones, leaving unbalanced parentheses.
# Only the post-commit definitions are kept; RULE_NAMES references these
# two names.

#: emphasis and strong * or _::
#:
#:    *emphasis* **strong**
#:    _emphasis_ __strong__
ASTERISK_EMPHASIS = (
    # group 1: the delimiter run (* or **); group 2: the inner text,
    # which may not start with whitespace, '*', '"', '<' or '['
    r'(\*{1,2})((?:(?:' + ESCAPE + r'|[^\s*"<\[])[\s\S]*?)?'
    r'(?:' + ESCAPE + r'|[^\s*]))\1'
)

#: underscore variant; the leading \b and the trailing lookahead forbid
#: intra-word emphasis (e.g. snake_case stays literal)
UNDERSCORE_EMPHASIS = (
    r'\b(_{1,2})((?:(?:' + ESCAPE + r'|[^\s_])[\s\S]*?)?'
    r'(?:' + ESCAPE + r'|[^\s_]))\1'
    r'(?!_|[^\s' + PUNCTUATION + r'])\b'
)

#: codespan with `::
Expand Down Expand Up @@ -109,7 +101,8 @@ class InlineParser(ScannerParser):

#: Names of the inline rules; each name maps to a pattern attribute and a
#: parse_<name> method.  The order presumably defines match priority in
#: ScannerParser — confirm against the scanner implementation.
# The diff view had duplicated old-side entries ('strong', 'emphasis' and a
# second 'std_link', 'ref_link', 'ref_link2'); this keeps the post-commit
# tuple only.
RULE_NAMES = (
    'escape', 'inline_html', 'auto_link', 'footnote',
    'std_link', 'ref_link', 'ref_link2',
    'asterisk_emphasis', 'underscore_emphasis',
    'codespan', 'strikethrough', 'linebreak',
)

Expand Down Expand Up @@ -186,12 +179,17 @@ def parse_footnote(self, m, state):
state['footnotes'].append(key)
return 'footnote_ref', key, index

def parse_emphasis(self, m, state):
text = m.group(0)[1:-1]
return 'emphasis', self.render(text, state)
def parse_asterisk_emphasis(self, m, state):
    """Handle a `*`/`**` match by deferring to the shared emphasis tokenizer."""
    token = self.tokenize_emphasis(m, state)
    return token

def parse_underscore_emphasis(self, m, state):
    """Handle a `_`/`__` match by deferring to the shared emphasis tokenizer."""
    token = self.tokenize_emphasis(m, state)
    return token

def parse_strong(self, m, state):
text = m.group(0)[2:-2]
def tokenize_emphasis(self, m, state):
    """Build an inline token from an emphasis/strong regex match.

    Group 1 of the match is the delimiter run (``*``, ``**``, ``_`` or
    ``__``); group 2 is the enclosed text.  A single-character delimiter
    yields an ``emphasis`` token, anything longer a ``strong`` token, and
    the inner text is rendered recursively.
    """
    token_type = 'emphasis' if len(m.group(1)) == 1 else 'strong'
    return token_type, self.render(m.group(2), state)

def parse_codespan(self, m, state):
Expand Down
7 changes: 4 additions & 3 deletions tests/fixtures/__init__.py
Expand Up @@ -13,11 +13,12 @@

def load_cases(TestClass, assert_method, filename, ignore=None):
def attach_case(n, text, html):
def test_case(self):
def method(self):
assert_method(self, n, text, html)

name = 'test_{}'.format(n)
setattr(TestClass, name, test_case)
method.__name__ = name
setattr(TestClass, name, method)

for n, text, html in load_examples(filename):
if ignore and ignore(n):
Expand Down Expand Up @@ -45,7 +46,7 @@ def parse_examples(text):

if md and html:
count += 1
n = '%s_%02d' % (section, count)
n = '%s_%03d' % (section, count)
md = md.replace(u'\u2192', '\t')
html = html.replace(u'\u2192', '\t')
yield n, md, html
178 changes: 89 additions & 89 deletions tests/test_commonmark.py
Expand Up @@ -5,99 +5,95 @@


# NOTE(review): this span is a diff view that interleaves the pre- and
# post-commit contents of IGNORE_CASES.  The 2-digit entries use the old
# fixture numbering ('%s_%02d') and the 3-digit entries the new one
# ('%s_%03d'); after the renumbering only the 3-digit names can match —
# confirm against the fixture loader before relying on this set.
IGNORE_CASES = {
    'setext_headings_02', # we only allow one line title
    'setext_headings_15',

    'setext_headings_03', # must start with 2 = or -
    'setext_headings_07', # ignore
    'setext_headings_13', # ignore

    'html_blocks_39', # ignore
    'link_reference_definitions_19', # weird rule

    'block_quotes_08', # we treat it different

    'list_items_05', # I don't agree
    'list_items_24',
    'list_items_28',
    'list_items_39', # no lazy
    'list_items_40',
    'list_items_41',

    'lists_07', # we use simple way to detect tight list
    'lists_16',
    'lists_17',
    'lists_18',
    'lists_19',

    'block_quotes_05', # we don't allow lazy continuation
    'block_quotes_06',
    'block_quotes_11',
    'block_quotes_20',
    'block_quotes_23',
    'block_quotes_24', # this test case shows why lazy is not good

    'code_spans_09', # code has no priority
    'code_spans_10',

    'entity_and_numeric_character_references_04', # &entity is allowed
    'entity_and_numeric_character_references_05',

    'links_31', # different behavior
    'links_37',
    'links_38', # code has no priority
    'links_39',
    'links_45', # different behavior
    'links_46',
    'links_49',
    'links_50', # code has no priority
    'links_51', # different behavior
    'links_64', # allow empty key
    'links_65',

    'images_02', # we just keep everything as raw
    'images_03',
    'images_04',
    'images_05',
    'images_06',
    'images_14',
    'images_18',

    'autolinks_02', # don't understand

    'setext_headings_002', # we only allow one line title
    'setext_headings_015',

    'setext_headings_003', # must start with 2 = or -
    'setext_headings_007', # ignore
    'setext_headings_013', # ignore

    'html_blocks_039', # ignore
    'link_reference_definitions_019', # weird rule

    'block_quotes_008', # we treat it different

    'list_items_005', # I don't agree
    'list_items_024',
    'list_items_028',
    'list_items_039', # no lazy
    'list_items_040',
    'list_items_041',

    'lists_007', # we use simple way to detect tight list
    'lists_016',
    'lists_017',
    'lists_018',
    'lists_019',

    'block_quotes_005', # we don't allow lazy continuation
    'block_quotes_006',
    'block_quotes_011',
    'block_quotes_020',
    'block_quotes_023',
    'block_quotes_024', # this test case shows why lazy is not good

    'code_spans_009', # code has no priority
    'code_spans_010',

    'entity_and_numeric_character_references_004', # &entity is allowed
    'entity_and_numeric_character_references_005',

    'links_031', # different behavior
    'links_037',
    'links_038', # code has no priority
    'links_039',
    'links_045', # different behavior
    'links_046',
    'links_049',
    'links_050', # code has no priority
    'links_051', # different behavior
    'links_064', # allow empty key
    'links_065',

    'images_002', # we just keep everything as raw
    'images_003',
    'images_004',
    'images_005',
    'images_006',
    'images_014',
    'images_018',

    'autolinks_002', # don't understand
}
# NOTE(review): merged diff residue — the 2-digit names are the pre-commit
# fixture numbering and the 3-digit names the post-commit one (see the
# '%s_%03d' change in tests/fixtures/__init__.py); only the 3-digit
# entries can match the renumbered fixtures.
INSANE_CASES = {
    'fenced_code_blocks_13',
    'fenced_code_blocks_15',
    'list_items_33',
    'list_items_38',

    'link_reference_definitions_02', # only allow one line definition
    'link_reference_definitions_03',
    'link_reference_definitions_04',
    'link_reference_definitions_05',
    'link_reference_definitions_07',
    'link_reference_definitions_21',

    'links_25',
    'links_32',
    'links_33',
    'links_41',
    'links_60',
    'links_82',
    'links_84',
    'fenced_code_blocks_013',
    'fenced_code_blocks_015',
    'list_items_033',
    'list_items_038',

    'link_reference_definitions_002', # only allow one line definition
    'link_reference_definitions_003',
    'link_reference_definitions_004',
    'link_reference_definitions_005',
    'link_reference_definitions_007',
    'link_reference_definitions_021',

    'links_025',
    'links_032',
    'links_033',
    'links_041',
    'links_060',
    'links_082',
    'links_084',
}

# Per-case output rewrites applied before comparison (tab expansion inside
# <code> differs from the spec's expectation).
# NOTE(review): merged diff residue — both the old 2-digit keys and the
# new 3-digit keys are present; only 'tabs_005'..'tabs_007' match the
# renumbered fixtures.
DIFFERENCES = {
    'tabs_05': lambda s: s.replace('<code> ', '<code>'),
    'tabs_06': lambda s: s.replace('<code> ', '<code>'),
    'tabs_07': lambda s: s.replace('<code> ', '<code>'),
    'tabs_005': lambda s: s.replace('<code> ', '<code>'),
    'tabs_006': lambda s: s.replace('<code> ', '<code>'),
    'tabs_007': lambda s: s.replace('<code> ', '<code>'),
}


class TestCommonMark(TestCase):
pass


def assert_spec(self, n, text, html):
print(text)
result = mistune.html(text)
Expand All @@ -120,16 +116,20 @@ def assert_spec(self, n, text, html):
'paragraphs', 'blank_lines',
'block_quotes', 'list_items', 'lists',
'backslash', 'entity', 'code_spans',
# emphasis, links
'images', 'autolinks', 'raw_html',
# emphasis
'links', 'images', 'autolinks', 'raw_html',
'hard_line', 'soft_line', 'textual',
}


def ignore(n):
    """Return True for CommonMark example names the suite should skip.

    All 'emphasis_*' examples are skipped wholesale (the test module's
    PASSED list also excludes the emphasis section), everything else only
    when listed in IGNORE_CASES or INSANE_CASES.
    """
    # The diff view interleaved the old guard (`if not n.startswith('links')`)
    # with the new one, leaving an `if` with no body; this keeps only the
    # post-commit emphasis guard.
    if n.startswith('emphasis'):
        return True
    return (n in IGNORE_CASES) or (n in INSANE_CASES)


fixtures.load_cases(TestCase, assert_spec, 'commonmark.txt', ignore)
class TestCommonMark(TestCase):
    """Intentionally empty: fixtures.load_cases attaches one generated
    test method per commonmark.txt example at import time."""
    pass


fixtures.load_cases(TestCommonMark, assert_spec, 'commonmark.txt', ignore)

0 comments on commit ca1e7b5

Please sign in to comment.