Skip to content

Commit

Permalink
Simplify emphasis pattern.
Browse files Browse the repository at this point in the history
  • Loading branch information
lepture committed Dec 23, 2018
1 parent 8e5b69a commit ca1e7b5
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 116 deletions.
46 changes: 22 additions & 24 deletions mistune/inlines.py
Expand Up @@ -58,26 +58,18 @@ class InlineParser(ScannerParser):
#: [an example]: https://example.com "optional title"
#: Matches the `[label]` form of a reference link (optionally prefixed
#: with `!` for an image); the label may contain escapes and is capped
#: at 1000 characters to bound regex backtracking.
REF_LINK2 = r'!?\[((?:[^\\\[\]]|' + ESCAPE + '){0,1000})\]'

# NOTE(review): the diff view interleaved the pre-commit EMPHASIS/STRONG
# patterns with the post-commit ones, leaving unbalanced parentheses.
# Only the post-commit definitions are kept; RULE_NAMES references these
# two names.

#: emphasis and strong * or _::
#:
#:    *emphasis* **strong**
#:    _emphasis_ __strong__
ASTERISK_EMPHASIS = (
    # group 1: the delimiter run (* or **); group 2: the inner text,
    # which may not start with whitespace, '*', '"', '<' or '['
    r'(\*{1,2})((?:(?:' + ESCAPE + r'|[^\s*"<\[])[\s\S]*?)?'
    r'(?:' + ESCAPE + r'|[^\s*]))\1'
)

#: underscore variant; the leading \b and the trailing lookahead forbid
#: intra-word emphasis (e.g. snake_case stays literal)
UNDERSCORE_EMPHASIS = (
    r'\b(_{1,2})((?:(?:' + ESCAPE + r'|[^\s_])[\s\S]*?)?'
    r'(?:' + ESCAPE + r'|[^\s_]))\1'
    r'(?!_|[^\s' + PUNCTUATION + r'])\b'
)

#: codespan with `::
Expand Down Expand Up @@ -109,7 +101,8 @@ class InlineParser(ScannerParser):

#: Names of the inline rules; each name maps to a pattern attribute and a
#: parse_<name> method.  The order presumably defines match priority in
#: ScannerParser — confirm against the scanner implementation.
# The diff view had duplicated old-side entries ('strong', 'emphasis' and a
# second 'std_link', 'ref_link', 'ref_link2'); this keeps the post-commit
# tuple only.
RULE_NAMES = (
    'escape', 'inline_html', 'auto_link', 'footnote',
    'std_link', 'ref_link', 'ref_link2',
    'asterisk_emphasis', 'underscore_emphasis',
    'codespan', 'strikethrough', 'linebreak',
)

Expand Down Expand Up @@ -186,12 +179,17 @@ def parse_footnote(self, m, state):
state['footnotes'].append(key)
return 'footnote_ref', key, index

def parse_emphasis(self, m, state):
text = m.group(0)[1:-1]
return 'emphasis', self.render(text, state)
def parse_asterisk_emphasis(self, m, state):
    """Handle a `*`/`**` match by deferring to the shared emphasis tokenizer."""
    token = self.tokenize_emphasis(m, state)
    return token

def parse_underscore_emphasis(self, m, state):
    """Handle a `_`/`__` match by deferring to the shared emphasis tokenizer."""
    token = self.tokenize_emphasis(m, state)
    return token

def parse_strong(self, m, state):
text = m.group(0)[2:-2]
def tokenize_emphasis(self, m, state):
    """Build an inline token from an emphasis/strong regex match.

    Group 1 of the match is the delimiter run (``*``, ``**``, ``_`` or
    ``__``); group 2 is the enclosed text.  A single-character delimiter
    yields an ``emphasis`` token, anything longer a ``strong`` token, and
    the inner text is rendered recursively.
    """
    token_type = 'emphasis' if len(m.group(1)) == 1 else 'strong'
    return token_type, self.render(m.group(2), state)

def parse_codespan(self, m, state):
Expand Down
7 changes: 4 additions & 3 deletions tests/fixtures/__init__.py
Expand Up @@ -13,11 +13,12 @@

def load_cases(TestClass, assert_method, filename, ignore=None):
def attach_case(n, text, html):
def test_case(self):
def method(self):
assert_method(self, n, text, html)

name = 'test_{}'.format(n)
setattr(TestClass, name, test_case)
method.__name__ = name
setattr(TestClass, name, method)

for n, text, html in load_examples(filename):
if ignore and ignore(n):
Expand Down Expand Up @@ -45,7 +46,7 @@ def parse_examples(text):

if md and html:
count += 1
n = '%s_%02d' % (section, count)
n = '%s_%03d' % (section, count)
md = md.replace(u'\u2192', '\t')
html = html.replace(u'\u2192', '\t')
yield n, md, html
178 changes: 89 additions & 89 deletions tests/test_commonmark.py
Expand Up @@ -5,99 +5,95 @@


# NOTE(review): this span is a diff view that interleaves the pre- and
# post-commit contents of IGNORE_CASES.  The 2-digit entries use the old
# fixture numbering ('%s_%02d') and the 3-digit entries the new one
# ('%s_%03d'); after the renumbering only the 3-digit names can match —
# confirm against the fixture loader before relying on this set.
IGNORE_CASES = {
    'setext_headings_02', # we only allow one line title
    'setext_headings_15',

    'setext_headings_03', # must start with 2 = or -
    'setext_headings_07', # ignore
    'setext_headings_13', # ignore

    'html_blocks_39', # ignore
    'link_reference_definitions_19', # weird rule

    'block_quotes_08', # we treat it different

    'list_items_05', # I don't agree
    'list_items_24',
    'list_items_28',
    'list_items_39', # no lazy
    'list_items_40',
    'list_items_41',

    'lists_07', # we use simple way to detect tight list
    'lists_16',
    'lists_17',
    'lists_18',
    'lists_19',

    'block_quotes_05', # we don't allow lazy continuation
    'block_quotes_06',
    'block_quotes_11',
    'block_quotes_20',
    'block_quotes_23',
    'block_quotes_24', # this test case shows why lazy is not good

    'code_spans_09', # code has no priority
    'code_spans_10',

    'entity_and_numeric_character_references_04', # &entity is allowed
    'entity_and_numeric_character_references_05',

    'links_31', # different behavior
    'links_37',
    'links_38', # code has no priority
    'links_39',
    'links_45', # different behavior
    'links_46',
    'links_49',
    'links_50', # code has no priority
    'links_51', # different behavior
    'links_64', # allow empty key
    'links_65',

    'images_02', # we just keep everything as raw
    'images_03',
    'images_04',
    'images_05',
    'images_06',
    'images_14',
    'images_18',

    'autolinks_02', # don't understand

    'setext_headings_002', # we only allow one line title
    'setext_headings_015',

    'setext_headings_003', # must start with 2 = or -
    'setext_headings_007', # ignore
    'setext_headings_013', # ignore

    'html_blocks_039', # ignore
    'link_reference_definitions_019', # weird rule

    'block_quotes_008', # we treat it different

    'list_items_005', # I don't agree
    'list_items_024',
    'list_items_028',
    'list_items_039', # no lazy
    'list_items_040',
    'list_items_041',

    'lists_007', # we use simple way to detect tight list
    'lists_016',
    'lists_017',
    'lists_018',
    'lists_019',

    'block_quotes_005', # we don't allow lazy continuation
    'block_quotes_006',
    'block_quotes_011',
    'block_quotes_020',
    'block_quotes_023',
    'block_quotes_024', # this test case shows why lazy is not good

    'code_spans_009', # code has no priority
    'code_spans_010',

    'entity_and_numeric_character_references_004', # &entity is allowed
    'entity_and_numeric_character_references_005',

    'links_031', # different behavior
    'links_037',
    'links_038', # code has no priority
    'links_039',
    'links_045', # different behavior
    'links_046',
    'links_049',
    'links_050', # code has no priority
    'links_051', # different behavior
    'links_064', # allow empty key
    'links_065',

    'images_002', # we just keep everything as raw
    'images_003',
    'images_004',
    'images_005',
    'images_006',
    'images_014',
    'images_018',

    'autolinks_002', # don't understand
}
# NOTE(review): merged diff residue — the 2-digit names are the pre-commit
# fixture numbering and the 3-digit names the post-commit one (see the
# '%s_%03d' change in tests/fixtures/__init__.py); only the 3-digit
# entries can match the renumbered fixtures.
INSANE_CASES = {
    'fenced_code_blocks_13',
    'fenced_code_blocks_15',
    'list_items_33',
    'list_items_38',

    'link_reference_definitions_02', # only allow one line definition
    'link_reference_definitions_03',
    'link_reference_definitions_04',
    'link_reference_definitions_05',
    'link_reference_definitions_07',
    'link_reference_definitions_21',

    'links_25',
    'links_32',
    'links_33',
    'links_41',
    'links_60',
    'links_82',
    'links_84',
    'fenced_code_blocks_013',
    'fenced_code_blocks_015',
    'list_items_033',
    'list_items_038',

    'link_reference_definitions_002', # only allow one line definition
    'link_reference_definitions_003',
    'link_reference_definitions_004',
    'link_reference_definitions_005',
    'link_reference_definitions_007',
    'link_reference_definitions_021',

    'links_025',
    'links_032',
    'links_033',
    'links_041',
    'links_060',
    'links_082',
    'links_084',
}

# Per-case output rewrites applied before comparison (tab expansion inside
# <code> differs from the spec's expectation).
# NOTE(review): merged diff residue — both the old 2-digit keys and the
# new 3-digit keys are present; only 'tabs_005'..'tabs_007' match the
# renumbered fixtures.
DIFFERENCES = {
    'tabs_05': lambda s: s.replace('<code> ', '<code>'),
    'tabs_06': lambda s: s.replace('<code> ', '<code>'),
    'tabs_07': lambda s: s.replace('<code> ', '<code>'),
    'tabs_005': lambda s: s.replace('<code> ', '<code>'),
    'tabs_006': lambda s: s.replace('<code> ', '<code>'),
    'tabs_007': lambda s: s.replace('<code> ', '<code>'),
}


class TestCommonMark(TestCase):
pass


def assert_spec(self, n, text, html):
print(text)
result = mistune.html(text)
Expand All @@ -120,16 +116,20 @@ def assert_spec(self, n, text, html):
'paragraphs', 'blank_lines',
'block_quotes', 'list_items', 'lists',
'backslash', 'entity', 'code_spans',
# emphasis, links
'images', 'autolinks', 'raw_html',
# emphasis
'links', 'images', 'autolinks', 'raw_html',
'hard_line', 'soft_line', 'textual',
}


def ignore(n):
    """Return True for CommonMark example names the suite should skip.

    All 'emphasis_*' examples are skipped wholesale (the test module's
    PASSED list also excludes the emphasis section), everything else only
    when listed in IGNORE_CASES or INSANE_CASES.
    """
    # The diff view interleaved the old guard (`if not n.startswith('links')`)
    # with the new one, leaving an `if` with no body; this keeps only the
    # post-commit emphasis guard.
    if n.startswith('emphasis'):
        return True
    return (n in IGNORE_CASES) or (n in INSANE_CASES)


fixtures.load_cases(TestCase, assert_spec, 'commonmark.txt', ignore)
class TestCommonMark(TestCase):
    """Intentionally empty: fixtures.load_cases attaches one generated
    test method per commonmark.txt example at import time."""
    pass


fixtures.load_cases(TestCommonMark, assert_spec, 'commonmark.txt', ignore)

0 comments on commit ca1e7b5

Please sign in to comment.