Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transforms all mistune mathjax logic to be inline #611

Merged
merged 8 commits into from Aug 8, 2017
62 changes: 17 additions & 45 deletions nbconvert/filters/markdown_mistune.py
Expand Up @@ -19,72 +19,42 @@
from pygments.util import ClassNotFound

from nbconvert.filters.strings import add_anchor
from nbconvert.utils.exceptions import ConversionException


class MathBlockGrammar(mistune.BlockGrammar):
    r"""Block grammar with two extra patterns for display math.

    * ``block_math`` -- text enclosed in ``$$ ... $$``.
    * ``latex_environment`` -- ``\begin{name} ... \end{name}`` where both
      names are identical; a name is lowercase letters with an optional
      trailing ``*``.

    Both patterns are anchored at the start of the remaining input and use
    ``re.DOTALL`` so the lazily matched body may contain newlines.
    """

    block_math = re.compile(r"^\$\$(.*?)\$\$", flags=re.DOTALL)
    latex_environment = re.compile(
        r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", flags=re.DOTALL)

class MathBlockLexer(mistune.BlockLexer):
    """Block lexer whose rule list starts with the two math rules.

    ``block_math`` and ``latex_environment`` are prepended to
    ``mistune.BlockLexer.default_rules``; each ``parse_*`` method records
    its match as a token dict appended to ``self.tokens``.
    """

    default_rules = (['block_math', 'latex_environment']
                     + mistune.BlockLexer.default_rules)

    def __init__(self, rules=None, **kwargs):
        """Set up the lexer, using ``MathBlockGrammar()`` when ``rules`` is None."""
        grammar = MathBlockGrammar() if rules is None else rules
        super(MathBlockLexer, self).__init__(grammar, **kwargs)

    def parse_block_math(self, m):
        """Parse a $$math$$ block"""
        token = {'type': 'block_math', 'text': m.group(1)}
        self.tokens.append(token)

    def parse_latex_environment(self, m):
        """Record a LaTeX environment: group 1 is its name, group 2 its body."""
        env_name, env_body = m.group(1, 2)
        token = {
            'type': 'latex_environment',
            'name': env_name,
            'text': env_body,
        }
        self.tokens.append(token)


class MathInlineGrammar(mistune.InlineGrammar):
    r"""Inline grammar extended with math patterns.

    * ``math`` -- ``$...$`` only (single capture group).
    * ``inline_math`` -- ``$...$`` or ``\\(...\\)`` (two backslashes before
      each parenthesis); the body lands in group 1 or group 2 respectively.
    * ``block_math`` -- ``$$...$$`` or ``\\[...\\]``; body in group 1 or 2.
    * ``latex_environment`` -- ``\begin{name}...\end{name}`` with matching
      names.
    * ``text`` -- plain text up to, but not including, one of the
      characters ``\ < ! [ _ * ` ~ $``, an ``http(s)://`` URL, two or more
      spaces followed by a newline, or the end of input.

    All math patterns use ``re.DOTALL`` so bodies may contain newlines.
    """

    math = re.compile(r"^\$(.+?)\$", flags=re.DOTALL)
    inline_math = re.compile(
        r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", flags=re.DOTALL)
    block_math = re.compile(
        r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", flags=re.DOTALL)
    latex_environment = re.compile(
        r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", flags=re.DOTALL)
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)')


class MathInlineLexer(mistune.InlineLexer):
    """Inline lexer that hands math spans to the renderer.

    ``block_math``, ``inline_math`` and ``latex_environment`` are prepended
    to ``mistune.InlineLexer.default_rules``. Each ``output_*`` method
    receives the regex match for its rule and returns whatever the
    corresponding renderer method returns.
    """

    default_rules = (['block_math', 'inline_math', 'latex_environment']
                     + mistune.InlineLexer.default_rules)

    def __init__(self, renderer, rules=None, **kwargs):
        """Set up the lexer, using ``MathInlineGrammar()`` when ``rules`` is None."""
        if rules is None:
            rules = MathInlineGrammar()
        super(MathInlineLexer, self).__init__(renderer, rules, **kwargs)

    @staticmethod
    def _matched_body(m):
        """Return the first capture group of ``m`` that took part in the match.

        The math patterns are two-way alternations, so exactly one of the
        groups is not None. Testing ``is not None`` (rather than truthiness)
        keeps an empty body such as ``$$$$`` as ``''`` instead of None.
        """
        for body in m.groups():
            if body is not None:
                return body
        return None

    def output_math(self, m):
        """Render a match of the single-group ``math`` pattern as inline math."""
        # NOTE(review): 'math' is no longer in default_rules; presumably kept
        # for callers that pass their own rule list -- confirm before removing.
        return self.renderer.inline_math(m.group(1))

    def output_inline_math(self, m):
        """Render ``$...$`` or ``\\\\(...\\\\)`` via ``renderer.inline_math``."""
        return self.renderer.inline_math(self._matched_body(m))

    def output_block_math(self, m):
        """Render ``$$...$$`` or ``\\\\[...\\\\]`` via ``renderer.block_math``."""
        return self.renderer.block_math(self._matched_body(m))

    def output_latex_environment(self, m):
        """Render a LaTeX environment: group 1 is its name, group 2 its body."""
        return self.renderer.latex_environment(m.group(1), m.group(2))


class MarkdownWithMath(mistune.Markdown):
    """``mistune.Markdown`` wired up with the math-aware lexers by default."""

    def __init__(self, renderer, **kwargs):
        """Fill in the math lexers unless the caller supplied ``inline``/``block``."""
        kwargs.setdefault('inline', MathInlineLexer)
        kwargs.setdefault('block', MathBlockLexer)
        super(MarkdownWithMath, self).__init__(renderer, **kwargs)

    def output_block_math(self):
        """Render the current ``block_math`` token from its ``'text'`` entry."""
        body = self.token['text']
        return self.renderer.block_math(body)

    def output_latex_environment(self):
        """Render the current ``latex_environment`` token from ``'name'`` and ``'text'``."""
        env_name = self.token['name']
        env_body = self.token['text']
        return self.renderer.latex_environment(env_name, env_body)


class IPythonRenderer(mistune.Renderer):
def block_code(self, code, lang):
Expand All @@ -111,7 +81,7 @@ def header(self, text, level, raw=None):
# html.escape() is not available on python 2.7, and cgi.escape() was
# removed in python 3.8, so use whichever one this interpreter provides.
# For more details, see:
# https://wiki.python.org/moin/EscapingHtml
def escape_html(self, text):
    """Return ``text`` with ``&``, ``<`` and ``>`` replaced by HTML entities.

    Quote characters are left untouched, which is what the original
    ``cgi.escape(text)`` call produced.
    """
    try:
        from html import escape
    except ImportError:  # python 2.7 has no html module
        from cgi import escape
        return escape(text)
    # html.escape() escapes quotes by default; cgi.escape() did not.
    return escape(text, quote=False)

def block_math(self, text):
Expand All @@ -125,6 +95,8 @@ def latex_environment(self, name, text):
def inline_math(self, text):
    """Return the HTML-escaped ``text`` wrapped in single ``$`` delimiters."""
    escaped = self.escape_html(text)
    return '$%s$' % escaped


def markdown2html_mistune(source):
    """Convert a markdown string to HTML using mistune"""
    # escape=False is passed straight through to the renderer's constructor.
    renderer = IPythonRenderer(escape=False)
    converter = MarkdownWithMath(renderer=renderer)
    return converter.render(source)
94 changes: 53 additions & 41 deletions nbconvert/filters/tests/test_markdown.py
Expand Up @@ -52,9 +52,10 @@ class TestMarkdown(TestsBase):
def test_markdown2latex(self):
"""markdown2latex test"""
for index, test in enumerate(self.tests):
self._try_markdown(partial(convert_pandoc, from_format='markdown',
to_format='latex'), test,
self.tokens[index])
self._try_markdown(
partial(
convert_pandoc, from_format='markdown', to_format='latex'),
test, self.tokens[index])

@dec.onlyif_cmds_exist('pandoc')
def test_markdown2latex_markup(self):
Expand Down Expand Up @@ -108,34 +109,35 @@ def test_markdown2html(self):
self._try_markdown(markdown2html, test, self.tokens[index])

def test_markdown2html_heading_anchors(self):
for md, tokens in [
('# test',
('<h1', '>test', 'id="test"', u'&#182;</a>', "anchor-link")
),
('###test head space',
('<h3', '>test head space', 'id="test-head-space"', u'&#182;</a>', "anchor-link")
)
]:
for md, tokens in [('# test', ('<h1', '>test', 'id="test"',
u'&#182;</a>', "anchor-link")),
('###test head space',
('<h3', '>test head space', 'id="test-head-space"',
u'&#182;</a>', "anchor-link"))]:
self._try_markdown(markdown2html, md, tokens)

def test_markdown2html_math(self):
# Mathematical expressions not containing <, >, & should be passed through unaltered
# Mathematical expressions not containing <, >, &
# should be passed through unaltered
# all the "<", ">", "&" must be escaped correctly
cases = [("\\begin{equation*}\n"
"\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n"
"\\end{equation*}"),
("$$\n"
"a = 1 *3* 5\n"
"$$"),
"$ a = 1 *3* 5 $",
"$s_i = s_{i}\n$",
"$a<b&b<lt$",
"$a<b&lt;b>a;a-b<0$",
"$<k'>$",
"$$a<b&b<lt$$",
"$$a<b&lt;b>a;a-b<0$$",
"$$<k'>$$",
"""$
cases = [(
"\\begin{equation*}\n" +
("\\left( \\sum_{k=1}^n a_k b_k \\right)^2 "
"\\leq \\left( \\sum_{k=1}^n a_k^2 \\right) "
"\\left( \\sum_{k=1}^n b_k^2 \\right)\n") +
"\\end{equation*}"),
("$$\n"
"a = 1 *3* 5\n"
"$$"),
"$ a = 1 *3* 5 $",
"$s_i = s_{i}\n$",
"$a<b&b<lt$",
"$a<b&lt;b>a;a-b<0$",
"$<k'>$",
"$$a<b&b<lt$$",
"$$a<b&lt;b>a;a-b<0$$",
"$$<k'>$$",
"""$
\\begin{tabular}{ l c r }
1 & 2 & 3 \\
4 & 5 & 6 \\
Expand All @@ -145,9 +147,11 @@ def test_markdown2html_math(self):
for case in cases:
result = markdown2html(case)
# find the equation in the generated texts
search_result = re.search("\$.*\$",result,re.DOTALL)
search_result = re.search("\$.*\$", result, re.DOTALL)
if search_result is None:
search_result = re.search("\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",result,re.DOTALL)
search_result = re.search(
"\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",
result, re.DOTALL)
math = search_result.group(0)
# the resulting math part can not contain "<", ">" or
# "&" not followed by "lt;", "gt;", or "amp;".
Expand All @@ -156,19 +160,25 @@ def test_markdown2html_math(self):
# python 2.7 has assertNotRegexpMatches instead of assertNotRegex
if not hasattr(self, 'assertNotRegex'):
self.assertNotRegex = self.assertNotRegexpMatches
self.assertNotRegex(math,"&(?![gt;|lt;|amp;])")
self.assertNotRegex(math, "&(?![gt;|lt;|amp;])")
# the result should be able to be unescaped correctly
self.assertEquals(case,self._unescape(math))
self.assertEquals(case, self._unescape(math))

def test_markdown2html_math_mixed(self):
"""ensure markdown between inline and inline-block math"""
case = """The entries of $C$ are given by the exact formula:
"""ensure markdown between inline and inline-block math works and
test multiple LaTeX markup syntaxes.
"""
case = """The entries of \\\\(C\\\\) are given by the exact formula:
$$
C_{ik} = \sum_{j=1}^n A_{ij} B_{jk}
C_{ik} = \sum_{j=1}^n A_{ij} B_{jk},
$$
but there are many ways to _implement_ this computation. $\approx 2mnp$ flops"""
self._try_markdown(markdown2html, case,
case.replace("_implement_", "<em>implement</em>"))
but you can _implement_ this computation in many ways.
$\approx 2mnp$ flops are needed for \\\\[ C_{ik} = \sum_{j=1}^n A_{ij} B_{jk} \\\\]."""
output_check = (case.replace("_implement_", "<em>implement</em>")
.replace("\\\\(", "$").replace("\\\\)", "$")
.replace("\\\\[", "$$").replace("\\\\]", "$$"))
# these replacements are needed because we use $ and $$ in our html output
self._try_markdown(markdown2html, case, output_check)

def test_markdown2html_math_paragraph(self):
"""these should all parse without modification"""
Expand Down Expand Up @@ -200,7 +210,7 @@ def test_markdown2html_math_paragraph(self):

for case in cases:
s = markdown2html(case)
self.assertIn(case,self._unescape(s))
self.assertIn(case, self._unescape(s))

@dec.onlyif_cmds_exist('pandoc')
def test_markdown2rst(self):
Expand All @@ -212,8 +222,10 @@ def test_markdown2rst(self):
tokens[1] = r'\*\*test'

for index, test in enumerate(self.tests):
self._try_markdown(partial(convert_pandoc, from_format='markdown',
to_format='rst'), test, tokens[index])
self._try_markdown(
partial(
convert_pandoc, from_format='markdown', to_format='rst'),
test, tokens[index])

def _try_markdown(self, method, test, tokens):
results = method(test)
Expand All @@ -223,7 +235,7 @@ def _try_markdown(self, method, test, tokens):
for token in tokens:
self.assertIn(token, results)

def _unescape(self,s):
def _unescape(self, s):
# undo cgi.escape() manually
# We must be careful here for compatibility
# html.unescape() is not available on python 2.7
Expand Down