diff --git a/lib/markdown2.py b/lib/markdown2.py index 48d9d4c6..eb388547 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -363,6 +363,9 @@ def convert(self, text): # Turn block-level HTML blocks into hash entries text = self._hash_html_blocks(text, raw=True) + if 'markdown-in-html' in self.extras: + text = self._do_markdown_in_html(text) + if "fenced-code-blocks" in self.extras and self.safe_mode: text = self._do_fenced_code_blocks(text) @@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False): return text - def _strict_tag_block_sub(self, text, html_tags_re, callback): + def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False): + ''' + Finds and substitutes HTML blocks within blocks of text + + Args: + text: the text to search + html_tags_re: a regex pattern of HTML block tags to match against. + For example, `Markdown._block_tags_a` + callback: callback function that receives the found HTML text block + allow_indent: allow matching HTML blocks that are not completely outdented + ''' tag_count = 0 current_tag = html_tags_re block = '' result = '' for chunk in text.splitlines(True): - is_markup = re.match(r'^(?:(?=))?(?(%s)\b>?)' % current_tag, chunk) + is_markup = re.match( + r'^(\s{0,%s})(?:(?=))?(?(%s)\b>?)' % ('' if allow_indent else '0', current_tag), chunk + ) block += chunk if is_markup: - if chunk.startswith(''): + if chunk.startswith('%s' % is_markup.group(1)): tag_count -= 1 else: # if close tag is in same line - if self._tag_is_closed(is_markup.group(2), chunk): + if self._tag_is_closed(is_markup.group(3), chunk): # we must ignore these is_markup = None else: tag_count += 1 - current_tag = is_markup.group(2) + current_tag = is_markup.group(3) if tag_count == 0: if is_markup: @@ -915,6 +930,15 @@ def _tag_is_closed(self, tag_name, text): # super basic check if number of open tags == number of closing tags return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('%s>' % tag_name, text)) + def _do_markdown_in_html(self, text): + def callback(block): + indent, block = self._uniform_outdent(block) + block = self._hash_html_block_sub(block) + block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False) + return block + + return self._strict_tag_block_sub(text, self._block_tags_a, callback, True) + def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. @@ -1893,7 +1917,8 @@ def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: - item = self._run_block_gamut(self._outdent(item)) + item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1] + item = self._run_block_gamut(item) else: # Recursion for sub-lists: item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1]) @@ -2201,7 +2226,7 @@ def _wavedrom_block_sub(self, match): return self._uniform_indent( '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag), - lead_indent, include_empty_lines=True + lead_indent, indent_empty_lines=True ) def _do_wavedrom_blocks(self, text): @@ -2612,13 +2637,16 @@ def _outdent(self, text): # Remove one level of line-leading tabs or spaces return self._outdent_re.sub('', text) - def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): - # Removes the smallest common leading indentation from each (non empty) - # line of `text` and returns said indent along with the outdented text. - # The `min_outdent` kwarg makes sure the smallest common whitespace - # must be at least this size - # The `max_outdent` sets the maximum amount a line can be - # outdented by + @staticmethod + def _uniform_outdent(text, min_outdent=None, max_outdent=None): + ''' + Removes the smallest common leading indentation from each (non empty) + line of `text` and returns said indent along with the outdented text. + + Args: + min_outdent: make sure the smallest common whitespace is at least this size + max_outdent: the maximum amount a line can be outdented by + ''' # find the leading whitespace for every line whitespace = [ @@ -2652,11 +2680,26 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): return outdent, ''.join(outdented) - def _uniform_indent(self, text, indent, include_empty_lines=False): - return ''.join( - (indent + line if line.strip() or include_empty_lines else '') - for line in text.splitlines(True) - ) + @staticmethod + def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False): + ''' + Uniformly indent a block of text by a fixed amount + + Args: + text: the text to indent + indent: a string containing the indent to apply + include_empty_lines: don't remove whitespace only lines + indent_empty_lines: indent whitespace only lines with the rest of the text + ''' + blocks = [] + for line in text.splitlines(True): + if line.strip() or indent_empty_lines: + blocks.append(indent + line) + elif include_empty_lines: + blocks.append(line) + else: + blocks.append('') + return ''.join(blocks) @staticmethod def _match_overlaps_substr(text, match, substr): diff --git a/test/tm-cases/markdown_in_html_in_lists.html b/test/tm-cases/markdown_in_html_in_lists.html new file mode 100644 index 00000000..981113f9 --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.html @@ -0,0 +1,37 @@ +
Item 1
+ +Some text
+ +Item 2
+ +Item 3
+ +Item 4
+ +Some text
+ +Item 5
+ +Some text
+ +Other more different nested list:
+ +Item 1 +With some space after
Item 2
+ +Item 1 +ABCDEF
Item 2
+ +