diff --git a/lib/markdown2.py b/lib/markdown2.py index 48d9d4c6..eb388547 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -363,6 +363,9 @@ def convert(self, text): # Turn block-level HTML blocks into hash entries text = self._hash_html_blocks(text, raw=True) + if 'markdown-in-html' in self.extras: + text = self._do_markdown_in_html(text) + if "fenced-code-blocks" in self.extras and self.safe_mode: text = self._do_fenced_code_blocks(text) @@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False): return text - def _strict_tag_block_sub(self, text, html_tags_re, callback): + def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False): + ''' + Finds and substitutes HTML blocks within blocks of text + + Args: + text: the text to search + html_tags_re: a regex pattern of HTML block tags to match against. + For example, `Markdown._block_tags_a` + callback: callback function that receives the found HTML text block + allow_indent: allow matching HTML blocks that are not completely outdented + ''' tag_count = 0 current_tag = html_tags_re block = '' result = '' for chunk in text.splitlines(True): - is_markup = re.match(r'^(?:(?=))?(?)' % current_tag, chunk) + is_markup = re.match( + r'^(\s{0,%s})(?:(?=))?(?)' % ('' if allow_indent else '0', current_tag), chunk + ) block += chunk if is_markup: - if chunk.startswith('' % tag_name, text)) == len(re.findall('' % tag_name, text)) + def _do_markdown_in_html(self, text): + def callback(block): + indent, block = self._uniform_outdent(block) + block = self._hash_html_block_sub(block) + block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False) + return block + + return self._strict_tag_block_sub(text, self._block_tags_a, callback, True) + def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. @@ -1893,7 +1917,8 @@ def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: - item = self._run_block_gamut(self._outdent(item)) + item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1] + item = self._run_block_gamut(item) else: # Recursion for sub-lists: item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1]) @@ -2201,7 +2226,7 @@ def _wavedrom_block_sub(self, match): return self._uniform_indent( '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag), - lead_indent, include_empty_lines=True + lead_indent, indent_empty_lines=True ) def _do_wavedrom_blocks(self, text): @@ -2612,13 +2637,16 @@ def _outdent(self, text): # Remove one level of line-leading tabs or spaces return self._outdent_re.sub('', text) - def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): - # Removes the smallest common leading indentation from each (non empty) - # line of `text` and returns said indent along with the outdented text. - # The `min_outdent` kwarg makes sure the smallest common whitespace - # must be at least this size - # The `max_outdent` sets the maximum amount a line can be - # outdented by + @staticmethod + def _uniform_outdent(text, min_outdent=None, max_outdent=None): + ''' + Removes the smallest common leading indentation from each (non empty) + line of `text` and returns said indent along with the outdented text. + + Args: + min_outdent: make sure the smallest common whitespace is at least this size + max_outdent: the maximum amount a line can be outdented by + ''' # find the leading whitespace for every line whitespace = [ @@ -2652,11 +2680,26 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): return outdent, ''.join(outdented) - def _uniform_indent(self, text, indent, include_empty_lines=False): - return ''.join( - (indent + line if line.strip() or include_empty_lines else '') - for line in text.splitlines(True) - ) + @staticmethod + def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False): + ''' + Uniformly indent a block of text by a fixed amount + + Args: + text: the text to indent + indent: a string containing the indent to apply + include_empty_lines: don't remove whitespace only lines + indent_empty_lines: indent whitespace only lines with the rest of the text + ''' + blocks = [] + for line in text.splitlines(True): + if line.strip() or indent_empty_lines: + blocks.append(indent + line) + elif include_empty_lines: + blocks.append(line) + else: + blocks.append('') + return ''.join(blocks) @staticmethod def _match_overlaps_substr(text, match, substr): diff --git a/test/tm-cases/markdown_in_html_in_lists.html b/test/tm-cases/markdown_in_html_in_lists.html new file mode 100644 index 00000000..981113f9 --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.html @@ -0,0 +1,37 @@ + diff --git a/test/tm-cases/markdown_in_html_in_lists.opts b/test/tm-cases/markdown_in_html_in_lists.opts new file mode 100644 index 00000000..25fea79f --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.opts @@ -0,0 +1 @@ +{"extras": ["markdown-in-html"]} diff --git a/test/tm-cases/markdown_in_html_in_lists.text b/test/tm-cases/markdown_in_html_in_lists.text new file mode 100644 index 00000000..e629c55d --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.text @@ -0,0 +1,17 @@ +- Item 1 +
+ ###### Block one + Some text +
+- Item 2 + - Item 3 + - Item 4 +
+ ###### Block two + Some text +
+ - Item 5 +
+ ###### Block three + Some text +
diff --git a/test/tm-cases/nested_list.html b/test/tm-cases/nested_list.html index 05851236..49624d24 100644 --- a/test/tm-cases/nested_list.html +++ b/test/tm-cases/nested_list.html @@ -34,3 +34,18 @@
  • Item 3 - yes! just a single item
  • + +

    Other more different nested list:

    + + diff --git a/test/tm-cases/nested_list.text b/test/tm-cases/nested_list.text index 94a2ece1..14fb9291 100644 --- a/test/tm-cases/nested_list.text +++ b/test/tm-cases/nested_list.text @@ -20,4 +20,14 @@ Slightly more nested list: + What + The + Code -* Item 3 - yes! just a single item \ No newline at end of file +* Item 3 - yes! just a single item + + +Other more different nested list: + +- Item 1 + With some space after + +- Item 2 + - Item 3 + - Item 4 diff --git a/test/tm-cases/seperated_list_items.html b/test/tm-cases/seperated_list_items.html new file mode 100644 index 00000000..140ad893 --- /dev/null +++ b/test/tm-cases/seperated_list_items.html @@ -0,0 +1,12 @@ + diff --git a/test/tm-cases/seperated_list_items.text b/test/tm-cases/seperated_list_items.text new file mode 100644 index 00000000..1a5c991a --- /dev/null +++ b/test/tm-cases/seperated_list_items.text @@ -0,0 +1,6 @@ +- Item 1 + ABCDEF + +- Item 2 + - Item 3 + - Item 4