From 700ac816b8d7fca9ae1a67b25ed85450dcaae43c Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sun, 4 Jun 2023 21:27:08 +0100
Subject: [PATCH] Fix `markdown-in-html` extra not working within lists

---
 lib/markdown2.py                             | 34 +++++++++++++++---
 test/tm-cases/markdown_in_html_in_lists.html | 37 ++++++++++++++++++++
 test/tm-cases/markdown_in_html_in_lists.opts |  1 +
 test/tm-cases/markdown_in_html_in_lists.text | 17 +++++++++
 4 files changed, 84 insertions(+), 5 deletions(-)
 create mode 100644 test/tm-cases/markdown_in_html_in_lists.html
 create mode 100644 test/tm-cases/markdown_in_html_in_lists.opts
 create mode 100644 test/tm-cases/markdown_in_html_in_lists.text
diff --git a/lib/markdown2.py b/lib/markdown2.py
index 79501bae..8d4469e5 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -363,6 +363,9 @@ def convert(self, text):
         # Turn block-level HTML blocks into hash entries
         text = self._hash_html_blocks(text, raw=True)
 
+        if 'markdown-in-html' in self.extras:
+            text = self._do_markdown_in_html(text)
+
         if "fenced-code-blocks" in self.extras and self.safe_mode:
             text = self._do_fenced_code_blocks(text)
 
@@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False):
 
         return text
 
-    def _strict_tag_block_sub(self, text, html_tags_re, callback):
+    def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False):
+        '''
+        Finds and substitutes HTML blocks within blocks of text
+
+        Args:
+            text: the text to search
+            html_tags_re: a regex pattern of HTML block tags to match against.
+                For example, `Markdown._block_tags_a`
+            callback: callback function that receives the found HTML text block
+            allow_indent: allow matching HTML blocks that are not completely outdented
+        '''
         tag_count = 0
         current_tag = html_tags_re
         block = ''
         result = ''
 
         for chunk in text.splitlines(True):
-            is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
+            is_markup = re.match(
+                r'^(\s{0,%s})(?:</code>(?=</pre>))?(</?(%s)\b>?)' % ('' if allow_indent else '0', current_tag), chunk
+            )
             block += chunk
 
             if is_markup:
-                if chunk.startswith('</'):
+                if chunk.startswith('%s</' % is_markup.group(1)):
                     tag_count -= 1
                 else:
                     # if close tag is in same line
-                    if self._tag_is_closed(is_markup.group(2), chunk):
+                    if self._tag_is_closed(is_markup.group(3), chunk):
                         # we must ignore these
                         is_markup = None
                     else:
                         tag_count += 1
-                        current_tag = is_markup.group(2)
+                        current_tag = is_markup.group(3)
 
             if tag_count == 0:
                 if is_markup:
@@ -915,6 +930,15 @@ def _tag_is_closed(self, tag_name, text):
         # super basic check if number of open tags == number of closing tags
         return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('</%s>' % tag_name, text))
 
+    def _do_markdown_in_html(self, text):  # 'markdown-in-html' extra pass: process HTML blocks in text, including indented ones
+        def callback(block):  # invoked by _strict_tag_block_sub with each HTML text block it finds
+            indent, block = self._uniform_outdent(block)  # presumably removes the shared leading indent and returns it - confirm against helper
+            block = self._hash_html_block_sub(block)  # NOTE(review): helper not visible here; presumably hashes the outdented block - confirm
+            block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False)  # re-apply the remembered indent
+            return block
+
+        return self._strict_tag_block_sub(text, self._block_tags_a, callback, True)  # True is allow_indent: match blocks that are not fully outdented
+
     def _strip_link_definitions(self, text):
         # Strips link definitions from text, stores the URLs and titles in
         # hash references.
diff --git a/test/tm-cases/markdown_in_html_in_lists.html b/test/tm-cases/markdown_in_html_in_lists.html
new file mode 100644
index 00000000..981113f9
--- /dev/null
+++ b/test/tm-cases/markdown_in_html_in_lists.html
@@ -0,0 +1,37 @@
+<ul>
+<li><p>Item 1</p>
+
+<div>
+
+<h6>Block one</h6>
+
+<p>Some text</p>
+
+</div></li>
+<li><p>Item 2</p>
+
+<ul>
+<li><p>Item 3</p>
+
+<ul>
+<li><p>Item 4</p>
+
+<div>
+
+<h6>Block two</h6>
+
+<p>Some text</p>
+
+</div></li>
+</ul></li>
+<li><p>Item 5</p>
+
+<div>
+
+<h6>Block three</h6>
+
+<p>Some text</p>
+
+</div></li>
+</ul></li>
+</ul>
diff --git a/test/tm-cases/markdown_in_html_in_lists.opts b/test/tm-cases/markdown_in_html_in_lists.opts
new file mode 100644
index 00000000..25fea79f
--- /dev/null
+++ b/test/tm-cases/markdown_in_html_in_lists.opts
@@ -0,0 +1 @@
+{"extras": ["markdown-in-html"]}
diff --git a/test/tm-cases/markdown_in_html_in_lists.text b/test/tm-cases/markdown_in_html_in_lists.text
new file mode 100644
index 00000000..e629c55d
--- /dev/null
+++ b/test/tm-cases/markdown_in_html_in_lists.text
@@ -0,0 +1,17 @@
+- Item 1
+  <div markdown="1">
+  ###### Block one
+  Some text
+  </div>
+- Item 2
+  - Item 3
+    - Item 4
+      <div markdown="1">
+      ###### Block two
+      Some text
+      </div>
+  - Item 5
+    <div markdown="1">
+    ###### Block three
+    Some text
+    </div>