backtick formatting in markdown

mortoray · Apr 28, 2019 · bbc2745 · bbc2745
1 parent fb94345
commit bbc2745
Show file tree

Hide file tree

Showing 3 changed files with 108 additions and 64 deletions.
diff --git a/log/5.md b/log/5.md
@@ -1,3 +1,5 @@
+/publish-date: 2019-04-28
+
 It's been a while since my last update as I've not had a lot of time to work on the parser.  So is the life of side-projects. :/
 
 This time around I've made changes to support lists. In the spirit of the feature the changes are:
@@ -18,7 +20,7 @@ Unlike in [markdown](https://en.wikipedia.org/wiki/Markdown), the MDL lists can
 
 - You can't see this.
 
-- But there was an extra space between these points.
+- But there was extra space between these points.
 
 Markdown supports indented lists, which might be easier to read in the source. This isn't a concern for me now, and I'll leave out support for them. I'll also ignore embedded lists until I have a decent use-case for it.
 
@@ -36,18 +38,21 @@ This change, along with the backtick support, allowed me to switch to an unparse
 
 ## Backtick support
 
-Given how much I'm using a backtick `\`` in this document, it's surprising I didn't add support for it earlier.
+Given how much I'm using a [backtick](https://en.wikipedia.org/wiki/Grave_accent#Use_in_programming) `\`` in this document, it's surprising I didn't add support for it earlier.
 
 Unlike markdown, escaping in MDL is done with the backslash `\\` character. It's one of the characters that is recognized even inside escaped sections, like the backticks.
 
+Escaping backticks inside backticks is a bit of an issue. Consider the `\`_NodeIterator\`` bit that needs to look like 
+`\`\` \`_NodeIterator\` \`\`` in GitHub flavoured markdown. I had to count the ticks in the collapsed text, add the count of enclosing ticks, and optional spaces if starts/ends with ticks!
+
 
 ## \_NodeIterator
 
 The doc_tree converter is no longer a recursive tree converter. Instead, each function takes the new `_NodeIterator` and is capable of moving through the tree. This allows features to depend on items around them.
 
-For lists it was essential as list items are independent blocks in the main document. The parser sees no difference between a list item and a paragraph. The doc_tree converter must reassemble the items into a single list block. Druring the block parsing, subsequent list items are appended to the same list.
+For lists, it was essential as list items are independent blocks in the main document. The parser sees no difference between a list item and a paragraph. The doc_tree converter must reassemble the pieces into a single list block. During the block parsing, subsequent list items are appended to the same list.
 
-This also made it easier to do the anchor splitting. The anchor now looks towards the following node, and expecdts it to be a link style node. Then it merges the two together to produce a single output element.
+This also made it easier to do the anchor splitting. The anchor now looks towards the following node and expects it to be a link style node. Then it merges the two together to produce a single output element.
 
 
 @Blurb

diff --git a/mdl/format_html.py b/mdl/format_html.py
@@ -32,14 +32,15 @@ def q( type, func ):
 		def fail():
 			raise Exception( "Unknown node type", node )
 
-		_ = q( doc_tree.Inline, self._write_inline ) or \
-			q( doc_tree.Section, self._write_section ) or \
+		_ = \
 			q( doc_tree.Block, self._write_block ) or \
-			q( doc_tree.Text, self._write_text ) or \
-			q( doc_tree.Link, self._write_link ) or \
-			q( doc_tree.Note, self._write_note ) or \
 			q( doc_tree.Code, self._write_code ) or \
+			q( doc_tree.Inline, self._write_inline ) or \
+			q( doc_tree.Link, self._write_link ) or \
 			q( doc_tree.List, self._write_list ) or \
+			q( doc_tree.Note, self._write_note ) or \
+			q( doc_tree.Section, self._write_section ) or \
+			q( doc_tree.Text, self._write_text ) or \
 			fail()
 
 

diff --git a/mdl/format_markdown.py b/mdl/format_markdown.py
@@ -18,95 +18,104 @@ def __init__(self):
 		self.notes = []
 
 	def render(self, node):
-		self._write_node( node )
+		self.output.write( self._get_node( node ) )
 		self._write_notes()
 		return self.output.getvalue()
 
-	def _write_node( self, node ):
+	def _get_node( self, node ):
+		text = ""
 		def q( type, func ):
+			nonlocal text
 			if isinstance( node, type ):
-				func( node )
+				text = func( node )
 				return True
 			return False
 
 		def fail():
 			raise Exception( "Unknown node type", node )
 
-		_ = q( doc_tree.Inline, self._write_inline ) or \
-			q( doc_tree.Section, self._write_section ) or \
-			q( doc_tree.Block, self._write_block ) or \
-			q( doc_tree.Text, self._write_text ) or \
-			q( doc_tree.Link, self._write_link ) or \
-			q( doc_tree.Note, self._write_note ) or \
-			q( doc_tree.Code, self._write_code ) or \
+		_ = \
+			q( doc_tree.Block, self._get_block ) or \
+			q( doc_tree.Code, self._get_code ) or \
+			q( doc_tree.Inline, self._get_inline ) or \
+			q( doc_tree.Link, self._get_link ) or \
+			q( doc_tree.List, self._get_list ) or \
+			q( doc_tree.Note, self._get_note ) or \
+			q( doc_tree.Section, self._get_section ) or \
+			q( doc_tree.Text, self._get_text ) or \
 			fail()
+
+		return text
 
 
-	def _write_sub( self, node ):
-		self._write_list( node.sub )
+	def _get_sub( self, node ):
+		return self._get_node_list( node.sub )
 
-	def _write_list( self, list_ ):
-		for sub in list_:
-			self._write_node( sub )
+	def _get_node_list( self, list_ ):
+		return "".join( [ self._get_node( sub ) for sub in list_ ] )
 
 
 	inline_map = {
 		"italic": "_",
 		"bold": "*",
+		"code": "`",
 	}
-	def _write_inline( self, node ):
-		fmt = type(self).inline_map[node.feature.name]
-		self.output.write( fmt )
-		self._write_sub( node )
-		self.output.write( fmt )
+	def _get_inline( self, node ):
+		if node.feature == doc_tree.feature_code:
+			# This is GitHub's style of escaping ticks inside ticks
+			text = self._get_sub( node )
+			tick_len = 1 + _count_longest_backtick_chain( text )
+			ticks = '`' * tick_len
+			pre = ticks
+			post = ticks
+			if len(text) > 0 and text[0] == '`':
+				pre += ' '
+			if len(text) > 0 and text[-1] == '`':
+				post = ' ' + post
+			return "{}{}{}".format( pre, text, post )
+		else:
+			fmt = type(self).inline_map[node.feature.name]
+			return "{}{}{}".format( fmt, self._get_sub( node ), fmt )
 
-	def _write_paragraph( self, node ):
-		self.output.write( "\n" )
-		self._write_sub( node )
-		self.output.write( "\n" )
+	def _get_paragraph( self, node ):
+		return "\n{}\n".format( self._get_sub( node ) )
 
-	def _write_quote( self, node ):
-		self.output.write( "\n>" )
-		self._write_sub( node )
-		self.output.write( "\n" )
+	def _get_quote( self, node ):
+		return "\n>{}\n".format( self._get_sub( node ) )
 
-	def _write_blurb( self, node ):
-		self.output.write( "\n----\n\n_" )
-		self._write_sub( node )
-		self.output.write( "_\n" )
+	def _get_blurb( self, node ):
+		return "\n----\n\n_{}_\n".format( self._get_sub( node ) )
 
-	def _write_block( self, node ):
+	def _get_block( self, node ):
 		if node.class_ == doc_tree.block_quote:
-			self._write_quote( node )
+			return self._get_quote( node )
 		elif node.class_ == doc_tree.block_blurb:
-			self._write_blurb( node )
+			return self._get_blurb( node )
 		else:
-			self._write_paragraph( node )
+			return self._get_paragraph( node )
 
 
-	def _write_section( self, node ):
-		self.output.write( "\n" )
+	def _get_section( self, node ):
+		text = "\n"
 		if node.title != None:
-			self.output.write( "#" * node.level )
-			self._write_list( node.title )
-			self.output.write( "\n" )
+			text += "#" * node.level
+			text += self._get_node_list( node.title )
+			text += "\n"
 
-		self._write_sub(  node )
+		return text + self._get_sub(  node )
 
-	def _write_text( self, node ):
+	def _get_text( self, node ):
 		#TODO: Escaping of course
-		self.output.write( node.text )
+		return node.text
 
-	def _write_link( self, node ):
+	def _get_link( self, node ):
 		# TODO: more escaping
-		self.output.write( "[" )
-		self._write_sub(node)
-		self.output.write( "]({})".format( node.url ) )
+		return "[{}]({})".format( self._get_sub(node), node.url )
 
-	def _write_note( self, node ):
+	def _get_note( self, node ):
 		self.notes.append( node )
 		number = len(self.notes)
-		self.output.write( "<sup>[{}](#note-{})</sup>".format( number, number ) )
+		return "<sup>[{}](#note-{})</sup>".format( number, number )
 
 	def _write_notes( self ):
 		if len(self.notes) == 0:
@@ -115,11 +124,40 @@ def _write_notes( self ):
 		self.output.write( '\n----\n\n' )
 		for index, note in enumerate(self.notes):
 			self.output.write( '{}. <a id="note-{}"></a>'.format(index+1, index+1) )
-			self._write_node( note.node )
+			self.output.write( self._get_node( note.node ) )
 			self.output.write("\n")
 
 
-	def _write_code( self, node ):
-		self.output.write( "\n```{}\n".format( node.class_ ) )
-		self.output.write( node.text ) # TODO: escape
-		self.output.write( "\n```\n" )
+	def _get_code( self, node ):
+		# TODO: escape
+		self.output.write( "\n```{}\n{}\n```\n".format( node.class_, node.text ) )
+
+	def _get_list( self, node ):
+		text = ""
+		for sub in node.sub:
+			assert isinstance( sub, doc_tree.Block ) # The only supported type
+			assert sub.class_ == doc_tree.block_paragraph
+
+			text += "\n- " 
+			text += self._get_sub( sub )
+		text += "\n"
+		return text
+
+
+def _count_longest_backtick_chain( text ):
+	count = 0
+	max_count = 0
+	def update_max():
+		nonlocal count, max_count
+		max_count = max( count, max_count )
+		count = 0
+
+	for c in text:
+		if c == '`':
+			count += 1
+		else:
+			update_max()
+	update_max()
+
+	return max_count
+