Skip to content

Commit

Permalink
backtick formatting in markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
edA-qa mort-ora-y committed Apr 28, 2019
1 parent fb94345 commit bbc2745
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 64 deletions.
13 changes: 9 additions & 4 deletions log/5.md
@@ -1,3 +1,5 @@
/publish-date: 2019-04-28

It's been a while since my last update, as I've not had a lot of time to work on the parser. Such is the life of side projects. :/

This time around I've made changes to support lists. In the spirit of the feature the changes are:
Expand All @@ -18,7 +20,7 @@ Unlike in [markdown](https://en.wikipedia.org/wiki/Markdown), the MDL lists can

- You can't see this.

- But there was an extra space between these points.
- But there was extra space between these points.

Markdown supports indented lists, which might be easier to read in the source. This isn't a concern for me now, and I'll leave out support for them. I'll also ignore embedded lists until I have a decent use-case for them.

Expand All @@ -36,18 +38,21 @@ This change, along with the backtick support, allowed me to switch to an unparse

## Backtick support

Given how much I'm using a backtick `\`` in this document, it's surprising I didn't add support for it earlier.
Given how much I'm using a [backtick](https://en.wikipedia.org/wiki/Grave_accent#Use_in_programming) `\`` in this document, it's surprising I didn't add support for it earlier.

Unlike markdown, escaping in MDL is done with the backslash `\\` character. It's one of the characters that is recognized even inside escaped sections, like the backticks.

Escaping backticks inside backticks is a bit of an issue. Consider the `\`_NodeIterator\`` bit that needs to look like
`\`\` \`_NodeIterator\` \`\`` in GitHub flavoured markdown. I had to count the longest run of ticks in the collapsed text, use one more tick than that for the enclosing ticks, and add padding spaces if the text starts or ends with a tick!


## \_NodeIterator

The doc_tree converter is no longer a recursive tree converter. Instead, each function takes the new `_NodeIterator` and is capable of moving through the tree. This allows features to depend on items around them.

For lists it was essential as list items are independent blocks in the main document. The parser sees no difference between a list item and a paragraph. The doc_tree converter must reassemble the items into a single list block. Druring the block parsing, subsequent list items are appended to the same list.
For lists, it was essential as list items are independent blocks in the main document. The parser sees no difference between a list item and a paragraph. The doc_tree converter must reassemble the pieces into a single list block. During the block parsing, subsequent list items are appended to the same list.

This also made it easier to do the anchor splitting. The anchor now looks towards the following node, and expecdts it to be a link style node. Then it merges the two together to produce a single output element.
This also made it easier to do the anchor splitting. The anchor now looks towards the following node and expects it to be a link style node. Then it merges the two together to produce a single output element.


@Blurb
Expand Down
11 changes: 6 additions & 5 deletions mdl/format_html.py
Expand Up @@ -32,14 +32,15 @@ def q( type, func ):
def fail():
raise Exception( "Unknown node type", node )

_ = q( doc_tree.Inline, self._write_inline ) or \
q( doc_tree.Section, self._write_section ) or \
_ = \
q( doc_tree.Block, self._write_block ) or \
q( doc_tree.Text, self._write_text ) or \
q( doc_tree.Link, self._write_link ) or \
q( doc_tree.Note, self._write_note ) or \
q( doc_tree.Code, self._write_code ) or \
q( doc_tree.Inline, self._write_inline ) or \
q( doc_tree.Link, self._write_link ) or \
q( doc_tree.List, self._write_list ) or \
q( doc_tree.Note, self._write_note ) or \
q( doc_tree.Section, self._write_section ) or \
q( doc_tree.Text, self._write_text ) or \
fail()


Expand Down
148 changes: 93 additions & 55 deletions mdl/format_markdown.py
Expand Up @@ -18,95 +18,104 @@ def __init__(self):
self.notes = []

def render(self, node):
self._write_node( node )
self.output.write( self._get_node( node ) )
self._write_notes()
return self.output.getvalue()

def _write_node( self, node ):
def _get_node( self, node ):
text = ""
def q( type, func ):
nonlocal text
if isinstance( node, type ):
func( node )
text = func( node )
return True
return False

def fail():
raise Exception( "Unknown node type", node )

_ = q( doc_tree.Inline, self._write_inline ) or \
q( doc_tree.Section, self._write_section ) or \
q( doc_tree.Block, self._write_block ) or \
q( doc_tree.Text, self._write_text ) or \
q( doc_tree.Link, self._write_link ) or \
q( doc_tree.Note, self._write_note ) or \
q( doc_tree.Code, self._write_code ) or \
_ = \
q( doc_tree.Block, self._get_block ) or \
q( doc_tree.Code, self._get_code ) or \
q( doc_tree.Inline, self._get_inline ) or \
q( doc_tree.Link, self._get_link ) or \
q( doc_tree.List, self._get_list ) or \
q( doc_tree.Note, self._get_note ) or \
q( doc_tree.Section, self._get_section ) or \
q( doc_tree.Text, self._get_text ) or \
fail()

return text


def _write_sub( self, node ):
self._write_list( node.sub )
def _get_sub( self, node ):
return self._get_node_list( node.sub )

def _write_list( self, list_ ):
for sub in list_:
self._write_node( sub )
def _get_node_list( self, list_ ):
return "".join( [ self._get_node( sub ) for sub in list_ ] )


inline_map = {
"italic": "_",
"bold": "*",
"code": "`",
}
def _write_inline( self, node ):
fmt = type(self).inline_map[node.feature.name]
self.output.write( fmt )
self._write_sub( node )
self.output.write( fmt )
def _get_inline( self, node ):
    """Return the Markdown text for an inline node.

    Code features are wrapped in a backtick fence one tick longer than the
    longest run of backticks found in the rendered content. Every other
    feature is wrapped in the marker looked up in ``inline_map`` by
    ``node.feature.name``.
    """
    if node.feature == doc_tree.feature_code:
        # This is GitHub's style of escaping ticks inside ticks
        text = self._get_sub( node )
        ticks = '`' * ( 1 + _count_longest_backtick_chain( text ) )
        # Content touching the fence with a tick of its own must be separated
        # from it by a space. The padding is applied to BOTH sides: Markdown
        # renderers only strip the separator when it is present at both ends,
        # so one-sided padding would leave a stray space in the rendered code.
        if text.startswith( '`' ) or text.endswith( '`' ):
            pad = ' '
        else:
            pad = ''
        return "{}{}{}{}{}".format( ticks, pad, text, pad, ticks )
    else:
        fmt = type(self).inline_map[node.feature.name]
        return "{}{}{}".format( fmt, self._get_sub( node ), fmt )

def _write_paragraph( self, node ):
self.output.write( "\n" )
self._write_sub( node )
self.output.write( "\n" )
def _get_paragraph( self, node ):
return "\n{}\n".format( self._get_sub( node ) )

def _write_quote( self, node ):
self.output.write( "\n>" )
self._write_sub( node )
self.output.write( "\n" )
def _get_quote( self, node ):
return "\n>{}\n".format( self._get_sub( node ) )

def _write_blurb( self, node ):
self.output.write( "\n----\n\n_" )
self._write_sub( node )
self.output.write( "_\n" )
def _get_blurb( self, node ):
return "\n----\n\n_{}_\n".format( self._get_sub( node ) )

def _write_block( self, node ):
def _get_block( self, node ):
if node.class_ == doc_tree.block_quote:
self._write_quote( node )
return self._get_quote( node )
elif node.class_ == doc_tree.block_blurb:
self._write_blurb( node )
return self._get_blurb( node )
else:
self._write_paragraph( node )
return self._get_paragraph( node )


def _write_section( self, node ):
self.output.write( "\n" )
def _get_section( self, node ):
text = "\n"
if node.title != None:
self.output.write( "#" * node.level )
self._write_list( node.title )
self.output.write( "\n" )
text += "#" * node.level
text += self._get_node_list( node.title )
text += "\n"

self._write_sub( node )
return text + self._get_sub( node )

def _write_text( self, node ):
def _get_text( self, node ):
#TODO: Escaping of course
self.output.write( node.text )
return node.text

def _write_link( self, node ):
def _get_link( self, node ):
# TODO: more escaping
self.output.write( "[" )
self._write_sub(node)
self.output.write( "]({})".format( node.url ) )
return "[{}]({})".format( self._get_sub(node), node.url )

def _write_note( self, node ):
def _get_note( self, node ):
self.notes.append( node )
number = len(self.notes)
self.output.write( "<sup>[{}](#note-{})</sup>".format( number, number ) )
return "<sup>[{}](#note-{})</sup>".format( number, number )

def _write_notes( self ):
if len(self.notes) == 0:
Expand All @@ -115,11 +124,40 @@ def _write_notes( self ):
self.output.write( '\n----\n\n' )
for index, note in enumerate(self.notes):
self.output.write( '{}. <a id="note-{}"></a>'.format(index+1, index+1) )
self._write_node( note.node )
self.output.write( self._get_node( note.node ) )
self.output.write("\n")


def _write_code( self, node ):
self.output.write( "\n```{}\n".format( node.class_ ) )
self.output.write( node.text ) # TODO: escape
self.output.write( "\n```\n" )
def _get_code( self, node ):
# TODO: escape
self.output.write( "\n```{}\n{}\n```\n".format( node.class_, node.text ) )

def _get_list( self, node ):
text = ""
for sub in node.sub:
assert isinstance( sub, doc_tree.Block ) # The only supported type
assert sub.class_ == doc_tree.block_paragraph

text += "\n- "
text += self._get_sub( sub )
text += "\n"
return text


def _count_longest_backtick_chain( text ):
count = 0
max_count = 0
def update_max():
nonlocal count, max_count
max_count = max( count, max_count )
count = 0

for c in text:
if c == '`':
count += 1
else:
update_max()
update_max()

return max_count

0 comments on commit bbc2745

Please sign in to comment.