Skip to content

Commit

Permalink
[doctools] Extract code blocks from every doc into _tmp/code-blocks
Browse files Browse the repository at this point in the history
For validating the documentation.
  • Loading branch information
Andy C committed May 18, 2021
1 parent fe9390e commit 0e51118
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 9 deletions.
4 changes: 2 additions & 2 deletions build/doc.sh
Expand Up @@ -149,8 +149,8 @@ split-and-render() {
#head _tmp/doc/*
#return

local code_output=_tmp/code-blocks/$name.txt
cmark --code-output $code_output ${prefix}_meta.json ${prefix}_content.md > $out
local code_out=_tmp/code-blocks/$name.txt
cmark --code-block-output $code_out ${prefix}_meta.json ${prefix}_content.md > $out
log "$prefix -> (doctools/cmark) -> $out"
}

Expand Down
12 changes: 6 additions & 6 deletions doctools/cmark.py
Expand Up @@ -262,6 +262,11 @@ def Render(opts, meta, in_file, out_file, use_fastlex=True):
html = md2html(in_file.read())

if use_fastlex:
if opts.code_block_output:
with open(opts.code_block_output, 'w') as f:
f.write('# %s: code blocks extracted from Markdown/HTML\n\n' % opts.code_block_output)
text = oil_doc.ExtractCode(html, f)

html = oil_doc.RemoveComments(html)

# Hack for allowing tables without <p> in cells, which CommonMark seems to require?
Expand All @@ -273,11 +278,6 @@ def Render(opts, meta, in_file, out_file, use_fastlex=True):

html = oil_doc.HighlightCode(html, meta.get('default_highlighter'))

if opts.code_output:
log('TODO: output to %s', opts.code_output)
with open(opts.code_output, 'w') as f:
f.write('TODO')

# h2 is the title. h1 is unused.
if opts.toc_tags:
toc_tags = opts.toc_tags
Expand Down Expand Up @@ -326,7 +326,7 @@ def Options():
help='Hack for old blog posts')

p.add_option(
'--code-output', dest='code_output',
'--code-block-output', dest='code_block_output',
default=False,
help='Extract and print code blocks to this file')

Expand Down
62 changes: 61 additions & 1 deletion doctools/oil_doc.py
Expand Up @@ -406,7 +406,7 @@ def HighlightCode(s, default_highlighter):

else: # language-*: Use Pygments

# We REMOVIE the original <pre><code> because Pygments gives you a <pre> already
# We REMOVE the original <pre><code> because Pygments gives you a <pre> already

# We just read closing </code>, and the next one should be </pre>.
try:
Expand Down Expand Up @@ -434,6 +434,66 @@ def HighlightCode(s, default_highlighter):
return f.getvalue()


def ExtractCode(s, f):
  """Write the contents of every <pre><code> block in `s` to a text file.

  This lets us at least validate the syntax of the code in the docs.

  The algorithm is similar to the highlighting code above:

  1. Find each <pre> start tag whose very next token is a <code> start tag.
  2. Collect what's between that <code> and the matching </code>.
  3. Decode it to plain text (&amp; -> &, etc.) and print it to `f` under a
     '# block N' header, where N counts from 0.

  Args:
    s: HTML string to scan.
    f: Writable file-like object that receives the extracted blocks.

  Returns:
    None.  All output goes to `f`.
  """
  out = html.Output(s, f)
  tag_lexer = html.TagLexer(s)

  block_num = 0
  pos = 0  # start offset of the token currently being examined
  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:
      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        pos = end_pos

        # Only a <pre> directly followed by <code> counts as a code block.
        try:
          tok_id, end_pos = next(it)
        except StopIteration:
          break

        tag_lexer.Reset(pos, end_pos)
        if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
          code_start_pos = end_pos

          out.SkipTo(code_start_pos)
          out.Print('# block %d' % block_num)
          out.Print('\n')

          slash_code_left, slash_code_right = \
              html.ReadUntilEndTag(it, tag_lexer, 'code')

          text = html.ToText(s, code_start_pos, slash_code_left)
          out.SkipTo(slash_code_left)

          out.Print(text)
          out.Print('\n')

          block_num += 1

          # ReadUntilEndTag() advanced the shared iterator past </code>, so
          # the next token starts where that end tag stops.  Without this,
          # `pos` below would still point at the start of the code text.
          # NOTE(review): assumes the returned pair is the (left, right)
          # offsets of the </code> tag -- confirm against html.py.
          end_pos = slash_code_right

    pos = end_pos

  # Text outside of code blocks is intentionally not written, so there is no
  # out.PrintTheRest() here.


class ShellSession(object):
"""
TODO: Pass this to HighlightCode as a plugin
Expand Down

0 comments on commit 0e51118

Please sign in to comment.