Skip to content

Commit

Permalink
Use Jupyter notebook HTML rendering for more faithful appearance
Browse files (browse the repository at this point in the history)
  • Loading branch information
pv committed Mar 12, 2017
1 parent 0c94d61 commit 525a1ee
Show file tree
Hide file tree
Showing 4 changed files with 11,789 additions and 46 deletions.
105 changes: 59 additions & 46 deletions build.py
Expand Up @@ -11,6 +11,7 @@
import argparse
import subprocess
import shutil
import lxml.html


def main():
Expand Down Expand Up @@ -210,13 +211,14 @@ def convert_file(dst_path, fn, editors):
"""
print(fn)
subprocess.check_call(['jupyter', 'nbconvert', '--to', 'rst',
subprocess.check_call(['jupyter', 'nbconvert', '--to', 'html',
'--output-dir', os.path.abspath(dst_path),
os.path.abspath(fn)],
cwd=dst_path, stderr=subprocess.STDOUT)

basename = os.path.splitext(os.path.basename(fn))[0]
rst_fn = os.path.join(dst_path, basename + '.rst')
html_fn = os.path.join(dst_path, basename + '.html')

title = None
tags = set()
Expand All @@ -225,59 +227,70 @@ def convert_file(dst_path, fn, editors):

lines = []

with open(rst_fn, 'r') as f:
prev_line = ''
for orig_line in f:
line = orig_line.strip()
m = re.match('^===+\s*$', line)
m2 = re.match('^---+\s*$', line)
if m or m2:
if prev_line and len(line) >= 1+len(prev_line)//2 and not title:
title = prev_line.strip()
lines = lines[:-1]
continue

m = re.match('^TAGS:\s*(.*)\s*$', line)
if m:
tag_line = m.group(1).strip().replace(';', ',')
tags.update([x.strip() for x in tag_line.split(",")])
continue

m = re.match('^AUTHORS:\s*(.*)\s*$', line)
if m:
# Author lines override editors
if legacy_editors:
editors = []
legacy_editors = False
author_line = m.group(1).strip().replace(';', ',')
for author in author_line.split(","):
author = author.strip()
if author and author not in editors:
editors.append(author)
continue
# Parse and munge HTML
tree = lxml.html.parse(html_fn)
os.unlink(html_fn)

root = tree.getroot()
head = root.find('head')
container, = root.xpath("//div[@id='notebook-container']")

headers = container.xpath('//h1')
if headers:
title = headers[0].text
if isinstance(title, unicode):
title = title.encode('utf-8')
h1_parent = headers[0].getparent()
h1_parent.remove(headers[0])

lines.extend([u".. raw:: html", u""])

for element in head.getchildren():
if element.tag in ('script',):
text = lxml.html.tostring(element)
lines.extend(" " + x for x in text.splitlines())

text = lxml.html.tostring(container)

m = re.search(ur'<p>TAGS:\s*(.*)\s*</p>', text)
if m:
tag_line = m.group(1).strip().replace(';', ',')
if isinstance(tag_line, unicode):
tag_line = tag_line.encode('utf-8')
tags.update([x.strip() for x in tag_line.split(",")])
text = text[:m.start()] + text[m.end():]

m = re.search(ur'<p>AUTHORS:\s*(.*)\s*</p>', text)
if m:
# Author lines override editors
if legacy_editors:
editors = []
legacy_editors = False
author_line = m.group(1).strip().replace(';', ',')
if isinstance(author_line, unicode):
author_line = author_line.encode('utf-8')
for author in author_line.split(","):
author = author.strip()
if author and author not in editors:
editors.append(author)

text = text[:m.start()] + text[m.end():]

text = text.replace(u'attachments/{0}/'.format(basename),
u'../_downloads/')

lines.extend(u" " + x for x in text.splitlines())
lines.append(u"")

prev_line = line
lines.append(orig_line)

text = "".join(lines)
# Produce output
text = u"\n".join(lines).encode('utf-8')

if not title:
title = basename

authors = ", ".join(editors)
text = "{0}\n{1}\n\n{2}".format(title, "="*len(title), text)

text = re.sub(r'`(.*?) <files/(attachments/.*?)>`__',
r':download:`\1 <\2>`',
text,
flags=re.M)
text = re.sub(r'^TAGS:.*$', '', text, flags=re.M)
text = re.sub(r'(figure|image):: files/attachments/', r'\1:: attachments/', text, flags=re.M)
text = re.sub(r' <files/attachments/', r' <attachments/', text, flags=re.M)
text = re.sub(r'.. parsed-literal::', r'.. parsed-literal::\n :class: ipy-out', text, flags=re.M)
text = re.sub(r'`([^`<]*)\s+<(?!attachments/)([^:.>]*?)(?:.html)?>`__', r':doc:`\1 <\2>`', text, flags=re.M)
text = re.sub(r'^(\s*)\.\.\s*raw:: latex', '\\1.. math::\\1 :nowrap:', text, flags=re.M)
text = re.sub(r'^(\s*)\.\. code::\s*(ipython3|ipython2|python3|python2|python)?\s*$', r'\1.. code-block:: python\n', text, flags=re.M)
with open(rst_fn, 'w') as f:
f.write(text)
if authors:
Expand Down

0 comments on commit 525a1ee

Please sign in to comment.