from docstructure import SITE_STRUCTURE, HREF_MAP, BASENAME_MAP
from lxml.etree import (parse, fromstring, ElementTree,
Element, SubElement, XPath, XML)
import os
import re
import sys
import copy
import shutil
import subprocess
# Command-line options handed to the rst2html script, joined into a single
# space-separated string (it is interpolated into a command string in
# rest2html() and split into a list again in inject_donate_buttons()).
RST2HTML_OPTIONS = " ".join([
'--language en',
# NOTE(review): the option list is not closed in this view -- the remaining
# entries and the closing "])" appear to be missing; confirm against the
# complete file.

# Prefix map shared by the XPath helpers below: "h" stands for XHTML_NS.
# NOTE(review): XHTML_NS is not defined anywhere in the visible source.
htmlnsmap = {"h" : XHTML_NS}
# Text content of the <title> element inside <head>.
find_title = XPath("/h:html/h:head/h:title/text()", namespaces=htmlnsmap)
# The <title> element itself (used when the title text has to be rewritten).
find_title_tag = XPath("/h:html/h:head/h:title", namespaces=htmlnsmap)
# All text nodes below <h1> elements that carry no class attribute.
find_headings = XPath("//h:h1[not(@class)]//text()", namespaces=htmlnsmap)
# <h1 class="title"> elements (the first such one under each parent).
find_heading_tag = XPath("//h:h1[@class = 'title'][1]", namespaces=htmlnsmap)
# <ul> elements whose id equals the $name XPath variable.
find_menu = XPath("//h:ul[@id=$name]", namespaces=htmlnsmap)
# The last <div> that is a direct child of <body>.
find_page_end = XPath("/h:html/h:body/h:div[last()]", namespaces=htmlnsmap)
# Return every run of word characters in a string, as a list.
# The pattern is a raw string so that "\w" is not treated as an (invalid)
# string escape sequence.
find_words = re.compile(r'(\w+)').findall
# replace_invalid(repl, text): substitute ``repl`` for each '-', '_', '/',
# '.', backslash or whitespace character in ``text``.
replace_invalid = re.compile(r'[-_/.\s\\]').sub
def make_menu_section_head(section, menuroot):
    """Return the ``<li>`` element that heads the menu section *section*.

    The section is identified by a ``<ul>`` whose id is ``section`` plus
    ``'-section'``.  If such a list already has an ``<li>`` child, the first
    one is returned.  Otherwise a new ``<ul>``/``<li>`` pair is appended to
    *menuroot*, with a ``<span class="section title">`` holding the section
    name, and the new ``<li>`` is returned.
    """
    section_id = section + '-section'
    existing = menuroot.xpath("//ul[@id=$section]/li", section=section_id)
    if existing:
        # xpath() returns a list; the first <li> is the section head.
        return existing[0]

    # No such section yet: create it.  The freshly built <li> itself is the
    # head -- indexing it would hand back its <span> child instead.
    ul = SubElement(menuroot, "ul", id=section_id)
    section_head = SubElement(ul, "li")
    title = SubElement(section_head, "span", {"class": "section title"})
    title.text = section
    return section_head
def build_menu(tree, basename, section_head):
    """Add a menu entry for the page *tree* below *section_head*.

    The entry is labelled with the page's ``<title>`` text when there is
    one; otherwise a label is derived from *basename* (capitalised, with the
    characters matched by ``replace_invalid`` removed).  The entry links to
    ``basename + ".html"``.
    """
    titles = find_title(tree)
    if titles:
        page_title = titles[0]
    else:
        # Fall back to the file name only when the page has no title.
        page_title = replace_invalid('', basename.capitalize())
    # NOTE(review): the last argument of this call was cut off in the source
    # under review; passing the page's headings is the reconstruction that
    # fits build_menu_entry()'s ``headings`` parameter -- confirm.
    build_menu_entry(page_title, basename + ".html", section_head,
                     headings=find_headings(tree))
def build_menu_entry(page_title, url, section_head, headings=None):
    """Append a menu entry linking to *url* below *section_head*.

    Creates ``<ul class="menu foreign" id="...-menu">`` containing one
    ``<li class="menu title">`` with a link labelled *page_title*.  The id
    is *url* without its extension, passed through ``replace_invalid`` with
    a space as replacement, plus ``'-menu'``.

    If *headings* is non-empty, a ``<ul class="submenu">`` is added with one
    linked item per heading.  Each item points at ``url#ref``, where ``ref``
    is the ``id`` attribute of the heading's grandparent element when that
    can be obtained, and otherwise a slug built from the heading text.
    """
    page_id = replace_invalid(' ', os.path.splitext(url)[0]) + '-menu'
    ul = SubElement(section_head, "ul",
                    {"class": "menu foreign", "id": page_id})
    title = SubElement(ul, "li", {"class": "menu title"})
    a = SubElement(title, "a", href=url)
    a.text = page_title

    if not headings:
        return

    subul = SubElement(title, "ul", {"class": "submenu"})
    for heading in headings:
        li = SubElement(subul, "li", {"class": "menu item"})
        try:
            # Works for text results that know their parent element
            # (presumably lxml "smart strings" -- confirm); anything without
            # getparent() ends up in the fallback below.
            ref = heading.getparent().getparent().get('id')
        except AttributeError:
            ref = None
        if ref is None:
            # No usable id: lower-case the text and join its words with '-'.
            ref = '-'.join(find_words(replace_invalid(' ', heading.lower())))
        a = SubElement(li, "a", href=url + '#' + ref)
        a.text = heading
def merge_menu(tree, menu, name):
    """Insert a copy of *menu* into the page *tree* and mark its own entry.

    A deep copy of *menu* is inserted as the first child of the first child
    of the second child of the root element.  Every element of the copy
    whose tag has no namespace is moved into the XHTML namespace so that the
    ``h:``-prefixed XPath helpers can find it.  The ``<ul>`` whose id matches
    *name* (first tried with separators replaced by spaces, then by dashes,
    plus ``'-menu'``) gets ``"foreign"`` replaced by ``"current"`` in its
    class attribute.

    Returns *tree*, which is modified in place.
    """
    menu_root = copy.deepcopy(menu)
    tree.getroot()[1][0].insert(0, menu_root)  # html->body->div[class=document]

    # Use the same namespace the XPath helpers are bound to; an empty "{}"
    # prefix would leave the menu outside of it and find_menu() would never
    # match.
    xhtml_prefix = "{%s}" % XHTML_NS
    for el in menu_root.iter():
        tag = el.tag
        if tag[0] != '{':
            el.tag = xhtml_prefix + tag

    current_menu = find_menu(
        menu_root, name=replace_invalid(' ', name) + '-menu')
    if not current_menu:
        current_menu = find_menu(
            menu_root, name=replace_invalid('-', name) + '-menu')

    # An empty result simply means there is nothing to highlight.
    for submenu in current_menu:
        css_class = submenu.get("class", "")
        submenu.set("class", css_class.replace("foreign", "current"))
    return tree
# Add a Flattr button to the page ``tree``: a <script> element is appended to
# the document's <head>, and a centred paragraph is inserted into the
# "introduction" <div> (insert(-1, ...) places it before the div's last child).
# NOTE(review): in this view the JavaScript literal assigned to script.text is
# never closed, the XML(...) literal at the end is cut off, and the script
# ``src`` / link ``href`` values are empty strings -- the original text must be
# recovered before this function can run.
def inject_flatter_button(tree):
# First <head> child of the element/tree the XPath is evaluated on.
head = tree.xpath('h:head[1]', namespaces=htmlnsmap)[0]
# New <script type="text/javascript"> in the XHTML namespace, appended to head.
script = SubElement(head, '{%s}script' % XHTML_NS, type='text/javascript')
script.text = """
(function() {
var s = document.createElement('script');
var t = document.getElementsByTagName('script')[0];
s.type = 'text/javascript';
s.async = true;
s.src = '';
t.parentNode.insertBefore(s, t);
script.tail = '\n'
intro_div = tree.xpath('h:body//h:div[@id = "introduction"][1]', namespaces=htmlnsmap)[0]
intro_div.insert(-1, XML(
'<p style="text-align: center;">Like working with lxml? '
'Happy about the time that it just saved you? <br />'
'Show your appreciation with <a href="">Flattr</a>.<br />'
'<a class="FlattrButton" style="display:none;" rev="flattr;button:compact;" href=""></a>'
# Render README.rst from ``lxml_path`` with the rst2html script, parse the
# result, and look up the divs involved in moving its donation sections into
# the page ``tree``.
# NOTE(review): only the lookups are visible here; the statements that actually
# copy the README sections into ``tree`` are missing from this view.
def inject_donate_buttons(lxml_path, rst2html_script, tree):
# Argument list: interpreter, script, the shared rst2html options, README path.
command = ([sys.executable, rst2html_script]
+ RST2HTML_OPTIONS.split() + [os.path.join(lxml_path, 'README.rst')])
# Run the converter and capture the generated HTML from its stdout.
rst2html = subprocess.Popen(command, stdout=subprocess.PIPE)
stdout, _ = rst2html.communicate()
readme = fromstring(stdout)
# NOTE(review): the next three xpath() calls are cut off after their first
# argument (no closing parenthesis); presumably they continued with
# ``namespaces=htmlnsmap)[0]`` like the call below -- confirm.
# "introduction" div of the target page.
intro_div = tree.xpath('h:body//h:div[@id = "introduction"][1]',
# "support-the-project" div of the rendered README.
support_div = readme.xpath('h:body//h:div[@id = "support-the-project"][1]',
# "legal-notice-for-donations" div of the rendered README.
legal = readme.xpath('h:body//h:div[@id = "legal-notice-for-donations"][1]',
# Last <div> nested inside another <div> in the target page's body.
last_div = tree.xpath('h:body//h:div//h:div', namespaces=htmlnsmap)[-1]
def rest2html(script, source_path, dest_path, stylesheet_url):
    """Convert the reST file *source_path* to HTML at *dest_path*.

    Runs *script* with the current Python interpreter, passing the shared
    ``RST2HTML_OPTIONS`` plus ``--stylesheet=<stylesheet_url>`` and
    ``--link-stylesheet``, and writes the converter's standard output to
    *dest_path* (the file is created or truncated).

    The command is passed as an argument list rather than a shell string,
    so paths containing spaces or shell metacharacters are handed over
    unchanged; this matches how inject_donate_buttons() invokes the script.
    The converter's exit status is not checked.
    """
    command = [sys.executable, script]
    command.extend(RST2HTML_OPTIONS.split())
    command.extend([
        '--stylesheet=%s' % stylesheet_url,
        '--link-stylesheet',
        source_path,
    ])
    # Replaces the shell's "> dest_path" redirection.
    with open(dest_path, 'wb') as dest:
        subprocess.call(command, stdout=dest)
# Build the HTML documentation site in ``dirname`` from the reST sources in the
# ``doc`` directory below ``lxml_path``; ``release`` is used to name the
# changelog page ("changes-<release>.html").
# NOTE(review): indentation and several statements of this function are missing
# in this view (see the notes below); the comments describe only what is visible.
def publish(dirname, lxml_path, release):
# NOTE(review): the body of this ``if`` is missing -- presumably it creates
# ``dirname``; confirm.
if not os.path.exists(dirname):
doc_dir = os.path.join(lxml_path, 'doc')
# NOTE(review): the script file name is an empty string in this view.
script = os.path.join(doc_dir, '')
pubkey = os.path.join(doc_dir, 'pubkey.asc')
stylesheet_url = 'style.css'
# Ship the public key file alongside the generated pages.
shutil.copy(pubkey, dirname)
# Work on a copy so the imported HREF_MAP is not modified.
href_map = HREF_MAP.copy()
changelog_basename = 'changes-%s' % release
href_map['Release Changelog'] = changelog_basename + '.html'
# Maps source file name -> (parsed tree, output basename, output path).
trees = {}
# Root element of the side menu shared by all pages.
menu = Element("div", {"class":"sidemenu"})
# build HTML pages and parse them back
for section, text_files in SITE_STRUCTURE:
section_head = make_menu_section_head(section, menu)
for filename in text_files:
if filename.startswith('@'):
# special menu entry
page_title = filename[1:]
url = href_map[page_title]
build_menu_entry(page_title, url, section_head)
# NOTE(review): an ``else:`` or ``continue`` separating the special entries
# from the regular conversion below appears to be missing -- confirm.
path = os.path.join(doc_dir, filename)
basename = os.path.splitext(os.path.basename(filename))[0]
# Some sources are published under a different name.
basename = BASENAME_MAP.get(basename, basename)
outname = basename + '.html'
outpath = os.path.join(dirname, outname)
rest2html(script, path, outpath, stylesheet_url)
tree = parse(outpath)
if filename == 'main.txt':
# inject donation buttons
inject_donate_buttons(lxml_path, script, tree)
trees[filename] = (tree, basename, outpath)
build_menu(tree, basename, section_head)
# also convert CHANGES.txt
# NOTE(review): the call these two arguments belong to is missing; presumably a
# rest2html(...) invocation -- confirm.
os.path.join(lxml_path, 'CHANGES.txt'),
os.path.join(dirname, 'changes-%s.html' % release),
# generate sitemap from menu
# NOTE(review): the markup below lacks its <head>/<body> elements and closing
# tags, and its URLs are empty, in this view.
sitemap = XML('''\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "">
<html xmlns="" xml:lang="en" lang="en">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Sitemap of - Processing XML and HTML with Python</title>
<meta content="lxml - the most feature-rich and easy-to-use library for processing XML and HTML in the Python language"
name="description" />
<meta content="Python XML, XML, XML processing, HTML, lxml, simple XML, ElementTree, etree, lxml.etree, objectify, XML parsing, XML validation, XPath, XSLT"
name="keywords" />
<h1>Sitemap of - Processing XML and HTML with Python</h1>
'''.replace(' ', ' '))
# The sitemap gets its own copy of the menu, extended by a 'Download files'
# link under the last section.
sitemap_menu = copy.deepcopy(menu)
SubElement(SubElement(sitemap_menu[-1], 'li'), 'a', href='').text = 'Download files'
sitemap[-1].append(sitemap_menu) # append to body
ElementTree(sitemap).write(os.path.join(dirname, 'sitemap.html'))
# integrate sitemap into the menu
SubElement(SubElement(menu[-1], 'li'), 'a', href='').text = 'Sitemap'
# integrate menu into web pages
# NOTE(review): dict.itervalues() exists only on Python 2.
for tree, basename, outpath in trees.itervalues():
new_tree = merge_menu(tree, menu, basename)
# Give the page titled just 'lxml' a more descriptive title and heading.
title = find_title_tag(new_tree)
if title and title[0].text == 'lxml':
title[0].text = "lxml - Processing XML and HTML with Python"
heading = find_heading_tag(new_tree)
if heading:
heading[0].text = "lxml - XML and HTML with Python"
# NOTE(review): no statement writing ``new_tree`` back to ``outpath`` is visible
# in this view.
if __name__ == '__main__':
    # Command line: <output directory> <lxml source path> <release>
    dirname, lxml_path, release = sys.argv[1], sys.argv[2], sys.argv[3]
    publish(dirname, lxml_path, release)
