docx updates

jpn-- · Mar 3, 2017 · 17c08a3 · 17c08a3
1 parent fc4f6ff
commit 17c08a3
Show file tree

Hide file tree

Showing 2 changed files with 240 additions and 1 deletion.
diff --git a/py/model_reporter/docx.py b/py/model_reporter/docx.py
@@ -2,6 +2,7 @@
 try:
 	import docx
 	from docx.enum.style import WD_STYLE_TYPE
+	from docx.enum.text import WD_ALIGN_PARAGRAPH
 except ImportError:
 
 	class DocxModelReporter():
@@ -28,6 +29,15 @@ def _append_to_document(self, other_doc):
 	def document_larchstyle():
 		document = docx.Document()
 
+#		normal = document.styles['Normal']
+#		normal.font.name = 'Arial'
+#		normal.font.size = docx.shared.Pt(11)
+#		normal.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+#		normal.paragraph_format.line_spacing = 1.0
+#		normal.paragraph_format.widow_control = True
+#
+		body_text = document.styles['Body Text']
+
 		monospaced_small = document.styles.add_style('Monospaced Small',WD_STYLE_TYPE.TABLE)
 		monospaced_small.base_style = document.styles['Normal']
 		monospaced_small.font.name = 'Courier New'
@@ -36,6 +46,15 @@ def document_larchstyle():
 		monospaced_small.paragraph_format.space_after  = docx.shared.Pt(0)
 		monospaced_small.paragraph_format.line_spacing = 1.0
 
+		table_body_text = document.styles.add_style('Table Body Text',WD_STYLE_TYPE.TABLE)
+		table_body_text.base_style = document.styles['Body Text']
+		table_body_text.font.name = 'Arial Narrow'
+		table_body_text.font.size = docx.shared.Pt(9)
+		table_body_text.paragraph_format.space_before = docx.shared.Pt(1)
+		table_body_text.paragraph_format.space_after  = docx.shared.Pt(1)
+		table_body_text.paragraph_format.line_spacing = 1.0
+
+
 		return document
 
 
@@ -69,7 +88,7 @@ def docx_params(self, groups=None, display_inital=False, **format):
 			if groups is None and hasattr(self, 'parameter_groups'):
 				groups = self.parameter_groups
 
-			table = docx_table(rows=1, cols=number_of_columns, style='Monospaced Small',
+			table = docx_table(rows=1, cols=number_of_columns, style='Table Body Text',
 							   header_text="Model Parameter Estimates", header_level=2)
 
 			def append_simple_row(name, initial_value, value, std_err, tstat, nullvalue, holdfast):

diff --git a/py/util/rst_to_docx.py b/py/util/rst_to_docx.py
@@ -0,0 +1,220 @@
+# encoding: utf-8
+
+"""
+Helper objects for rendering to .docx format.
+
+Warning: This module is entirely experimental.
+"""
+
+
+from xml.etree import ElementTree
+import textwrap
+from docutils.core import publish_parts
+
+
+
+def xhtml_blurb(blurb_rst, h_stepdown=2, **format):
+	"""
+	Convert a reStructuredText blurb into an XHTML element tree.
+
+	*blurb_rst* may be str or bytes (bytes are decoded first).  The text is
+	dedented and stripped, published to HTML with docutils, and the
+	resulting ``html_body`` part is parsed with ElementTree.  The root
+	element gets ``class="blurb"`` and every heading tag ``h1``..``h6`` is
+	renamed to the tag *h_stepdown* levels deeper (``h1`` becomes ``h3`` for
+	the default of 2).  Extra keyword arguments are accepted but not used.
+
+	Returns the root element; raises TypeError if *blurb_rst* is neither
+	str nor bytes.
+	"""
+	if isinstance(blurb_rst, bytes):
+		blurb_rst = blurb_rst.decode()
+	if not isinstance(blurb_rst, str):
+		raise TypeError('blurb must be reStructuredText as str or bytes')
+	blurb_rst = textwrap.dedent(blurb_rst).strip()
+	blurb_div = ElementTree.fromstring(publish_parts(blurb_rst, writer_name='html')['html_body'])
+	blurb_div.attrib['class'] = 'blurb'
+	# Work from the deepest heading level upward so that a heading that has
+	# just been renamed is not matched again by a later pass.
+	for hlevel in (6,5,4,3,2,1):
+		for heading in blurb_div.iter('h{}'.format(hlevel)):
+			heading.tag = 'h{}'.format(hlevel+h_stepdown)
+	return blurb_div
+
+
+def cut_extra_whitespace(x):
+	"""
+	Return *x* with tabs and newlines turned into spaces and every run of
+	consecutive spaces collapsed to a single space.
+	"""
+	collapsed = x.replace('\t',' ').replace('\n',' ')
+	# Keep squeezing pairs of spaces until no doubled space is left.
+	while '  ' in collapsed:
+		collapsed = collapsed.replace('  ',' ')
+	return collapsed
+
+
+class RstRenderer(object):
+	"""
+	Service class that knows how to render a RestructuredText string into
+	a python-docx block container such as a Document object.
+
+	The text is converted to an XHTML element tree by `xhtml_blurb`, and
+	that tree is then walked, adding one paragraph to the block container
+	per block-level element.
+	"""
+
+	# Tags whose children are simply rendered in turn.
+	_CONTAINER_TAGS = ('section', 'div', 'ul')
+
+	# Block tags rendered as a single paragraph, mapped to their `_styles` key.
+	_PARAGRAPH_TAGS = {
+		'title': 'h1',
+		'h1': 'h1',
+		'h2': 'h2',
+		'h3': 'h3',
+		'h4': 'h4',
+		'h5': 'h5',
+		'h6': 'h6',
+		'h7': 'h7',
+		'h8': 'h8',
+		'p': 'p',
+		'paragraph': 'p',
+	}
+
+	# Tags rendered as a transition (a line of dashes).
+	_TRANSITION_TAGS = ('transition', 'hr')
+
+	# Inline tags mapped to the `_styles` key of their character style.
+	_INLINE_TAGS = {'strong': 'b', 'b': 'b', 'em': 'i', 'i': 'i'}
+
+	def __init__(self, blkcntnr, rst, style_overrides=None):
+		"""
+		*blkcntnr* is the object paragraphs are added to (it must provide
+		`add_paragraph`), *rst* is the RestructuredText source, and
+		*style_overrides* optionally maps keys of `_styles` to replacement
+		style names.
+		"""
+		self._blkcntnr = blkcntnr
+		self._rst = rst
+		# None instead of a literal {} default, so that no dict object is
+		# shared between instances.
+		self._style_overrides = {} if style_overrides is None else style_overrides
+		self._depth = 1
+
+
+	def render(self, h_stepdown=2):
+		"""
+		Parse the RestructuredText in *rst* and render it into *blkcntnr* as
+		paragraphs, bullets, etc., including recognizing and rendering bold
+		and italic runs within block elements.  Headings are demoted by
+		*h_stepdown* levels.
+		"""
+		t = xhtml_blurb(self._rst, h_stepdown=h_stepdown)
+		self._render_container(t)
+
+	@property
+	def _styles(self):
+		"""
+		The dict providing lookup for style names for this RST document.
+		It is built on first access from the defaults below, updated with
+		the style overrides given to the constructor.
+		"""
+		if not hasattr(self, '_styles_'):
+			self._styles_ = {
+				'h1': 'Heading 1',
+				'h2': 'Heading 2',
+				'h3': 'Heading 3',
+				'h4': 'Heading 4',
+				'h5': 'Heading 5',
+				'h6': 'Heading 6',
+				'h7': 'Heading 7',
+				'h8': 'Heading 8',
+				'p':  'Body Text',
+				'li': 'List Bullet',
+				# Continuation paragraphs of a list item; this key is looked
+				# up by `_render_bullet_list`.
+				'lc': 'List Continue',
+				'b':  'Strong',
+				'i':  'Emphasis',
+			}
+			self._styles_.update(self._style_overrides)
+		return self._styles_
+
+	def _render_container(self, container):
+		"""
+		Render each element in *container* in turn.
+
+		Raises NotImplementedError for a tag that is not handled.
+		"""
+		for element in container:
+			tag = element.tag
+			if tag in self._CONTAINER_TAGS:
+				self._render_container(element)
+			elif tag == 'blockquote':
+				self._render_blockquote(element)
+			elif tag in self._PARAGRAPH_TAGS:
+				self._render_paragraph(element, self._styles[self._PARAGRAPH_TAGS[tag]])
+			elif tag in self._TRANSITION_TAGS:
+				self._render_transition(self._styles['p'])
+			elif tag == 'li':
+				self._render_paragraph(element, self._styles['li'], depth=self._depth)
+			else:
+				raise NotImplementedError('unrecognized tag %s' % tag)
+
+	@property
+	def _rst_etree(self):
+		"""
+		Return the root element of the XHTML tree produced by converting
+		*rst* with `xhtml_blurb` (using its default heading step-down).
+		"""
+		return xhtml_blurb(self._rst)
+
+
+	def _render_blockquote(self, bq):
+		"""
+		Render the contents of *bq* with one extra level of depth.
+		"""
+		self._depth += 1
+		try:
+			# Render the direct children themselves, so that a paragraph
+			# placed straight inside the block quote is not skipped and a
+			# nested block quote adds its own level.
+			self._render_container(bq)
+		finally:
+			self._depth -= 1
+
+
+
+
+	def _render_bullet_list(self, bullet_list):
+		"""
+		Add a bullet to *blkcntnr* for each list item in *bullet_list*.
+		The first paragraph of an item uses the 'li' style and any further
+		paragraph of the same item uses the 'lc' style.
+		"""
+		def render_list_item(list_item):
+			for idx, para in enumerate(list_item):
+				style_key = 'li' if idx == 0 else 'lc'
+				self._render_paragraph(para, self._styles[style_key])
+
+		for list_item in bullet_list:
+			render_list_item(list_item)
+
+	def _render_paragraph(self, para, style, depth=None):
+		"""
+		Add a new paragraph to *blkcntnr* containing the content in the
+		`paragraph` element *para*. Create appropriate runs for text having
+		strong and emphasis inline formatting.
+
+		When *depth* is greater than 1 it is appended to the style name
+		(for example 'List Bullet 2').
+		"""
+		if depth and depth>1:
+			paragraph = self._blkcntnr.add_paragraph(style=style+" {}".format(depth))
+		else:
+			paragraph = self._blkcntnr.add_paragraph(style=style)
+		if para.text is not None:
+			paragraph.add_run(cut_extra_whitespace(para.text))
+		for child in para:
+			if child.tag=='p':
+				# text and tail are None when the element has none; treat
+				# that as empty text instead of failing.
+				paragraph.add_run(cut_extra_whitespace(child.text or '')+" ")
+				paragraph.add_run(cut_extra_whitespace(child.tail or '')+ " ")
+			elif child.tag in self._INLINE_TAGS:
+				style_key = self._INLINE_TAGS[child.tag]
+				if child.text is not None:
+					paragraph.add_run(cut_extra_whitespace(child.text), self._styles[style_key])
+				if child.tail is not None:
+					paragraph.add_run(cut_extra_whitespace(child.tail))
+			else:
+				# Any other child (such as a nested list) is rendered as
+				# block content one level deeper.  Only this child is
+				# rendered here; its siblings are handled by their own
+				# turn of the loop.
+				self._depth += 1
+				try:
+					self._render_container(child)
+				finally:
+					self._depth -= 1
+
+
+
+
+	def _render_transition(self, style=None):
+		"""
+		Add a new paragraph to *blkcntnr* holding a line of dashes, which
+		stands in for a transition (horizontal rule).
+
+		*style* is the paragraph style name; when omitted, the style
+		registered for 'p' is used.
+		"""
+		if style is None:
+			style = self._styles['p']
+		paragraph = self._blkcntnr.add_paragraph(style=style)
+		paragraph.add_run('---------------------------------')
+
+
+
+
+
+from ..model_reporter.docx import document_larchstyle
+
+def render_docx(rst, h_stepdown=2):
+	"""
+	Render the RestructuredText *rst* into a new document created by
+	`document_larchstyle` and return that document.  *h_stepdown* is passed
+	through to `RstRenderer.render`.
+	"""
+	document = document_larchstyle()
+	renderer = RstRenderer(document, rst)
+	renderer.render(h_stepdown=h_stepdown)
+	return document