Skip to content

Commit

Permalink
docx updates
Browse files Browse the repository at this point in the history
  • Loading branch information
jpn-- committed Mar 3, 2017
1 parent fc4f6ff commit 17c08a3
Show file tree
Hide file tree
Showing 2 changed files with 240 additions and 1 deletion.
21 changes: 20 additions & 1 deletion py/model_reporter/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
try:
import docx
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_ALIGN_PARAGRAPH
except ImportError:

class DocxModelReporter():
Expand All @@ -28,6 +29,15 @@ def _append_to_document(self, other_doc):
def document_larchstyle():
document = docx.Document()

# normal = document.styles['Normal']
# normal.font.name = 'Arial'
# normal.font.size = docx.shared.Pt(11)
# normal.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
# normal.paragraph_format.line_spacing = 1.0
# normal.paragraph_format.widow_control = True
#
body_text = document.styles['Body Text']

monospaced_small = document.styles.add_style('Monospaced Small',WD_STYLE_TYPE.TABLE)
monospaced_small.base_style = document.styles['Normal']
monospaced_small.font.name = 'Courier New'
Expand All @@ -36,6 +46,15 @@ def document_larchstyle():
monospaced_small.paragraph_format.space_after = docx.shared.Pt(0)
monospaced_small.paragraph_format.line_spacing = 1.0

table_body_text = document.styles.add_style('Table Body Text',WD_STYLE_TYPE.TABLE)
table_body_text.base_style = document.styles['Body Text']
table_body_text.font.name = 'Arial Narrow'
table_body_text.font.size = docx.shared.Pt(9)
table_body_text.paragraph_format.space_before = docx.shared.Pt(1)
table_body_text.paragraph_format.space_after = docx.shared.Pt(1)
table_body_text.paragraph_format.line_spacing = 1.0


return document


Expand Down Expand Up @@ -69,7 +88,7 @@ def docx_params(self, groups=None, display_inital=False, **format):
if groups is None and hasattr(self, 'parameter_groups'):
groups = self.parameter_groups

table = docx_table(rows=1, cols=number_of_columns, style='Monospaced Small',
table = docx_table(rows=1, cols=number_of_columns, style='Table Body Text',
header_text="Model Parameter Estimates", header_level=2)

def append_simple_row(name, initial_value, value, std_err, tstat, nullvalue, holdfast):
Expand Down
220 changes: 220 additions & 0 deletions py/util/rst_to_docx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# encoding: utf-8

"""
Helper objects for rendering to .docx format.
Warning: This module is entirely experimental.
"""


from xml.etree import ElementTree
import textwrap
from docutils.core import publish_parts



def xhtml_blurb(blurb_rst, h_stepdown=2, **format):
    """
    Convert a reStructuredText blurb into an XHTML element tree.

    The text is dedented and stripped, published through the docutils
    HTML writer, and the resulting ``html_body`` part is parsed with
    ElementTree.  The root element's ``class`` attribute is set to
    ``'blurb'`` and every ``h1``..``h6`` element has its level increased
    by *h_stepdown* (so with the default of 2, ``h1`` becomes ``h3``).

    *blurb_rst* may be `str` or `bytes`; bytes are decoded with the
    default codec.  Extra keyword arguments are accepted but unused.

    Returns the root `Element` of the parsed body.

    Raises `TypeError` if *blurb_rst* is neither `str` nor `bytes`.
    """
    if isinstance(blurb_rst, bytes):
        blurb_rst = blurb_rst.decode()
    if not isinstance(blurb_rst, str):
        raise TypeError('blurb must be reStructuredText as str or bytes')
    blurb_rst = textwrap.dedent(blurb_rst).strip()
    html_body = publish_parts(blurb_rst, writer_name='html')['html_body']
    blurb_div = ElementTree.fromstring(html_body)
    blurb_div.attrib['class'] = 'blurb'
    # Walk from the deepest heading level upward so a heading that was
    # just renamed (e.g. h1 -> h3) is not matched and shifted again when
    # a later level is processed.  Each level is materialized with list()
    # before any tag is rewritten.
    for hlevel in (6, 5, 4, 3, 2, 1):
        for heading in list(blurb_div.iter('h{}'.format(hlevel))):
            heading.tag = 'h{}'.format(hlevel + h_stepdown)
    return blurb_div


def cut_extra_whitespace(x):
    """
    Collapse runs of spaces, tabs and newlines in *x* to a single space.

    Tabs and newlines are first turned into spaces, then any run of two
    or more spaces is reduced to one.  Leading and trailing whitespace is
    collapsed but not stripped, so word boundaries between adjacent runs
    are preserved.

    Returns the normalized string.
    """
    x = x.replace('\t', ' ').replace('\n', ' ')
    # Replacing a double space with a single one halves each run per pass;
    # repeat until no double space is left.
    while '  ' in x:
        x = x.replace('  ', ' ')
    return x


class RstRenderer(object):
    """
    Service class that knows how to render a RestructuredText string to
    a python-docx Document object.

    *blkcntnr* is the object paragraphs are added to (anything offering
    ``add_paragraph(style=...)``), *rst* is the reStructuredText source
    and *style_overrides* is an optional mapping merged over the default
    tag-to-style-name table.
    """

    # Tags that are transparent wrappers: their children are rendered in place.
    _CONTAINER_TAGS = frozenset(('section', 'div', 'ul'))

    # Tags rendered as a single paragraph, mapped to the style lookup key used.
    _PARAGRAPH_TAGS = {
        'title': 'h1',
        'h1': 'h1',
        'h2': 'h2',
        'h3': 'h3',
        'h4': 'h4',
        'h5': 'h5',
        'h6': 'h6',
        'h7': 'h7',
        'h8': 'h8',
        'p': 'p',
        'paragraph': 'p',
    }

    # Tags rendered as a horizontal-rule stand-in.
    _TRANSITION_TAGS = frozenset(('transition', 'hr'))

    # Inline formatting tags mapped to the style lookup key for their run.
    _INLINE_STYLE_KEYS = {'strong': 'b', 'em': 'i', 'b': 'b', 'i': 'i'}

    def __init__(self, blkcntnr, rst, style_overrides=None):
        self._blkcntnr = blkcntnr
        self._rst = rst
        # Copy so neither a shared default nor the caller's dict is aliased.
        self._style_overrides = dict(style_overrides) if style_overrides else {}
        self._depth = 1

    def render(self, h_stepdown=2):
        """
        Parse the RestructuredText in *rst* and render it into *blkcntnr* as
        paragraphs, bullets, etc., including recognizing and rendering bold
        and italic runs within block elements.

        *h_stepdown* is forwarded to `xhtml_blurb` and shifts heading levels.
        """
        tree = xhtml_blurb(self._rst, h_stepdown=h_stepdown)
        self._render_container(tree)

    @property
    def _styles(self):
        """
        The dict providing lookup for style names for this RST document.

        Built on first access from the defaults below plus any overrides
        given to the constructor, then cached on the instance.
        """
        if not hasattr(self, '_styles_'):
            self._styles_ = {
                'h1': 'Heading 1',
                'h2': 'Heading 2',
                'h3': 'Heading 3',
                'h4': 'Heading 4',
                'h5': 'Heading 5',
                'h6': 'Heading 6',
                'h7': 'Heading 7',
                'h8': 'Heading 8',
                'p': 'Body Text',
                'li': 'List Bullet',
                # Continuation paragraphs of a list item; looked up by
                # _render_bullet_list.
                'lc': 'List Continue',
                'b': 'Strong',
                'i': 'Emphasis',
            }
            self._styles_.update(self._style_overrides)
        return self._styles_

    def _render_container(self, container):
        """
        Render each element in *container* in turn.

        Raises `NotImplementedError` for a child tag that is not handled.
        """
        for element in container:
            tag = element.tag
            if tag in self._CONTAINER_TAGS:
                self._render_container(element)
            elif tag == 'blockquote':
                self._render_blockquote(element)
            elif tag in self._PARAGRAPH_TAGS:
                style_key = self._PARAGRAPH_TAGS[tag]
                self._render_paragraph(element, self._styles[style_key])
            elif tag in self._TRANSITION_TAGS:
                self._render_transition(self._styles['p'])
            elif tag == 'li':
                self._render_paragraph(element, self._styles['li'], depth=self._depth)
            else:
                raise NotImplementedError('unrecognized tag %s' % tag)

    @property
    def _rst_etree(self):
        """
        Return the root element produced by converting *rst* to XHTML and
        parsing it with ElementTree (see `xhtml_blurb`).
        """
        return xhtml_blurb(self._rst)

    def _render_blockquote(self, bq):
        """
        Render the contents of *bq* one nesting level deeper.

        The depth counter is restored even if rendering raises.
        """
        self._depth += 1
        try:
            # Render the blockquote's direct children, so that a paragraph
            # placed directly inside the blockquote is not skipped.
            self._render_container(bq)
        finally:
            self._depth -= 1

    def _render_bullet_list(self, bullet_list):
        """
        Add a bullet to *blkcntnr* for each list item in *bullet_list*.

        The first child of each item uses the ``'li'`` style; any further
        children use the ``'lc'`` (list continuation) style.
        """
        for list_item in bullet_list:
            for idx, para in enumerate(list_item):
                style_key = 'li' if idx == 0 else 'lc'
                self._render_paragraph(para, self._styles[style_key])

    def _render_paragraph(self, para, style, depth=None):
        """
        Add a new paragraph to *blkcntnr* containing the content in the
        `paragraph` element *para*. Create appropriate runs for text having
        strong and emphasis inline formatting.

        When *depth* is greater than 1 it is appended to the style name
        (e.g. ``'List Bullet 2'``).  Children that are neither ``p`` nor
        inline formatting are rendered as nested containers one level
        deeper.
        """
        if depth and depth > 1:
            style = "{} {}".format(style, depth)
        paragraph = self._blkcntnr.add_paragraph(style=style)
        if para.text is not None:
            paragraph.add_run(cut_extra_whitespace(para.text))
        for child in para:
            if child.tag == 'p':
                # Either text or tail may be absent; skip rather than crash.
                if child.text is not None:
                    paragraph.add_run(cut_extra_whitespace(child.text) + " ")
                if child.tail is not None:
                    paragraph.add_run(cut_extra_whitespace(child.tail) + " ")
            elif child.tag in self._INLINE_STYLE_KEYS:
                style_key = self._INLINE_STYLE_KEYS[child.tag]
                if child.text is not None:
                    paragraph.add_run(cut_extra_whitespace(child.text), self._styles[style_key])
                if child.tail is not None:
                    paragraph.add_run(cut_extra_whitespace(child.tail))
            else:
                # Nested block (e.g. a sub-list): render this child only, so
                # several nested blocks are not each rendered repeatedly.
                self._depth += 1
                try:
                    self._render_container(child)
                finally:
                    self._depth -= 1

    def _render_transition(self, style=None):
        """
        Add a paragraph to *blkcntnr* holding a row of dashes, standing in
        for a transition / horizontal rule.

        *style* is the paragraph style name; when omitted the style
        registered under ``'p'`` is used.
        """
        if style is None:
            style = self._styles['p']
        paragraph = self._blkcntnr.add_paragraph(style=style)
        paragraph.add_run('---------------------------------')




from ..model_reporter.docx import document_larchstyle

def render_docx(rst, h_stepdown=2):
    """
    Render the reStructuredText *rst* into a freshly created document.

    A new document is obtained from `document_larchstyle`, an
    `RstRenderer` writes *rst* into it (with heading levels shifted by
    *h_stepdown*), and the document is returned.
    """
    document = document_larchstyle()
    renderer = RstRenderer(document, rst)
    renderer.render(h_stepdown=h_stepdown)
    return document

0 comments on commit 17c08a3

Please sign in to comment.