In [1]:
import win32com.client as w32_client
import win32com.client.gencache as w32c_gen
from pywintypes import com_error  # pylint: disable=I0011,E0611
from win32com.client import constants as wincom_const
import sys
import json
import contextlib
import os
import time
import re

class WdC():
    """Lazy proxy for the constants exposed by ``wincom_const``.

    Attribute access is forwarded to ``wincom_const``; each value that is
    found is stored on the instance so later reads never reach
    ``__getattr__`` again.
    """

    def __getattr__(self, a):
        # __getattr__ only runs when normal attribute lookup has failed,
        # i.e. the first time a given constant is requested.
        value = getattr(wincom_const, a, None)
        if value is not None:
            # Cache on the instance so the next access is a plain lookup.
            object.__setattr__(self, a, value)
            return value
        raise AttributeError("%s not found"%a)

wdc = WdC()

# Sentinel meaning "caller did not pass this argument" (None/False are valid values).
_unspecified = object()

@contextlib.contextmanager
def screen_lock(word, visible=_unspecified):
    """Suspend screen updating on *word* for the duration of the block.

    Args:
        word: Application object exposing ``ScreenUpdating`` and ``Visible``.
        visible: Optional value to assign to ``word.Visible`` while the block
            runs; the previous value is put back on exit. When omitted,
            visibility is left untouched.

    Yields:
        The same *word* object.

    ``ScreenUpdating`` is always set back to True on exit, including when
    changing the visibility itself fails.
    """
    restore_visible = False
    previous = None
    word.ScreenUpdating = False
    try:
        # Done inside the try so a failure here still re-enables
        # screen updating in the finally clause.
        if visible is not _unspecified:
            previous = word.Visible
            word.Visible = visible
            restore_visible = True
        yield word
    finally:
        word.ScreenUpdating = True
        # Only undo the visibility change if it was actually applied.
        if restore_visible:
            word.Visible = previous

@contextlib.contextmanager
def hidden(doc):
    """Hide *doc*'s active window while the block runs.

    The window's previous ``Visible`` value is written back on exit. A
    ``com_error`` raised while restoring it is ignored.

    Yields:
        The same *doc* object.
    """
    window = doc.ActiveWindow
    was_visible = window.Visible
    window.Visible = False
    try:
        yield doc
    finally:
        # Best effort: restoring visibility is allowed to fail with a COM error.
        try:
            window.Visible = was_visible
        except com_error:
            pass

def Word(visible=True):
    """Return a Word application object with ``Visible`` set to *visible*.

    ``GetObject`` is tried first; if it raises ``com_error`` the application
    is obtained through ``gencache.EnsureDispatch`` instead.
    """
    prog_id = "Word.Application"
    try:
        # NOTE(review): the first positional parameter of
        # win32com.client.GetObject appears to be a pathname/moniker rather
        # than a ProgID, so this call may always fall through to the
        # EnsureDispatch branch -- confirm before relying on it to attach
        # to a running instance.
        app = w32_client.GetObject(prog_id)
    except com_error:
        app = w32c_gen.EnsureDispatch(prog_id)
    app.Visible = visible
    return app

def fp_for_section(s, r):
    """Return the absolute path of the document for section *s*.

    Args:
        s: Section name.
        r: Mapping of section name -> substitute section name. When *s* has
            an entry that differs from *s*, the substitute is used and the
            swap is printed.
    """
    chosen = r.get(s, s)
    if chosen != s:
        print("    replacing: %r --> %r"%(s, chosen))
    relative = "Sections\\" + chosen
    return fp_for_file(relative)


def fp_for_file(file):
    """Return the absolute path for *file*, appending ``.docx`` if it is missing."""
    suffix = ".docx"
    named = file if file.endswith(suffix) else file + suffix
    return os.path.abspath(named)


class Spec():
    """Read-only style wrapper around a nested dict with dotted-key lookup.

    ``spec["A.B"]`` walks ``obj["A"]["B"]``. ``keys``/``values``/``items``
    and iteration operate on the top level of the wrapped object.
    """

    def __init__(self, obj):
        self._obj = obj
        self._parents = []

    def __getitem__(self, key):
        return self.lookup(key)

    def get(self, key, default=None):
        """Return the value at dotted *key*, or *default* if it cannot be resolved."""
        try:
            return self.lookup(key)
        except KeyError:
            return default

    def lookup(self, key):
        """Resolve dotted *key*.

        Raises:
            KeyError: carrying the full dotted key, when any path component
                is missing or when the path runs through a value that
                cannot be indexed by that component (e.g. a string or list).
        """
        path = key.split(".")
        try:
            return self._lookup(path)
        except (KeyError, IndexError, TypeError):
            # TypeError/IndexError occur when an intermediate value is not a
            # mapping; report them uniformly so get() can fall back to its
            # default instead of crashing.
            raise KeyError(key) from None

    def _lookup(self, path):
        return self._internal_lookup(path)

    def _internal_lookup(self, path):
        value = self._obj
        for part in path:
            value = value[part]
        return value

    def __repr__(self):
        # repr must not raise for a spec that has no 'Name' entry.
        return "<Spec: %s>"%self.get('Name', '<unnamed>')

    def keys(self):
        return self._obj.keys()

    def values(self):
        return self._obj.values()

    def items(self):
        return self._obj.items()

    def __iter__(self):
        return iter(self._obj)


class SanityError(Exception):
    """Raised when a loaded spec fails a consistency check."""
    
def merge(src, dst):
    """Recursively merge mapping *src* into dict *dst*, in place.

    Nested dicts are merged key by key (and copied, so *dst* never shares a
    dict object with *src*); every other value from *src* overwrites the
    entry in *dst*. If *src* holds a dict where *dst* holds a non-dict, the
    *dst* entry is replaced by a fresh dict before merging.
    """
    for k, v in src.items():
        if isinstance(v, dict):
            # A scalar already stored under this key cannot be merged into;
            # the incoming dict replaces it.
            if k not in dst or not isinstance(dst[k], dict):
                dst[k] = {}
            merge(v, dst[k])
        else:
            dst[k] = v

def get_merged_props(target, specs):
    """Return a new dict holding *target*'s entries merged over its parents'.

    Args:
        target: A ``Spec`` (or plain dict) that may list parent names under
            ``"ParentSpec"``.
        specs: Mapping of spec name -> spec, used to resolve parent names.

    Parents are processed in reverse listing order, so earlier-listed
    parents override later-listed ones, and the target's own entries
    override all parents.
    """
    ret = {}
    for p in reversed(target.get("ParentSpec", [])):
        merge(get_merged_props(specs[p], specs), ret)
    # Merge the target's own data last; unwrap a Spec to its raw dict.
    merge(getattr(target, "_obj", target), ret)
    return ret


# 9/5/2019 
# I'm pretty sure this function doesn't actually
# work the way it is supposed to, but it works on 
# simple linear chains
def merge_parents(props, parent, seen, specs):
    """Merge *parent* and its not-yet-seen ancestors into *props*, in place.

    Args:
        props: Dict that accumulates the merged result.
        parent: Spec or dict to merge; its ``"ParentSpec"`` names are
            resolved through *specs* and merged first.
        seen: Set of names already merged; updated with *parent*'s name.
        specs: Mapping of spec name -> spec.
    """
    ancestor_names = parent.get("ParentSpec", [])
    for ancestor_name in reversed(ancestor_names):
        if ancestor_name not in seen:
            merge_parents(props, specs[ancestor_name], seen, specs)
    # Ancestors first, then this spec, so its own values win.
    merge(parent, props)
    own_name = parent['Name']
    print("  merged", own_name)
    seen.add(own_name)

def loadspec_fromfile(fp):
    """Load the spec file at *fp* and return its processed targets."""
    raw = get_spec(fp)
    rawspecs = raw['Specs']
    rawtargets = raw['Targets']
    return loadspec(rawspecs, rawtargets)

def loadspec(rawspecs, rawtargets):
    """Wrap raw spec/target dicts, resolve inheritance, and validate targets.

    Args:
        rawspecs: Iterable of dicts, each with a ``"Name"``.
        rawtargets: Iterable of dicts, each with a ``"Name"``.

    Returns:
        List of ``Spec`` objects, one per target, whose data has been
        replaced by the result of ``merge_parents``.

    Raises:
        SanityError: a non-mixin target has a ``Properties.TextReplace``
            key that is neither ``"_Main"`` nor one of its sections.
    """
    registry = {}
    for raw in rawspecs:
        wrapped = Spec(raw)
        registry[wrapped['Name']] = wrapped

    targets = []
    for raw in rawtargets:
        wrapped = Spec(raw)
        registry[wrapped['Name']] = wrapped
        targets.append(wrapped)

    # Replace each target's data with its fully merged property tree.
    for target in targets:
        print("prop merge for %r -> {}"%target['Name'])
        merged = {}
        merge_parents(merged, target._obj, set(), registry)
        target._obj = merged
        print()

    # sanity checks
    for target in targets:
        known_sections = set(target['Properties.Sections'])
        for section in target.get('Properties.TextReplace', {}).keys():
            if section == '_Main' or section in known_sections:
                continue
            if target.get('IsMixin', False):
                continue
            raise SanityError("TextReplace for %r contains unknown section: %r"%(target['Name'], section))

    return targets


def get_spec(fp):
    """Parse the JSON file at *fp* and return the decoded object.

    The file is read as UTF-8 regardless of the platform's default
    encoding; a leading byte-order mark, if present, is skipped.
    """
    # 'utf-8-sig' decodes plain UTF-8 and also strips a BOM, which
    # json.load would otherwise reject.
    with open(fp, 'r', encoding='utf-8-sig') as f:
        return json.load(f)

def open_doc(w, fp):
    """Open the document at path *fp* in Word application *w* and return it.

    Raises:
        FileNotFoundError: Word reported an exception and *fp* does not
            exist on disk.
        com_error: any other failure reported by Word.
    """
    try:
        return w.Documents.Open(fp)
    except com_error as e:
        # -2147352567 is DISP_E_EXCEPTION, the generic "exception occurred"
        # HRESULT, so it does not by itself mean the file is missing.
        # Only translate it when the path really is absent.
        if e.hresult == -2147352567 and not os.path.exists(fp):
            raise FileNotFoundError(fp) from e
        raise


def exec_text_replace(rng, replace):
    """Run a replace-all over *rng* for every ``find -> replacement`` pair.

    Args:
        rng: Object exposing a ``Find`` property (a Word range).
        replace: Mapping of search text -> replacement text.

    Each pair is reported on stdout; a pair whose ``Execute`` call returns
    a falsy value is reported as a warning.
    """
    finder = rng.Find
    for needle, substitute in replace.items():
        finder.Text = needle
        finder.Replacement.Text = substitute
        finder.Wrap = wdc.wdFindContinue
        succeeded = finder.Execute(Replace=wdc.wdReplaceAll)
        if not succeeded:
            print("    warning: failed to replace %r"%needle)
            continue
        print("    text_replace: %r -> %r"%(needle, substitute))


def import_section(w, m, doc, text_replace):
    """
    Copies the whole section document into the master document
    using the clipboard (via Word's built in Copy/Paste methods),
    then post-processes the last table of the master document.

    Args:
        w: Word application object; its ``Selection`` is used for
            the copy, the cursor movement and the paste.
        m: Master document that receives the pasted content.
        doc: Section document whose content is copied.
        text_replace: Mapping passed to ``exec_text_replace`` for the
            last table in ``m`` after pasting; skipped when empty.

    Does some nonsense to move the selection range
    around to insert all sections in the correct location.
    """

    # Select everything in the section document and put it on the clipboard.
    doc.Select()
    w.Selection.Copy()

    # Reposition the selection inside the master document before pasting.
    # NOTE(review): presumably this leaves the insertion point at the end
    # of the master document -- confirm against Word's Selection semantics.
    m.Select()
    s = w.Selection
    s.MoveDown(wdc.wdLine, 1, wdc.wdMove)
    s.MoveEnd(wdc.wdCharacter, -1)
    # Remember the pre-paste range; it is used below to remove the
    # separator between the previous content and the pasted content.
    r = s.Range
    s.Paste()
    # The table with the highest index is treated as the one just pasted.
    last_table = m.Tables(m.Tables.Count)

    # checking this instead of letting the iterator
    # no-op saves two calls to the COM server
    if text_replace:
        exec_text_replace(last_table.Range, text_replace)

    # center alignment before stitching
    last_table.Rows.Alignment = wdc.wdAlignRowCenter

    # stitch tables together
    r.MoveEnd(wdc.wdCharacter, -1)
    r.Delete()

def update_all_fields(m):
    """Update the fields in every story of document *m*.

    For each entry of ``m.StoryRanges`` the chain of ``NextStoryRange``
    links is followed. Stories whose ``StoryType`` is between 6 and 11
    inclusive also get the fields inside their text-bearing shapes updated.
    """
    # adapted from https://stackoverflow.com/questions/33733113/macro-to-update-all-fields-in-a-word-document
    print("  updating document fields")
    first_type, last_type = 6, 11
    for head in m.StoryRanges:
        current = head
        while current:
            current.Fields.Update()
            story_type = current.StoryType
            if first_type <= story_type <= last_type:
                for shape in current.ShapeRange:
                    if not shape.TextFrame.HasText:
                        continue
                    shape.TextFrame.TextRange.Fields.Update()
            current = current.NextStoryRange


def create_file(w, template, filepath, sections, replace, target, docs):
    """Build one output document from a template plus section documents.

    Opens the template, applies the ``_Main`` text replacements to the
    whole document, imports each non-skipped section with
    ``import_section``, deletes what follows the last table, prints any
    ``{...}`` placeholders still present, and saves the result.

    Args:
        w: Word application object.
        template: Template path; ``.docx`` is appended if missing.
        filepath: Output path; ``.docx`` is appended if missing.
        sections: Iterable of section names, imported in order.
        replace: Mapping of section name -> substitute section name,
            passed to ``fp_for_section``.
        target: Spec-like object; ``Properties.TextReplace`` and
            ``Properties.SkipSections`` are read from it via ``get``.
        docs: Mapping of absolute section path -> already opened document.

    NOTE(review): ``docs[fp]`` raises KeyError when ``replace`` maps a
    section to a name that was not pre-opened; ``open_section_docs`` only
    opens names listed under ``Properties.Sections``.
    NOTE(review): the document ``m`` is closed only when
    ``import_section`` fails; it stays open after a successful save and
    after failures in any other step -- confirm that is intended.
    """
    
    template = fp_for_file(template)
    filepath = fp_for_file(filepath)

    m = open_doc(w, template)
    text_replacements = target.get("Properties.TextReplace", {})
    skips = set(target.get("Properties.SkipSections", []))

    # NOTE(review): the purpose of this cursor movement is not evident
    # from this function alone (import_section repositions the selection
    # itself) -- confirm whether it is still needed.
    m.Select()
    w.Selection.MoveDown(wdc.wdLine, 1, wdc.wdMove)
    w.Selection.MoveEnd()

    # "_Main" replacements apply to the template text as a whole.
    text_replace = text_replacements.get("_Main",{})
    if text_replace:
        exec_text_replace(m.Range(), text_replace)

    for section in sections:

        if section in skips:
            print("  skipping %r"%section)
            continue

        print("  importing %r"%section)
        fp = fp_for_section(section, replace)

        doc = docs[fp]

        # Replacements are looked up by the original section name,
        # not by the substitute chosen through `replace`.
        text_replace = text_replacements.get(section, {})
        try:
            import_section(w, m, doc, text_replace)
        except:
            # Discard the half-built document without saving, then re-raise.
            m.Close(False)
            raise

    # trim whitespace at end of doc
    m.Tables(m.Tables.Count).Select()
    w.Selection.MoveDown(wdc.wdLine, 1, wdc.wdMove)
    w.Selection.MoveDown(wdc.wdLine, 999, wdc.wdExtend)
    w.Selection.Delete()

    # check for missed replacement brackets
    ptrn = r"(\{.*?\})"
    bracket_find = re.compile(ptrn).findall
    for var in bracket_find(m.Range().Text):
        print("  variable not replaced: %r"%var)

    # file is saved twice so that
    # the filename fields update properly
    m.SaveAs(filepath)
    update_all_fields(m)
    m.Save()

    
def _addmap(w, docs, section, raise_if_not_found=False):
    """Open the document for *section* and record it in *docs* by absolute path.

    Does nothing when the path is already a key of *docs*. A missing file
    is ignored unless *raise_if_not_found* is true, in which case
    ``FileNotFoundError`` is raised.
    """
    path = fp_for_file("Sections\\" + section)
    if path in docs:
        return
    if os.path.exists(path):
        docs[path] = open_doc(w, path)
    elif raise_if_not_found:
        raise FileNotFoundError(path)


def open_section_docs(w, docs, targets):
    """Open every section document referenced by *targets* into *docs*.

    Opening and closing the section documents dominated the run time, so
    each distinct section is opened once up front and kept in *docs*
    (absolute path -> document) for reuse.
    """
    print("Pre-loading section documents...")
    docnames = {
        section
        for target in targets
        for section in target['Properties.Sections']
    }

    total = len(docnames)
    for position, name in enumerate(docnames, 1):
        print("\rOpening document %d/%d '%s'                   "%(position, total, name), end="")
        _addmap(w, docs, name, True)

    print('\n') # two lines


def create_files(w, targets):
    """Create one output document per non-excluded target.

    Section documents are pre-opened once and closed again (without
    saving) when the run ends, whether it succeeded or failed. The number
    of files created is printed on exit.
    """
    print()
    if len(targets) == 0:
        print("No targets found")
        return

    # docs is created here and passed in so the finally clause can close
    # whatever was opened even if pre-loading itself fails part-way.
    docs = {}
    created = 0
    try:
        open_section_docs(w, docs, targets)

        for target in targets:
            name = target["Name"]
            dn = target["DocumentNumber"]
            filename = "%s %s.docx" % (dn, name)

            if target.get("Exclude") is True:
                print("Skipping file %r" % filename)
                continue

            out_dir = "out\\%s"%dn
            filepath = "out\\%s\\%s" % (dn, filename)
            os.makedirs(out_dir, exist_ok=True)
            template = "Templates\\" + target["Template"]

            replace = target.get("Replace", {})
            sections = target['Properties.Sections']

            print("Creating file: %r"%filename)
            create_file(w, template, filepath, sections, replace, target, docs)
            created += 1
            print()

    finally:
        print("Successfully created %d file(s)"%(created))
        total = len(docs)
        for position, document in enumerate(docs.values(), 1):
            print("\rClosing document %d/%d        "%(position, total), end="")
            if document is not None:
                document.Close(False)
        print()


def main(fspec):
    """Load the spec file *fspec* and generate all target documents in Word.

    Word is quit only when generation raises; the exception is re-raised.
    """
    targets = loadspec_fromfile(fspec)
    word = Word()
    try:
        with screen_lock(word, False):
            create_files(word, targets)
    except BaseException:
        # Includes KeyboardInterrupt: shut Word down before propagating.
        word.Quit()
        raise

In [2]:
# Absolute path of the spec file processed by the cells below.
fp = r"C:\Users\Nathan\Documents\Dropbox\PM\CUT MY DOC INTO PIECES2\PMForms.json"

In [31]:
def merge(src, dst):
    """Recursively merge mapping *src* into dict *dst*, in place.

    Nested dicts are merged key by key (and copied, so *dst* never shares a
    dict object with *src*); every other value from *src* overwrites the
    entry in *dst*. If *src* holds a dict where *dst* holds a non-dict, the
    *dst* entry is replaced by a fresh dict before merging.
    """
    for k, v in src.items():
        if isinstance(v, dict):
            # A scalar already stored under this key cannot be merged into;
            # the incoming dict replaces it.
            if k not in dst or not isinstance(dst[k], dict):
                dst[k] = {}
            merge(v, dst[k])
        else:
            dst[k] = v

def build_merged_props(target, specs):
    """Replace *target*'s wrapped data with its inheritance-merged properties."""
    merged = get_merged_props(target, specs)
    target._obj = merged
    
def get_merged_props(target, specs):
    """Return a new dict with *target*'s data merged over its parents' data.

    Parents named in ``"ParentSpec"`` are resolved through *specs* and
    processed in reverse listing order; each parent is itself fully
    resolved first. The target's own data is merged last.
    """
    merged = {}
    parent_names = target.get("ParentSpec", [])
    for parent_name in reversed(parent_names):
        inherited = get_merged_props(specs[parent_name], specs)
        merge(inherited, merged)
        print("  merged: ", parent_name, "->", target['Name'])
    # Own values go in last so they override anything inherited.
    merge(target._obj, merged)
    return merged

def loadspec(rawspecs, rawtargets):
    """Wrap raw spec/target dicts, resolve inheritance, and validate targets.

    Args:
        rawspecs: Iterable of dicts, each with a ``"Name"``.
        rawtargets: Iterable of dicts, each with a ``"Name"``.

    Returns:
        List of ``Spec`` objects, one per target, with merged properties.

    Raises:
        ValueError: resolving parents hit the recursion limit.
        SanityError: a non-mixin target has a ``Properties.TextReplace``
            key that is neither ``"_Main"`` nor one of its sections.
    """
    registry = {}
    for raw in rawspecs:
        wrapped = Spec(raw)
        registry[wrapped['Name']] = wrapped

    targets = []
    for raw in rawtargets:
        wrapped = Spec(raw)
        registry[wrapped['Name']] = wrapped
        targets.append(wrapped)

    for target in targets:
        print("prop merge for %r -> {}"%target['Name'])
        try:
            build_merged_props(target, registry)
        except RecursionError:
            raise ValueError("Recursion limit exceeded: cycles found in parent graph") from None
        print()

    # sanity checks
    for target in targets:
        known_sections = set(target['Properties.Sections'])
        for section in target.get('Properties.TextReplace', {}).keys():
            if section == '_Main' or section in known_sections:
                continue
            if target.get('IsMixin', False):
                continue
            raise SanityError("TextReplace for %r contains unknown section: %r"%(target['Name'], section))

    return targets

def loadspec_fromfile(fp):
    """Load the spec file at *fp* and return its processed targets."""
    raw = get_spec(fp)
    rawspecs = raw['Specs']
    rawtargets = raw['Targets']
    return loadspec(rawspecs, rawtargets)

In [33]:
def printdict(d, l=0, enabled=False):
    """Print nested dict *d* as an indented tree, when *enabled*.

    Args:
        d: Dict to print; nested dicts are printed one level deeper.
        l: Current indentation level (two spaces per level).
        enabled: Printing is off by default, so existing calls stay
            silent; pass ``enabled=True`` to get the output.
    """
    if not enabled:
        return
    space = "  "*l
    for k, v in d.items():
        if isinstance(v, dict):
            print(space, "%r:"%k)
            printdict(v, l+1, enabled)
        else:
            print(space, "%r: %r"%(k, v))

# Dump every target's top-level entries; nested dicts go through printdict.
for s in loadspec_fromfile(fp):
    for k, v in s._obj.items():
        if not isinstance(v, dict):
            print(" %r: %r"%(k,v))
        else:
            printdict(v)
    # Blank line between targets.
    print()

prop merge for 'IA-3-B-5xx' -> {}
  merged:  PBS 3 MAG -> IA-3-B-5xx

prop merge for 'IA-3-B-7xx' -> {}
  merged:  PBS 3 MAG -> IA-3-B-7xx
  merged:  Hamilton SU -> IA-3-B-7xx

prop merge for 'IA-15-B-5xx' -> {}
  merged:  PBS 15 MAG -> IA-15-B-5xx

prop merge for 'IA-15-B-7xx' -> {}
  merged:  PBS 15 MAG -> IA-15-B-7xx
  merged:  Hamilton SU -> IA-15-B-7xx

prop merge for 'IA-80-B-5xx' -> {}
  merged:  PBS 15 MAG -> PBS 80 MAG
  merged:  PBS 80 MAG -> IA-80-B-5xx

prop merge for 'IA-80-B-7xx' -> {}
  merged:  PBS 15 MAG -> PBS 80 MAG
  merged:  PBS 80 MAG -> IA-80-B-7xx
  merged:  Hamilton SU -> IA-80-B-7xx

 'Template': 'THIS IS MY LAST REPORT3'
 'Exclude': False
 'ParentSpec': ['PBS 3 MAG']
 'Name': 'IA-3-B-5xx'
 'DocumentNumber': 'IF00118'

 'Template': 'THIS IS MY LAST REPORT3'
 'Exclude': False
 'Name': 'IA-3-B-7xx'
 'ParentSpec': ['Hamilton SU', 'PBS 3 MAG']
 'IsMixin': True
 'DocumentNumber': 'IF00118'

 'Template': 'THIS IS MY LAST REPORT3'
 'Exclude': False
 'ParentSpec': ['P