In [1]:
import io
import os
import re
import zipfile, itertools

from lxml import etree as lxml

In [2]:
# Directory that holds the exported zip archives. Every relative path used by
# the cells below (the *.zip inputs and the "test" output folder) is resolved
# against it.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
EXPORT_DIR = "C:\\Users\\Nathan\\Downloads"
os.chdir(EXPORT_DIR)

In [3]:
def load_xml(z):
    """Parse the ``archive_manifest.xml`` member of an opened zip archive.

    z: archive object whose ``open(name, mode)`` returns a readable file that
       works as a context manager (the notebook passes a ``zipfile.ZipFile``).

    Returns whatever ``lxml.parse`` produces for that member; the member file
    is closed again before the function returns.
    """
    with z.open("archive_manifest.xml", 'r') as manifest:
        return lxml.parse(manifest)

def load_zip(file):
    """Open ``file`` with ``zipfile.ZipFile`` (default mode) and return it.

    The archive is returned open; this function does not close it.
    """
    archive = zipfile.ZipFile(file)
    return archive

When executed, the cells below gather the artifacts and artifact documents that were exported via XML export to "ArtifactDocumentsExport.zip" and load them into data structures using lxml.

They then build a fresh zip file that includes only the indicated documents and the artifacts contained
in those specific documents. Custom field values (other than "Release Notes") are cut out as well, to reduce
file size and minimize errors during this simple import process.

In [4]:
# Documents to scan for artifacts. Each entry is matched against the <summary>
# text of a <requirement-document> element in the export.
documents = [
    "PBS Bioreactor Market Requirements Document",
    "3.0.9 Test Plan",
    "3.0.9 Requirements",
]

In [5]:
# Load both export archives and parse their manifests.
azip = load_zip("ArtifactsExport.zip")
dzip = load_zip("ArtifactDocumentsExport.zip")

atree = load_xml(azip)
dtree = load_xml(dzip)

# NOTE(review): aroot is not read by any cell shown here -- artifacts are looked
# up in droot below. Confirm whether ArtifactsExport.zip is still needed.
aroot = atree.getroot()
droot = dtree.getroot()

# Map record id -> <requirement> element (the artifact).
amap = {req.find("record-id").text: req for req in droot.xpath("requirement")}

# Walk the requested documents and collect the ids of the artifacts they
# reference, so that only artifacts that are part of a document get imported.
# to_import keeps first-seen order; art_seen filters ids repeated across documents.
art_seen = set()
to_import = []
docs = []
for d in documents:
    # The title is passed as an XPath variable instead of splicing repr(d) into
    # the expression: repr() emits Python escapes (doubled backslashes, \') that
    # XPath string literals do not understand, so such titles could never match.
    matches = droot.xpath("requirement-document[./summary/text()=$title]", title=d)
    if not matches:
        raise IndexError("no requirement-document with summary %r in the export" % d)
    doc = matches[0]
    docs.append(doc)
    # "rid" rather than "id" so the builtin id() is not shadowed for later cells.
    for rid in doc.xpath("document-tree-node/node-requirement-id/text()"):
        if rid not in art_seen:
            art_seen.add(rid)
            to_import.append(rid)
            
def file_namer(start=4):
    """Yield an endless sequence of attachment file names.

    Each name is a counter zero-padded to eight digits followed by ``.dat``:
    ``00000004.dat``, ``00000005.dat``, ...

    start: first counter value. Defaults to 4, the value that was previously
           hard-coded.
           NOTE(review): why numbering begins at 4 is not evident from this
           notebook -- confirm against the import format.
    """
    for fileno in itertools.count(start):
        yield "%08d.dat" % fileno

# Zero-argument callable: every call returns the next unused attachment name.
new_filename = file_namer().__next__

class ZipQueue():
    """Deferred list of archive members to copy from one zip into another.

    Entries are recorded with add() and copied later, in insertion order, by
    docopy().
    """

    def __init__(self):
        # (source member name, destination member name) pairs
        self.q = []

    def add(self, name, dst):
        """Queue member ``name`` to be written under the new name ``dst``."""
        entry = (name, dst)
        self.q.append(entry)

    def docopy(self, z1, z2):
        """Copy every queued member out of ``z1`` and into ``z2``.

        z1: archive to read from (must provide ``open(name, 'r')``).
        z2: archive to write to (must provide ``writestr(name, data)``).

        Each member is read fully into memory before it is written.
        """
        for src_name, dst_name in self.q:
            with z1.open(src_name, 'r') as member:
                payload = member.read()
                z2.writestr(dst_name, payload)
            
zipq = ZipQueue()

# Fresh root with the same tag, attributes and leading text as the export root.
newroot = lxml.Element(droot.tag, droot.attrib)
newroot.text = droot.text

for rid in to_import:
    req = amap[rid]
    elem = lxml.SubElement(newroot, req.tag, req.attrib)
    elem.text = req.text
    elem.tail = req.tail
    to_replace = []
    # Iterate over a snapshot of the children: elem.append(e) below *moves* e
    # out of req, and the loop should not depend on how lxml iterates a parent
    # that is losing children.
    for e in list(req):
        # .get() so a custom-field-value without a field-name is dropped
        # instead of raising KeyError.
        if e.tag == 'custom-field-value' and e.get('field-name') != 'Release Notes':
            continue
        elif e.tag in ('record-id', 'requirement-number'):
            continue
        elif e.tag == 'attachment':
            # Re-create the attachment under a freshly numbered file name and
            # remember the rename so the archive member can be copied later.
            a = lxml.SubElement(elem, e.tag, e.attrib)
            nfn = new_filename()
            ofn = a.attrib['filespec']
            zipq.add(ofn, nfn)
            to_replace.append((ofn, nfn))
            a.attrib['filespec'] = nfn
        else:
            elem.append(e)

    # Rewrite references to the renamed attachments inside the description.
    if to_replace:
        desc = elem.find("description")
        # Guard against a missing <description> or one without text; both were
        # dereferenced unconditionally before.
        if desc is not None and desc.text:
            renames = {}
            for old_name, new_name in to_replace:
                if old_name:
                    # first rename wins for a repeated filespec
                    renames.setdefault(old_name, new_name)
            if renames:
                # Single pass over the text: chained str.replace() calls could
                # rewrite a name that an earlier rename had just produced, since
                # old and new names can share the NNNNNNNN.dat format. Longest
                # names first so a name that is a prefix of another cannot win.
                pattern = "|".join(
                    re.escape(old_name)
                    for old_name in sorted(renames, key=len, reverse=True)
                )
                desc.text = re.sub(pattern, lambda m: renames[m.group(0)], desc.text)

# The selected documents follow the artifacts (append moves them out of droot).
for doc in docs:
    newroot.append(doc)

print("creating zip file")
newtree = lxml.ElementTree(newroot)
with io.BytesIO() as f:
    newtree.write(
        f,
        pretty_print=True,
        xml_declaration=True,
        standalone=False,
        encoding='UTF-8',
        doctype=b'<!DOCTYPE TestTrackData SYSTEM "TestTrackData.dtd">',
    )
    data = f.getvalue()

# The with-block closes (and thereby finalizes) the archive even when copying an
# attachment fails, instead of leaving a half-written file handle open.
with zipfile.ZipFile("test\\test.zip", mode='w') as nzip:
    nzip.writestr("archive_manifest.xml", data)

    print("copying attachments")
    zipq.docopy(dzip, nzip)
print("done")

creating zip file
copying attachments
done


In [67]:
# The archive is only needed while its manifest is parsed, so it is closed
# again right afterwards instead of being left open for the kernel's lifetime.
with load_zip("TestCases.zip") as tzip:
    ttree = load_xml(tzip)
troot = ttree.getroot()

# NOTE: newroot / newtree rebind the names used by the previous cell.
newroot = lxml.Element(troot.tag, troot.attrib)
newroot.text = troot.text

# Custom field values whose field-value attribute is empty are removed, both
# under a test case's workflow events and directly under the test case.
empty_value_paths = (
    "workflow-event/custom-field-value[@field-value='']",
    "custom-field-value[@field-value='']",
)

# Keep only test cases whose folder path mentions 3.0.9.
for t in troot.xpath("test-case[./folder-path[contains(text(), '3.0.9')]]"):
    newroot.append(t)  # moves the test case out of troot
    for path in empty_value_paths:
        for e in t.xpath(path):
            e.getparent().remove(e)

newtree = lxml.ElementTree(newroot)
with open("test\\out.xml", 'wb') as f:
    newtree.write(f, pretty_print=True, xml_declaration=True, standalone=False, encoding='UTF-8',
                  doctype=b'<!DOCTYPE TestTrackData SYSTEM "TestTrackData.dtd">')