In [1]:
import os
from lxml import etree as lxml
import zipfile, itertools
import io

In [2]:
def load_xml(z):
    """Parse ``archive_manifest.xml`` from the open zip archive `z`.

    Returns the parsed element tree. The archive member is closed before
    returning; the archive itself is left open.
    """
    with z.open("archive_manifest.xml", 'r') as manifest:
        return lxml.parse(manifest)

def load_zip(file):
    """Open `file` (a path or file-like object) as a zip archive.

    Returns the ``zipfile.ZipFile``; the caller is responsible for closing it.
    """
    archive = zipfile.ZipFile(file)
    return archive

In [3]:
# Selection criteria for the import.
#
# `documents`: summaries of the requirement documents to import. Every
# requirement referenced by one of these documents is imported with it.
#
# `folders`: (old path, new path) pairs. A folder-path that equals an old path
# is rewritten to the new path, and items whose folder-path equals one of the
# new paths are imported.

documents = [
    'PBS Bioreactor Market Requirements Document',
    '3.0.9 Test Plan',
    '3.0.9 Requirements',
    'ID00013A2 Master Project Document',
]

folders = [
    ("/Public/Public/Bioreactor Development/PBS Software/v3 Release/3.0.9", "/Public/Software Development/Bioreactor Control Software/v3.0+/3.0.9"),
    ("/Public/Public/Bioreactor Development/PBS Software/v3 Release/3.0.9/Tests", "/Public/Software Development/Bioreactor Control Software/v3.0+/3.0.9/Tests"),
    ("/Public/Public/Bioreactor Development/PBS Software/v3 Release/3.0.9/Results", "/Public/Software Development/Bioreactor Control Software/v3.0+/3.0.9/Results"),
    ("/Public/PBS-1/Design and Development Documents", "/Public/Bioreactor Development/PBS 1L Bioreactor/Initial Release/Concept"),
    ("/Public/PBS-1/DIDO Requirements", "/Public/Bioreactor Development/PBS 1L Bioreactor/Initial Release/Concept"),
    ("/Public/PBS 3 MAG/Single Use Sensors", "/Public/Bioreactor Development/PBS 3L Bioreactor/Single Use Sensors"),
    ("/Public/PBS-MINI/AT MINI Vessel", "/Public/Bioreactor Development/PBS MINI Bioreactor/AT MINI Vessel"),
    ("/Public/PBS-15 MAG", "/Public/Bioreactor Development/PBS 15L Bioreactor/Initial Development"),
    ("/Public/Public/Bioreactor Development", "/Public/Bioreactor Development")
]

In [4]:
# Manual steps:
# - create folder: PBS Mini Bioreactor / AT Mini Vessel
# - create folder: PBS 15L Bioreactor / Initial Development
# - create folder: PBS 3 Bioreactor / Initial Development
# - delete unused PBS 3 folders
# - manually correct the "report type" field code in the one test case

In [5]:
# load the data
# The backslash is escaped explicitly: "\E" is not a recognised escape
# sequence, and recent Python versions warn about such literals.
thezip = load_zip("Helix Migration Test\\ExportData.zip")
thetree = load_xml(thezip)
theroot = thetree.getroot()

In [6]:
import copy

# Maps the text of a requirement's requirement-type element to the type name
# that replaces it. Types that are not listed here are left unchanged.
req_type_map = {
    'Functional': 'Functional Requirement',
    'Software': 'Software Requirement',
    'Manufacturing': 'Manufacturing Requirement',
    'Interface': 'Design Specification',
    'System': 'System Requirement',
    'Electrical': 'System Requirement',
    'Localization': "System Requirement",
    'Physical': 'Hardware Requirement',
    'Design': 'Design Specification',
    'Mechanical': 'Design Specification',
    'Hardware': 'Hardware Requirement',
    'Compliance': 'Compliance Requirement',
    'Performance': 'Design Metric',
    'Risk Evaluation': 'Risk Analysis',
    'Environmental': 'Compliance Requirement',
    'Issues & Complaints': 'General Info',
    'Product': 'Product Requirement',
    'Configuration': 'Configuration Specification'
}

# folder-path elements whose text is one of these paths are removed from
# their parent item.
bad_folders = {
    '/Public/WysiPump 2000/Product Backlog',
    '/Public/Public/Bioreactor Development/PBS Software/Test Repository',
    '/Public/PBS 3 Releases',
    '/Public/Public/Bioreactor Development/PBS 15L Bioreactor/Phase 2'
}

# test-case-coverage-variable elements whose variable-name attribute is one of
# these names are removed from their test case.
bad_variants = {
    'Browser',
    'Firmware Chip',
    'Operating System',
    'Peanut Butter Brand'
}

class UniqueElementContainer():
    """Collects elements under a new root, at most once per record id.

    Attributes:
        root: element that receives the collected children.
        seen: text of the ``record-id`` child of every element collected so far.
        zipq: ``filespec`` values of the attachments met while copying elements.
    """

    def __init__(self, tag, attrib, text):
        """Create the container root from a tag, an attribute mapping and its text."""
        container_root = lxml.Element(tag, attrib)
        container_root.text = text
        self.root = container_root
        self.seen = set()
        self.zipq = []

    def add(self, elem):
        """Append `elem` itself (not a copy) unless its record id was already seen."""
        record_id = elem.find("record-id").text
        if record_id in self.seen:
            return
        self.seen.add(record_id)
        self.root.append(elem)

    def copy_element(self, elem):
        """Append a filtered copy of `elem` to the root and return the copy.

        Returns None when the element's record id was already seen.

        Children are filtered as follows:
        - ``custom-field-value`` is kept only when its ``field-name``
          attribute is 'Release Notes';
        - the numbering children (requirement / defect / test-case /
          requirement-document number) are dropped;
        - ``attachment`` and ``tcm-attachment`` are recreated with tag and
          attributes only, and their ``filespec`` is queued in ``zipq``;
        - everything else is deep-copied.
        """
        record_id = elem.find("record-id").text
        if record_id in self.seen:
            return None
        self.seen.add(record_id)

        clone = lxml.SubElement(self.root, elem.tag, elem.attrib)
        clone.text = elem.text
        clone.tail = elem.tail

        numbering_tags = (
            'requirement-number',
            'defect-number',
            'test-case-number',
            'requirement-document-number',
        )
        attachment_tags = ('attachment', 'tcm-attachment')

        for child in elem:
            tag = child.tag
            if tag == 'custom-field-value':
                if child.attrib['field-name'] == 'Release Notes':
                    clone.append(copy.deepcopy(child))
            elif tag in numbering_tags:
                pass
            elif tag in attachment_tags:
                stub = lxml.SubElement(clone, tag, child.attrib)
                self.zipq.append(stub.attrib['filespec'])
            else:
                clone.append(copy.deepcopy(child))
        return clone

    def copy_link(self, link):
        """Copy `link` under the root, pruning references to uncollected items.

        Every ``entity-id`` whose text is not a seen record id is removed from
        the copy. The copied link is removed again when at most one
        ``entity-id`` is left (which includes links that had none).
        """
        link_copy = _deep_copy(self.root, link)
        entity_ids = link_copy.xpath("linked-item-list/linked-item/entity-id")
        dropped = 0
        for entity in entity_ids:
            if entity.text in self.seen:
                continue
            linked_item = entity.getparent()
            link_elem = linked_item.getparent().getparent()
            print("removing link '%s'"%link_elem.find("link-definition").text)
            # NOTE(review): only the entity-id is removed; its linked-item
            # parent stays in the copy -- confirm this is intended.
            linked_item.remove(entity)
            dropped += 1
        if dropped >= len(entity_ids) - 1:
            self.root.remove(link_copy)

    @classmethod
    def fromroot(cls, root):
        """Build an empty container whose root mirrors `root`'s tag, attributes and text."""
        return cls(root.tag, root.attrib, root.text)
    
def _simple_copy(parent, elem=None):
    if elem is not None:
        new = lxml.SubElement(parent, elem.tag, elem.attrib)
    else:
        new = lxml.Element(parent.tag, parent.attrib)
        elem = parent
    new.text = elem.text
    new.tail = elem.tail
    return new

def _deep_copy(parent, elem=None):
    """Recursively copy an element and all of its descendants.

    With `elem` given, the copy is attached as a new child of `parent`.
    With `elem` omitted, `parent` itself is copied into a detached tree.
    Returns the copied element.
    """
    source = parent if elem is None else elem
    new = _simple_copy(parent, elem)
    for child in source:
        _deep_copy(new, child)
    return new

def copy_element(elem):
    """Return a detached, filtered copy of `elem`.

    Tag, attributes, text and tail are copied. Children are deep-copied,
    except that ``custom-field-value`` children are kept only when their
    ``field-name`` attribute is 'Release Notes', and the numbering children
    (requirement / defect / test-case / requirement-document number) are
    dropped.
    """
    numbering_tags = (
        'requirement-number',
        'defect-number',
        'test-case-number',
        'requirement-document-number',
    )

    clone = lxml.Element(elem.tag, elem.attrib)
    clone.text = elem.text
    clone.tail = elem.tail
    for child in elem:
        if child.tag == 'custom-field-value':
            if child.attrib['field-name'] != 'Release Notes':
                continue
        elif child.tag in numbering_tags:
            continue
        clone.append(copy.deepcopy(child))
    return clone

In [7]:
# load the data
# Reloading here means the in-place edits below start from a fresh parse each
# time this cell runs. The backslash is escaped explicitly: "\E" is not a
# recognised escape sequence, and recent Python versions warn about it.
thezip = load_zip("Helix Migration Test\\ExportData.zip")
thetree = load_xml(thezip)
theroot = thetree.getroot()

# translate folder paths
for fldr in theroot.xpath("*/folder-path"):
    if fldr.text in bad_folders:
        # drop the folder-path element itself; the owning item stays in the tree
        fldr.getparent().remove(fldr)
    else:
        for old, new in folders:
            if old == fldr.text:
                fldr.text = new
                # stop at the first match so the translated path is not
                # compared against the remaining (old, new) pairs
                break

# translate requirement types (types without a mapping are left unchanged)
for typ in theroot.xpath("requirement/requirement-type"):
    typ.text = req_type_map.get(typ.text, typ.text)

# remove bad test case variants
for v in theroot.xpath("test-case/test-case-coverage-variable"):
    if v.attrib['variable-name'] in bad_variants:
        v.getparent().remove(v)

# clear empty workflow event fields (field-value attribute missing or "")
for cv in theroot.xpath("*/workflow-event/custom-field-value"):
    if not cv.attrib.get('field-value'):
        cv.getparent().remove(cv)

In [8]:
# Container whose root mirrors the tag, attributes and text of the source root.
uc = UniqueElementContainer.fromroot(theroot)

# Select every direct child of the root whose folder-path equals one of the
# new folder paths.
path_tests = ["text()='%s'" % new_path for _, new_path in folders]
xp = "*[./folder-path[%s]]" % " or ".join(path_tests)
for item in theroot.xpath(xp):
    # folders are never copied; requirement documents will be added later
    if item.tag in ('requirement-document', 'folder'):
        continue
    uc.copy_element(item)

In [9]:

# map record id -> requirement element
themap = {}
for req in theroot.xpath("requirement"):
    themap[req.find("record-id").text] = req

# loop over documents and flag artifacts for import
# this is done to only import artifacts that were part of the document
xp = "requirement-document[./summary[%s]]"% " or ".join('text()="%s"' % d for d in documents)

docs = []
newmap = {}  # not used in the visible cells; kept in case other cells rely on it
for doc in theroot.xpath(xp):
    # `req_id` rather than `id`: a notebook-level loop variable named `id`
    # would shadow the builtin for every later cell
    for req_id in doc.xpath("document-tree-node/node-requirement-id/text()"):
        uc.copy_element(themap[req_id])
    docs.append(doc)

# the documents are copied only after every referenced requirement, so the
# requirements precede the documents in the container
for d in docs:
    uc.copy_element(d)
            
# for link in theroot.xpath("link"):
#     uc.copy_link(link)
            
            
def docopy(q, z1, z2):
    """Copy every archive member named in `q` from zip `z1` into zip `z2`."""
    for member_name in q:
        docopy_inner(member_name, z1, z2)

def docopy_inner(file, z1, z2):
    """Copy the archive member `file` from zip `z1` into zip `z2`.

    Prints a progress line; "Done" is printed only once the member has been
    written to `z2`.
    """
    print("Copying attachment '%s'..."%file, end="")
    with z1.open(file, 'r') as src:
        payload = src.read()
        z2.writestr(file, payload)
        print("Done")
    


# for item in uc.root:
#     if item.tag != 'requirement':
#         uc.root.remove(item)


In [10]:
# nzip = zipfile.ZipFile("Helix Migration Test\\test.zip", mode='w')
# print("creating zip file")
# newtree = lxml.ElementTree(uc.root)
# with io.BytesIO() as f:
#     newtree.write(f, pretty_print=True, xml_declaration=True, standalone=False, encoding='UTF-8', doctype=b'<!DOCTYPE TestTrackData SYSTEM "TestTrackData.dtd">')
#     data = f.getvalue()
    
# nzip.writestr("archive_manifest.xml", data)
    
# print("copying attachments")
# #docopy(uc.zipq, thezip, nzip)
# print("done")
# nzip.close()

# with zipfile.ZipFile("Helix Migration Test\\test.zip") as z:
#     with z.open("archive_manifest.xml", 'r') as f:
#         with open("Helix Migration Test\\archive_manifest.xml", 'wb') as f2:
#             f2.write(f.read())

In [33]:
def write_root(newroot, attachments, name, source_zip=None):
    """Write `newroot` and its attachments to ``Helix Migration Test\\<name>.zip``.

    Parameters:
        newroot: root element serialised as ``archive_manifest.xml``.
        attachments: names of the archive members to copy from the source zip.
            A name listed more than once is copied only once, so the output
            archive does not end up with duplicate members.
        name: base name (without extension) of the zip file to create.
        source_zip: open zip archive to read the attachments from. Defaults to
            the notebook-level `thezip`.
    """
    if source_zip is None:
        source_zip = thezip

    newtree = lxml.ElementTree(newroot)

    # serialise the manifest in memory so it can be stored as a zip member
    with io.BytesIO() as f:
        newtree.write(f, pretty_print=True, xml_declaration=True, standalone=False, encoding='UTF-8', doctype=b'<!DOCTYPE TestTrackData SYSTEM "TestTrackData.dtd">')
        data = f.getvalue()

    with zipfile.ZipFile("Helix Migration Test\\%s.zip"%name, 'w', zipfile.ZIP_DEFLATED) as newzip:
        # dict.fromkeys drops repeated names while keeping first-seen order
        for fp in dict.fromkeys(attachments):
            docopy_inner(fp, source_zip, newzip)

        newzip.writestr("archive_manifest.xml", data)

def create_zip_file_for(root, type, name):
    """Export all `type` children of `root` into their own import archive.

    Parameters:
        root: element whose direct children are searched.
        type: tag of the artifacts to export (used as an XPath step).
        name: base name of the zip file, passed on to `write_root`.

    The new root is a shallow copy of `root` holding deep copies of the
    matching children; the ``filespec`` of each of their ``attachment`` /
    ``tcm-attachment`` children is passed to `write_root`.
    """
    newroot = _simple_copy(root)
    for artifact in root.xpath("%s"%type):
        _deep_copy(newroot, artifact)

    attachment_query = "%s/*[local-name()='attachment' or local-name()='tcm-attachment']"%type
    attachments = [node.attrib['filespec'] for node in newroot.xpath(attachment_query)]

    write_root(newroot, attachments, name)
        
#     with open("Helix Migration Test\\archive_manifest_%s.xml"%type, 'wb') as f:
#         f.write(data)
    

In [34]:
# Export test cases and defects as separate import archives.
# The requirement and requirement-document exports are left disabled here;
# create_zip_root_docs writes the "RequirementDocumentImport" archive instead.
#create_zip_file_for(uc.root, 'requirement', 'RequirementImport')
create_zip_file_for(uc.root, 'test-case', 'TestCaseImport')
#create_zip_file_for(uc.root, 'requirement-document', 'RequirementDocumentImport')
create_zip_file_for(uc.root, 'defect', 'DefectImport')

Copying attachment '00066982.dat'...Done
Copying attachment '00067000.dat'...Done
Copying attachment '00066999.dat'...Done
Copying attachment '00066984.dat'...Done


In [35]:
def create_zip_root_docs(root):
    """Export requirements and requirement documents into one import archive.

    The new root is a shallow copy of `root` filled in three passes:
    1. every requirement referenced by a document node, once each, as a
       filtered copy (module-level `copy_element`) of the element that the
       notebook-level `themap` holds for that record id;
    2. deep copies of the remaining ``requirement`` children of `root`;
    3. deep copies of the ``requirement-document`` children of `root`.

    The ``filespec`` of each requirement's ``attachment`` / ``tcm-attachment``
    children is passed to `write_root`, which writes the
    "RequirementDocumentImport" archive.
    """
    newroot = _simple_copy(root)
    exported = set()

    referenced_ids = root.xpath("requirement-document/document-tree-node/node-requirement-id/text()")
    for rid in referenced_ids:
        if rid in exported:
            continue
        exported.add(rid)
        newroot.append(copy_element(themap[rid]))

    for req in root.xpath("requirement"):
        rid = req.find("record-id").text
        if rid in exported:
            continue
        exported.add(rid)
        _deep_copy(newroot, req)

    for doc in root.xpath("requirement-document"):
        _deep_copy(newroot, doc)

    attachment_query = "requirement/*[local-name()='attachment' or local-name()='tcm-attachment']"
    attachments = [node.attrib['filespec'] for node in newroot.xpath(attachment_query)]

    write_root(newroot, attachments, "RequirementDocumentImport")
        
#     with open("Helix Migration Test\\archive_manifest_requirement-document.xml", 'wb') as f:
#         f.write(data)

In [36]:
# Write the collected requirements and requirement documents together into
# the "RequirementDocumentImport" archive.
create_zip_root_docs(uc.root)

Copying attachment '00066972.dat'...Done
Copying attachment '00066971.dat'...Done
Copying attachment '00066973.dat'...Done
Copying attachment '00066974.dat'...Done
Copying attachment '00066975.dat'...Done
Copying attachment '00066978.dat'...Done
Copying attachment '00066977.dat'...Done
Copying attachment '00066976.dat'...Done
Copying attachment '00066990.dat'...Done
Copying attachment '00066980.dat'...Done
Copying attachment '00066994.dat'...Done
Copying attachment '00066993.dat'...Done
Copying attachment '00066997.dat'...Done
Copying attachment '00066996.dat'...Done
Copying attachment '00066995.dat'...Done
