In [1]:
import os
import re
import sys
import requests
import pickle
import getopt
import subprocess as sp
import numpy as np
from pprint import pprint
from lxml import etree
from tqdm.notebook import tqdm

In [2]:
# arguments
# Directories used by the rest of the notebook. os.path.abspath returns a
# normalized absolute path for each literal.
#   input_path  - searched recursively for the article .xml files
#   output_path - presumably where processed results are written; it is not
#                 used in the cells visible here (TODO confirm)
#   suppl_path  - root of the supplementary files; the directory directly below
#                 it is treated as the publication number
# NOTE(review): these are machine-specific absolute paths and must be adjusted
# before running on another machine.
input_path = os.path.abspath("/mnt/data1/jiawei/acs-data/article-files/")
output_path = os.path.abspath("/mnt/data1/jiawei/acs-data/processed-files/")
suppl_path = os.path.abspath("/mnt/data1/jiawei/acs-data/suppl-files")

In [3]:
# function to collect matching files and dirs
def collect_files(root, res, pattern="", collect_dirs=True, min_depth=None, max_depth=None):
    """Recursively append paths below ``root`` that match ``pattern`` to ``res``.

    Args:
        root: directory whose entries are listed with ``os.listdir``.
        res: list receiving the matching paths; it is mutated in place.
        pattern: regular expression applied with ``re.match`` to the joined
            path of every entry (so it is anchored at the start of the path).
        collect_dirs: when False, directories are never appended to ``res``;
            they are still descended into.
        min_depth: counter decremented on every recursion level; entries are
            only eligible once it has dropped to 1 or below. None disables it.
        max_depth: counter decremented on every recursion level; a level whose
            counter is 0 is not listed at all. None disables the limit.

    Returns:
        None. Results are delivered through ``res``.
    """
    # depth budget exhausted: do not even list this directory
    if max_depth is not None and max_depth == 0:
        return

    # these values are the same for every entry of this directory
    deep_enough = min_depth is None or min_depth - 1 <= 0
    child_min_depth = min_depth - 1 if min_depth is not None else None
    child_max_depth = max_depth - 1 if max_depth is not None else None

    for entry in os.listdir(root):
        entry_path = os.path.join(root, entry)
        entry_is_dir = os.path.isdir(entry_path)

        # keep the entry when it is deep enough, matches the pattern and is
        # either a file or an accepted directory
        if deep_enough and re.match(pattern, entry_path) and (collect_dirs or not entry_is_dir):
            res.append(entry_path)

        # directories are always explored, whether or not they were kept
        if entry_is_dir:
            collect_files(entry_path, res, pattern, collect_dirs, child_min_depth, child_max_depth)

In [4]:
# helps to extract text from paragraph
def p_helper(node):
    """Flatten a paragraph element into a single cleaned-up line of text.

    The line is built from the paragraph's leading text, the text inside each
    child element (except ``named-content`` and ``inline-formula`` children,
    whose content is dropped) and the tail text following each child.

    Args:
        node: element exposing ``text``, iteration over its children, and on
            each child ``tag``, ``tail`` and ``xpath``.

    Returns:
        The paragraph as one string with newlines removed, whitespace runs
        collapsed and spacing around punctuation tidied. An empty paragraph
        yields ``""``.
    """
    # node.text is None for <p/> and for paragraphs that open with a child
    # element; child.tail is None when nothing follows the child. Both are
    # collected as-is and filtered out below, so neither case loses the rest
    # of the paragraph or breaks the join.
    pieces = [node.text]
    for child in node:

        # take the text inside the child unless it is a named-content or
        # inline-formula element, then the text that follows the child
        if child.tag not in ("named-content", "inline-formula"):
            pieces.append(" ".join(child.xpath(".//text()")))
        pieces.append(child.tail)

    # each paragraph is put into one line; None/empty pieces are skipped
    line = " ".join(piece for piece in pieces if piece)
    line = line.strip()
    line = line.replace("\n", " ")

    # clean up consecutive whitespace
    line = re.sub(r"\s+", " ", line)

    # fix the space around punctuation: none before . , ) : ; none after (
    # and none on either side of - and /
    line = re.sub(r"\s([.,\):;])", r"\1", line)
    line = re.sub(r"\(\s", r"(", line)
    line = re.sub(r"\s*([-/])\s*", r"\1", line)
    return line

In [5]:
def kwd_helper(node):
    """Return the text of a keyword element as one normalized string.

    Args:
        node: element exposing ``xpath``; all text nodes below it are used.

    Returns:
        The text nodes joined with single spaces, with newlines removed,
        surrounding whitespace stripped and whitespace runs collapsed.
    """
    # return a keyword string
    kwd_tokens = node.xpath(".//text()")
    kwd = " ".join(kwd_tokens).replace("\n", " ").strip()

    # raw string: "\s" is not a valid escape sequence in a plain literal
    kwd = re.sub(r"\s+", " ", kwd)
    return kwd

In [6]:
# this classifies a section title into the categories of interest
# for example: intro, method, and results
# returns a list of category names; the list is empty for non interesting titles
def title_helper(node):
    """Map a title element to the section categories its text mentions.

    Args:
        node: element exposing ``xpath``; all text nodes below it form the
            title.

    Returns:
        A list with zero or more of ``"introduction"``, ``"result"``,
        ``"discussion"``, ``"materials"``, ``"method"`` and ``"summary"``, in
        that order. A title can fall into several categories; a title that
        matches none gives an empty list.
    """
    # extract text from title node and normalize it to lower case with
    # single spaces
    title = " ".join(node.xpath(".//text()"))
    title = title.replace("\n", " ")
    title = re.sub(r"\s+", " ", title)
    title = title.strip().lower()

    # categorize title: a category applies when any of its substrings occurs
    categories = (
        (("intro",), "introduction"),
        (("result",), "result"),
        (("discuss",), "discussion"),
        (("material",), "materials"),
        (("method", "procedure"), "method"),
        (("summary",), "summary"),
    )
    return [
        category
        for needles, category in categories
        if any(needle in title for needle in needles)
    ]

In [7]:
def extract_body(root):
    """Collect the body paragraphs of an article together with their section.

    Walks, in document order, every ``p`` element and every ``title`` element
    (titles inside a ``caption`` are excluded) below ``/article/body``.

    Args:
        root: root element of the parsed article; must support ``xpath``.

    Returns:
        A list of dicts, one per paragraph, with keys ``"text"`` (result of
        ``p_helper``) and ``"section"`` (the categories that ``title_helper``
        gave for the most recent title with at least one category, or an
        empty list when no such title has been seen yet).
    """
    paragraphs = []
    active_section = []
    body_nodes = root.xpath("/article/body//*[self::p or (self::title and not(ancestor::caption))]")
    for body_node in body_nodes:
        tag = body_node.tag

        # a title only replaces the active section when it was categorized
        if tag == "title":
            categories = title_helper(body_node)
            if len(categories) > 0:
                active_section = categories
            continue

        # every paragraph is recorded under the currently active section
        if tag == "p":
            paragraphs.append({"text": p_helper(body_node), "section": active_section})
    return paragraphs

In [8]:
def extract_abstract(root):
    """Return the first abstract paragraph of an article.

    Args:
        root: root element of the parsed article; must support ``xpath``.

    Returns:
        A list holding the ``p_helper`` text of the first ``abstract/p``
        element, or an empty list when the article has none. Further abstract
        paragraphs are not included.
    """
    paragraphs = root.xpath("//abstract/p")
    if not paragraphs:
        return []
    return [p_helper(paragraphs[0])]

In [9]:
def extract_keywords(root):
    """Return the keywords of an article.

    Args:
        root: root element of the parsed article; must support ``xpath``.

    Returns:
        A list with the ``kwd_helper`` string of every ``kwd-group/kwd``
        element, in document order.
    """
    return [kwd_helper(kwd_node) for kwd_node in root.xpath("//kwd-group/kwd")]

In [10]:
def extract_date(root):
    """Read the publication dates from the article metadata.

    Every ``/article/front/article-meta/pub-date`` element is inspected. An
    element whose ``date-type`` attribute is ``"issue-pub"`` provides the issue
    publication date; any other element provides the electronic publication
    date. When several elements fall in the same group the last one wins.

    Elements that lack a ``year``, ``month`` or ``day`` child (or where one of
    them is empty) are skipped instead of raising, so a single incomplete date
    does not abort a batch run; the corresponding result stays ``None``.

    Args:
        root: root element of the parsed article; must support ``xpath``.

    Returns:
        A tuple ``(issue_pub_date, electron_pub_date)``; each item is a
        ``"month/day/year"`` string or ``None`` when no usable date was found.
    """

    def _part(date_node, name):
        # stripped text of the first child called `name`, or None when the
        # child is missing or has no text
        found = date_node.xpath("./" + name)
        if not found or found[0].text is None:
            return None
        return found[0].text.strip()

    issue_pub_date = None
    electron_pub_date = None

    # traverse to the date nodes
    date_nodes = root.xpath("/article/front/article-meta/pub-date")

    # get the time
    for node in date_nodes:
        year = _part(node, "year")
        month = _part(node, "month")
        day = _part(node, "day")

        # incomplete date: cannot be rendered as month/day/year
        if year is None or month is None or day is None:
            continue

        date = "%s/%s/%s" % (month, day, year)
        if node.attrib.get("date-type") == "issue-pub":
            issue_pub_date = date
        else:
            electron_pub_date = date

    return issue_pub_date, electron_pub_date

In [11]:
# collect all xml files below input_path (directories themselves are skipped)
xml_paths = []
# raw string: "\." is not a valid escape sequence in a plain literal
collect_files(input_path, xml_paths, pattern=r".*\.xml$", collect_dirs=False)
print("total xml files: %d" % len(xml_paths))

total xml files: 1545


In [12]:
# build one record per article xml, keyed by publication number
processed_files = {}
for xml_path in tqdm(xml_paths):

    # load the document and take its root element
    root = etree.parse(xml_path).getroot()

    # both publication dates come from a single call
    issue_pub_date, electron_pub_date = extract_date(root)

    # everything extracted from this article; suppl_files starts empty and
    # is filled in once the supplementary files have been collected
    xml_data = dict(
        keywords=extract_keywords(root),
        abstract=extract_abstract(root),
        body=extract_body(root),
        issue_pub_date=issue_pub_date,
        electron_pub_date=electron_pub_date,
        suppl_files=[],
    )

    # the publication number is the file name up to its first dot
    pub_num = xml_path.rsplit("/", 1)[-1].split(".", 1)[0]
    processed_files[pub_num] = xml_data

HBox(children=(FloatProgress(value=0.0, max=1545.0), HTML(value='')))




In [13]:
# find out all the zip files
# suppl_files_zip = []
# collect_files(suppl_path, suppl_files_zip, pattern=".*\.zip$", collect_dirs=False, min_depth=3)
# print("files: %d" % len(suppl_files_zip))

In [14]:
# extract all the zip files in place
# for zip_file in suppl_files_zip:
#     zip_file_dir = re.sub("/[^/]*$", "", zip_file)
#     res = sp.run(["unzip", "-n", zip_file, "-d", zip_file_dir])
#     if res.returncode != 0:
#         print(res)

In [15]:
# gather every supplementary file (no directories) once min_depth=3 is reached
suppl_files_all = []
collect_files(suppl_path, suppl_files_all, pattern="", collect_dirs=False, min_depth=3)

# drop the entries whose path contains a __MACOSX component
suppl_files_all = list(filter(lambda x: not re.match(".*__MACOSX.*", x), suppl_files_all))
print("files: %d" % len(suppl_files_all))

files: 10070


In [16]:
# link every supplementary file to the article it belongs to
suppl_ext = set()

# name of the supplementary root directory, the same for every file
suppl_dir = suppl_path.split("/")[-1]

for path in suppl_files_all:
    path = path.split("/")

    # the file name is the last path component
    suppl_filename = path[-1]

    # index of the first component equal to the root directory name
    # (0 when there is none); the component after it is the publication number
    suppl_dir_idx = next((i for i, item in enumerate(path) if item == suppl_dir), 0)
    pub_num = path[suppl_dir_idx + 1]

    # path relative to the supplementary root
    rpath = os.path.join(*path[suppl_dir_idx + 1:])

    # description of this file; is_sequence is decided later
    suppl_info = dict(suppl_filename=suppl_filename, rpath=rpath, is_sequence=False)

    # only files of articles that were parsed are recorded
    if pub_num in processed_files:
        processed_files[pub_num]["suppl_files"].append(suppl_info)

    # remember the part after the last dot (the whole name if there is no dot)
    ext = suppl_filename.rsplit(".", 1)[-1]
    suppl_ext.add(ext)

In [17]:
# display the collected "extensions": the part of each supplementary file name
# after its last dot, so names without a dot appear in full
suppl_ext

{'001',
 '002',
 '003',
 '004',
 '005',
 '006',
 '024',
 '323',
 'AD_ASM',
 'AD_PRT',
 'AUTHORS',
 'AVI',
 'About',
 'About?action=diff',
 'About?action=edit',
 'About?action=print',
 'Abstracted_model',
 'Acknowledgements',
 'Acknowledgements?action=diff',
 'Acknowledgements?action=edit',
 'Acknowledgements?action=print',
 'Awmilleruwedu?action=edit',
 'BasicEditing',
 'C',
 'CAD',
 'COPYING',
 'Chamber',
 'Chamber?action=diff',
 'Chamber?action=edit',
 'Chamber?action=print',
 'ChangeLog',
 'Chris?action=edit',
 'Cnt?action=edit',
 'ConstructionManual',
 'ConstructionManual?action=diff',
 'ConstructionManual?action=edit',
 'ConstructionManual?action=print',
 'DESCRIPTION',
 'DS_Store',
 'DWG',
 'Derivatives?action=edit',
 'Detailed_model',
 'DocumentIdentifier',
 'DocumentationIndex',
 'Downloads',
 'Downloads?action=diff',
 'Downloads?action=edit',
 'Downloads?action=print',
 'ENZYME',
 'Electronics',
 'Electronics?action=diff',
 'Electronics?action=edit',
 'Electronics?action=print

In [18]:
# settings for the SBOL validator web service
sbol_validator_url = "https://validator.sbolstandard.org/validate/"

# file extensions that are eligible for validation
allowed_file_type = {"gb", "fasta", "sbol", "txt", "xml"}

# request body template; "main_file" is a placeholder for the file content
validator_param = dict(
    options=dict(
        language="SBOL2",
        test_equality=False,
        check_uri_compliance=False,
        check_completeness=False,
        check_best_practices=False,
        fail_on_first_error=True,
        provide_detailed_stack_trace=False,
        subset_uri="",
        uri_prefix="dummy",
        version="",
        insert_type=False,
    ),
    main_file=None,
    return_file=True,
)

In [19]:
# use the api to check if the file is a sequence file
# every supplementary file with an allowed extension, a size below 64mb and
# utf-8 content is posted to the validator; its "valid" answer is stored in
# the file's "is_sequence" field
for pub_num, item in tqdm(processed_files.items()):
    for suppl_file in item["suppl_files"]:

        name = suppl_file["suppl_filename"]

        # only allow the following extension
        ext = name.split(".")[-1]
        if ext not in allowed_file_type:
            continue

        path = os.path.join(suppl_path, suppl_file["rpath"])

        # restrict file size to be less than 64mb; this is checked before
        # reading so that oversized files are never loaded into memory
        file_size = os.path.getsize(path)
        if file_size >= 64 * 2 ** 20:
            continue

        # try read the content; files that are not valid utf-8 are skipped
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
        except UnicodeDecodeError:
            continue

        # create parameter
        validator_param["main_file"] = content

        # check file; the (connect, read) timeout keeps one stalled request
        # from blocking the whole run indefinitely
        print(path)
        res = requests.post(sbol_validator_url, json=validator_param, timeout=(10, 300))
        res = res.json()
        suppl_file["is_sequence"] = res["valid"]
        print(f"{name}, {res['valid']}")

HBox(children=(FloatProgress(value=0.0, max=1545.0), HTML(value='')))

/mnt/data1/jiawei/acs-data/suppl-files/sb5003407/suppl/Code for publication/parfor_progress.txt
parfor_progress.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003407/suppl/Code for publication/README.txt
README.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003407/suppl/Code for publication/.ignore/ghostscript.txt
ghostscript.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003407/suppl/Code for publication/.ignore/gs_font_path.txt
gs_font_path.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400066m/suppl/suppl/Toggle_Switch.xml
Toggle_Switch.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400066m/suppl/suppl/edited_biobricks.xml
edited_biobricks.xml, True
/mnt/data1/jiawei/acs-data/suppl-files/sb400066m/suppl/suppl/LacI_Inverter.xml
LacI_Inverter.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400066m/suppl/suppl/TetR_Inverter.xml
TetR_Inverter.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb9b00021/suppl/Supporting File S3 - pQE-60_T5-sfGFP(UAG) report

statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x4D/Simulation_Run/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x4D/Simulation_Run/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x4D/Simulation_Run/term-time.txt
term-time.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x4D/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x4D/Model_0x4D.sbol
Model_0x4D.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x4D/circuit_0x4D_0_YFP_module.xml
circuit_0x4D_0_YFP_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC9/Model_0xC9.sbol
Model_0xC9.sbol, True
/mnt/data1/jiawei/acs

circuit_0xF7_3_P2_PhlF_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/circuit_0xF7_0_YFP_module.xml
circuit_0xF7_0_YFP_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/Model_0xF7.sbol
Model_0xF7.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/Simulation_Run/statistics.txt
statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/Simulation_Run/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/Simulation_Run/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/Simulation_Run/term-time.txt
term-time.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xF7/Simulation_Environment.xml
Simulation_Environment

circuit_0x9F_6_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6F/circuit_0x6F_1_A1_AmtR_module.xml
circuit_0x6F_1_A1_AmtR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6F/circuit_0x6F_6_pBAD_module.xml
circuit_0x6F_6_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6F/circuit_0x6F_5_P2_PhlF_module.xml
circuit_0x6F_5_P2_PhlF_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6F/circuit_0x6F_8_pTet_module.xml
circuit_0x6F_8_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6F/circuit_0x6F_5_P2_PhlF_circuit_0x6F_6_pBAD_circuit_0x6F_3_S3_SrpR_circuit_0x6F_2_H1_HlyIIR_circuit_0x6F_0_YFP_circuit_0x6F_7_pTac_circuit_0x6F_8_pTet_circuit_0x6F_1_A1_AmtR_circuit_0x6F_4_E1_BetI_module.xml
circuit_0x6F_5_P2_PhlF_circuit_0x6F_6_pBAD_circuit_0

circuit_0x07_3_H1_HlyIIR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x07/circuit_0x07_6_pBAD_module.xml
circuit_0x07_6_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x07/circuit_0x07_1_P3_PhlF_module.xml
circuit_0x07_1_P3_PhlF_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x07/circuit_0x07_2_S2_SrpR_module.xml
circuit_0x07_2_S2_SrpR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x07/Model_0x07.sbol
Model_0x07.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x07/circuit_0x07_5_pTet_module.xml
circuit_0x07_5_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x07/circuit_0x07_4_pTac_module.xml
circuit_0x07_4_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_p

circuit_0xC8_5_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC8/Simulation_Run/statistics.txt
statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC8/Simulation_Run/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC8/Simulation_Run/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC8/Simulation_Run/term-time.txt
term-time.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC8/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xC8/circuit_0xC8_1_H1_HlyIIR_module.xml
circuit_0xC8_1_H1_HlyIIR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x06/circuit_0x06_2_A1

circuit_0x1C_9_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/Simulation_Run/statistics.txt
statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/Simulation_Run/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/Simulation_Run/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/Simulation_Run/term-time.txt
term-time.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/circuit_0x1C_4_E1_BetI_module.xml
circuit_0x1C_4_E1_BetI_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x1C/circuit_0x1C_5_A1_Amt

circuit_0x8E_2_A1_AmtR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/Simulation_Run/statistics.txt
statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/Simulation_Run/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/Simulation_Run/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/Simulation_Run/term-time.txt
term-time.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/circuit_0x8E_3_E1_BetI_module.xml
circuit_0x8E_3_E1_BetI_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x8E/circuit_0x8E_5_pTa

circuit_0x19_0_YFP_circuit_0x19_6_pTet_circuit_0x19_8_pBAD_circuit_0x19_2_H1_HlyIIR_circuit_0x19_1_S3_SrpR_circuit_0x19_4_E1_BetI_circuit_0x19_7_pTac_circuit_0x19_5_F1_AmeR_circuit_0x19_3_P3_PhlF_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x19/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x19/Model_0x19.sbol
Model_0x19.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x19/circuit_0x19_4_E1_BetI_module.xml
circuit_0x19_4_E1_BetI_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x19/circuit_0x19_5_F1_AmeR_module.xml
circuit_0x19_5_F1_AmeR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x78/circuit_0x78_4_H1_HlyIIR_module.xml
circuit_0x78_4_H1_HlyIIR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b

circuit_0xAE_6_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xAE/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xAE/Model_0xAE.sbol
Model_0xAE.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x36/circuit_0x36_3_E1_BetI_module.xml
circuit_0x36_3_E1_BetI_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x36/circuit_0x36_4_P3_PhlF_module.xml
circuit_0x36_4_P3_PhlF_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x36/circuit_0x36_6_A1_AmtR_module.xml
circuit_0x36_6_A1_AmtR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x36/Model_0x36.sbol
Model_0x36.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x36/circuit_0x36_5_S2_S

Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6E/Simulation_Run/term-time.txt
term-time.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6E/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x6E/circuit_0x6E_5_pTac_module.xml
circuit_0x6E_5_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xB9/circuit_0xB9_5_E1_BetI_module.xml
circuit_0xB9_5_E1_BetI_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xB9/circuit_0xB9_4_H1_HlyIIR_module.xml
circuit_0xB9_4_H1_HlyIIR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xB9/circuit_0xB9_6_pTet_module.xml
circuit_0xB9_6_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_pr

circuit_0x01_8_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x01/circuit_0x01_7_pBAD_module.xml
circuit_0x01_7_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x01/Model_0x01.sbol
Model_0x01.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x01/circuit_0x01_0_YFP_circuit_0x01_1_B2_BM3R1_circuit_0x01_4_S1_SrpR_circuit_0x01_9_pTet_circuit_0x01_8_pTac_circuit_0x01_7_pBAD_circuit_0x01_3_E1_BetI_circuit_0x01_5_A1_AmtR_circuit_0x01_6_P2_PhlF_circuit_0x01_2_H1_HlyIIR_module.xml
circuit_0x01_0_YFP_circuit_0x01_1_B2_BM3R1_circuit_0x01_4_S1_SrpR_circuit_0x01_9_pTet_circuit_0x01_8_pTac_circuit_0x01_7_pBAD_circuit_0x01_3_E1_BetI_circuit_0x01_5_A1_AmtR_circuit_0x01_6_P2_PhlF_circuit_0x01_2_H1_HlyIIR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x01/circuit_0x01_5_A1_AmtR_module.xml
circuit_0x01_5_A1_Am

circuit_0x70_4_pTet_circuit_0x70_5_pBAD_circuit_0x70_2_H1_HlyIIR_circuit_0x70_3_pTac_circuit_0x70_1_B2_BM3R1_circuit_0x70_0_YFP_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x70/circuit_0x70_1_B2_BM3R1_module.xml
circuit_0x70_1_B2_BM3R1_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x70/circuit_0x70_5_pBAD_module.xml
circuit_0x70_5_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x70/circuit_0x70_3_pTac_module.xml
circuit_0x70_3_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x70/circuit_0x70_4_pTet_module.xml
circuit_0x70_4_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x70/circuit_0x70_2_H1_HlyIIR_module.xml
circuit_0x70_2_H1_HlyIIR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Mode

circuit_0x7F_3_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x7F/Simulation_Environment.xml
Simulation_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x7F/circuit_0x7F_1_A1_AmtR_circuit_0x7F_3_pTet_circuit_0x7F_0_YFP_circuit_0x7F_5_pTac_circuit_0x7F_2_B3_BM3R1_circuit_0x7F_4_pBAD_module.xml
circuit_0x7F_1_A1_AmtR_circuit_0x7F_3_pTet_circuit_0x7F_0_YFP_circuit_0x7F_5_pTac_circuit_0x7F_2_B3_BM3R1_circuit_0x7F_4_pBAD_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x82/circuit_0x82_7_pTac_module.xml
circuit_0x82_7_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x82/circuit_0x82_5_A1_AmtR_module.xml
circuit_0x82_5_A1_AmtR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0x82/circuit_0x82_1_P3_PhlF_module.xml
circuit_0x82_1_P3_PhlF_modu

circuit_0x08_7_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xEA/circuit_0xEA_3_pTet_module.xml
circuit_0xEA_3_pTet_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xEA/circuit_0xEA_1_A1_AmtR_module.xml
circuit_0xEA_1_A1_AmtR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xEA/circuit_0xEA_5_pTac_module.xml
circuit_0xEA_5_pTac_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xEA/circuit_0xEA_2_S3_SrpR_module.xml
circuit_0xEA_2_S3_SrpR_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xEA/Simulation_Run/statistics.txt
statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_0xEA/Simulation_Run/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/iBioSim_projects/Model_

TetR_Sensor_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/acs_2017_vpr/AmtR_Production2_AmtR_Production1_LacI_Sensor_GFP_Reporter_TetR_Sensor_RFP_Reporter_BetI_Production2_BetI_Production1_module.xml
AmtR_Production2_AmtR_Production1_LacI_Sensor_GFP_Reporter_TetR_Sensor_RFP_Reporter_BetI_Production2_BetI_Production1_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/acs_2017_vpr/Testing_Environment.xml
Testing_Environment.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/acs_2017_vpr/AmtR_Production2_module.xml
AmtR_Production2_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/acs_2017_vpr/BetI_Production1_module.xml
BetI_Production1_module.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/acs_2017_vpr/Simulation_Environment/statistics.txt
statistics.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00459/suppl/acs_2017_vpr/Simulation_Environment/Testing_Environment.xml
Testing_E

pRC1101.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00060/suppl/GenbankFiles/Venus (YFP).gb
Venus (YFP).gb, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00060/suppl/GenbankFiles/pPAB1.gb
pPAB1.gb, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00060/suppl/GenbankFiles/tSSA1.gb
tSSA1.gb, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00060/suppl/GenbankFiles/109B.gb
109B.gb, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00060/suppl/GenbankFiles/ABP1_alt (Actin).gb
ABP1_alt (Actin).gb, False
/mnt/data1/jiawei/acs-data/suppl-files/sb4001728/suppl/Supporting_Files/Protocol_S4.txt
Protocol_S4.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb4001728/suppl/Supporting_Files/Protocol_S1.txt
Protocol_S1.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb4001728/suppl/Supporting_Files/Protocol_S2.txt
Protocol_S2.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb4001728/suppl/Supporting_Files/Protocol_S3.txt
Protocol_S3.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6

aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/s1011/log.txt
log.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/s0101/aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml
aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/s0101/log.txt
log.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/sim-rep.txt
sim-rep.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/s0001/aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml
aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/s0001/log.txt
log.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00215/suppl/FourInputSensor/sim_and_sensor/s0

pYTK009.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK040.gb
pYTK040.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK072.gb
pYTK072.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK065.gb
pYTK065.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK024.gb
pYTK024.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK029.gb
pYTK029.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK030.gb
pYTK030.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK047.gb
pYTK047.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK028.gb
pYTK028.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK038.gb
pYTK038.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids/pYTK011.gb
pYTK011.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500366v/suppl/plasmids

3-input-not-and.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/Benchmarks/2-cascade.xml
2-cascade.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/Benchmarks/4-cascade.xml
4-cascade.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/Benchmarks/D1_Nat_Biotech.xml
D1_Nat_Biotech.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/Benchmarks/2-to-1-multiplexer.xml
2-to-1-multiplexer.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/src/Database/ML_June_2013.txt
ML_June_2013.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/src/Benchmarks/3-cascade.xml
3-cascade.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/src/Benchmarks/D2_Nat_Biotech.xml
D2_Nat_Biotech.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400139h/suppl/sbrome-0.9.1/src/Benchmarks/not-and.xml
not-and.xml, False
/mnt/da

pBTK200.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK503.gb
pBTK503.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK563.gb
pBTK563.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK602.gb
pBTK602.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK119.gb
pBTK119.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK550d.gb
pBTK550d.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK509.gb
pBTK509.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK520.gb
pBTK520.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/tn7-PA1-gfp-kan.gb
tn7-PA1-gfp-kan.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK112.gb
pBTK112.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK564.gb
pBTK564.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/suppl/pBTK401.gb
pBTK401.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00399/sup

pICH53388.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH49244.gb
pICH49244.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICSL80007.gb
pICSL80007.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH79289.gb
pICH79289.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH41233.gb
pICH41233.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH41276.gb
pICH41276.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH41432.gb
pICH41432.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICSL13002.gb
pICSL13002.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH44222.gb
pICH44222.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH75322.gb
pICH75322.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH45234.gb
pICH45234.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH47761.gb
pICH47761.gb, True
/mnt/data1/jiawei/ac

pICH53399.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH51277.gb
pICH51277.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICSL30009.gb
pICSL30009.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH75355.gb
pICH75355.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH50866.gb
pICH50866.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH54022.gb
pICH54022.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICSL70005.gb
pICSL70005.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICSL50013.gb
pICSL50013.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH44188.gb
pICH44188.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICH45089.gb
pICH45089.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb4001504/suppl/pICSL50008 corr.gb
pICSL50008 corr.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500033d/suppl/pVKL.002.gb
pVKL.002.gb, True
/mnt/dat

pGL363.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL532.gb
pGL532.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL494.gb
pGL494.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL686.gb
pGL686.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pMVA2RBS038.gb
pMVA2RBS038.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL516.gb
pGL516.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL676.gb
pGL676.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL663.gb
pGL663.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL338.gb
pGL338.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pMVA2RBS041.gb
pMVA2RBS041.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/pGL477.gb
pGL477.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb8b00398/suppl/plasmids/

pAY441 mC-SP.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00178/suppl/Sc_eGFP_RFP_ARS.gb
Sc_eGFP_RFP_ARS.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00178/suppl/pPpT4_SB-truncatedAOX1-eGFP.gb
pPpT4_SB-truncatedAOX1-eGFP.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00178/suppl/pPpT4-bidi-sTomato-eGFP.gb
pPpT4-bidi-sTomato-eGFP.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb9b00142/suppl/sb-2019-001429_si_002.txt
sb-2019-001429_si_002.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00410/suppl/Supp_File_3_mKate_integration.gb
Supp_File_3_mKate_integration.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00410/suppl/Supp_File_4_pJB036.gb
Supp_File_4_pJB036.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00147/suppl/plasmid maps/hybrid promoters/pADCR5lux-noSsrA-BS12.gb
pADCR5lux-noSsrA-BS12.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00147/suppl/plasmid maps/hybrid promoters/pADCR5BAD-noSsrA-BS12.gb
pADCR5BAD-noSsrA-BS12.gb, True
/mnt/data1/ji

rxnCountThresholdTable.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA177RbT.gb
pSEVA177RbT.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA117RbD.gb
pSEVA117RbD.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA117RbT.gb
pSEVA117RbT.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA177RbD.gb
pSEVA177RbD.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/GeneGuard vector cassettes/pSEVAD1Z1A.gb
pSEVAD1Z1A.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/GeneGuard vector cassettes/pSEVAD7K1A.gb
pSEVAD7K1A.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/GeneGuard vector cassettes/pSEVAD7Z1A.gb
pSEVAD7Z1A.gb, True
/mnt/data1/jiawei/acs-data/suppl-files/sb500234s/suppl/ApE files/GeneGuard vector cassettes/pSEVAD1K1A.gb
pS

aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003289/suppl/model_generation_supplementary/modelGen/sim_and_sensor/s0011/log.txt
log.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003289/suppl/model_generation_supplementary/modelGen/sim_and_sensor/aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml
aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003289/suppl/model_generation_supplementary/modelGen/sim_and_sensor/s1000/aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml
aTc_AND_Three_OC_Six_AND_IPTG_AND_Ara_Sensor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003289/suppl/model_generation_supplementary/modelGen/sim_and_sensor/s1000/log.txt
log.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003289/suppl/model_generation_supplementary/modelGen/sim_and_sensor/log.txt
log.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5003289/suppl/model_generation_supplementary/modelGen/si

Ckt9_Speed_Independent_Muller_C_element.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Information/Ckt 1 - Not-gate/Ckt1_not_gate.xml
Ckt1_not_gate.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Information/Ckt 8 - Majority Muller C element/Ckt8_Majority_Muller_C_element.xml
Ckt8_Majority_Muller_C_element.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Information/Ckt 4 - NOR/Ckt4_nor.xml
Ckt4_nor.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Information/Ckt 2 - Nand/Ckt2_nand_gate.xml
Ckt2_nand_gate.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Information/Ckt 7 - Toggle Switch/Ckt7_Toggle_Switch.xml
Ckt7_Toggle_Switch.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Information/Ckt 5 - OR/Ckt5_or.xml
Ckt5_or.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb6b00296/suppl/Supplementary Inf

AND22.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/OR17.xml
OR17.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/YES28.xml
YES28.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/AND26.xml
AND26.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/INV6.xml
INV6.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/NAND31.xml
NAND31.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/INV15.xml
INV15.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/NAND21.xml
NAND21.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/NOR16.xml
NOR16.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/YES14.xml
YES14.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/NOR20.xml
NOR20.xml, False
/mnt/data1/jiawei/acs-data/suppl-

YES23.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/INV0.xml
INV0.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/OR4.xml
OR4.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/YES21.xml
YES21.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/INV31.xml
INV31.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/NOR8.xml
NOR8.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/INV17.xml
INV17.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/OR5.xml
OR5.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/INV39.xml
INV39.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/AND11.xml
AND11.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_200/NAND0.xml
NAND0.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb4001

INV14.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/INV6.xml
INV6.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/INV15.xml
INV15.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/YES14.xml
YES14.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/NOR1.xml
NOR1.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/YES9.xml
YES9.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/NOR10.xml
NOR10.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/OR10.xml
OR10.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/NOR4.xml
NOR4.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/OR15.xml
OR15.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_100/INV7.xml
INV7.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135

NAND0.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_25/YES3.xml
YES3.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_25/NAND2.xml
NAND2.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_25/NOR3.xml
NOR3.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_25/AND0.xml
AND0.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_50/OR3.xml
OR3.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_50/INV4.xml
INV4.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_50/OR6.xml
OR6.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_50/AND5.xml
AND5.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_50/synTest.sbol
synTest.sbol, True
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib_size_50/NOR6.xml
NOR6.xml, False
/mnt/data1/jiawei/acs-data/suppl-files/sb400135t/suppl/lib

pth1.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00250/suppl/pth7.txt
pth7.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00250/suppl/pth3.txt
pth3.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00250/suppl/pth2.txt
pth2.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb5b00250/suppl/pth4.txt
pth4.txt, False
/mnt/data1/jiawei/acs-data/suppl-files/sb7b00042/suppl/sb-2017-00042e_si_001.txt
sb-2017-00042e_si_001.txt, False



In [20]:
# inspect the supplementary-file records collected for one publication
# (each record carries 'suppl_filename', 'rpath' and 'is_sequence')
processed_files["sb500234s"]["suppl_files"]

[{'suppl_filename': 'sb-2014-00234s_si_001.pdf',
  'rpath': 'sb500234s/suppl/sb-2014-00234s_si_001.pdf',
  'is_sequence': False},
 {'suppl_filename': 'sb-2014-00234s_si_002.xls',
  'rpath': 'sb500234s/suppl/sb-2014-00234s_si_002.xls',
  'is_sequence': False},
 {'suppl_filename': 'sb-2014-00234s_si_003.zip',
  'rpath': 'sb500234s/suppl/sb-2014-00234s_si_003.zip',
  'is_sequence': False},
 {'suppl_filename': 'pSEVA177RbT.gb',
  'rpath': 'sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA177RbT.gb',
  'is_sequence': True},
 {'suppl_filename': 'pSEVA117RbD.gb',
  'rpath': 'sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA117RbD.gb',
  'is_sequence': True},
 {'suppl_filename': 'pSEVA117RbT.gb',
  'rpath': 'sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA117RbT.gb',
  'is_sequence': True},
 {'suppl_filename': 'pSEVA177RbD.gb',
  'rpath': 'sb500234s/suppl/ApE files/Auxotroph analysis/pSEVA177RbD.gb',
  'is_sequence': True},
 {'suppl_filename': 'pSEVAD1Z1A.gb',
  'rpath': 'sb500234s/suppl/A

In [21]:
# pickle the files: one "<pub_num>.pkl" per publication under output_path
# make sure the destination directory exists first; otherwise open(..., "wb")
# raises FileNotFoundError on a fresh machine / fresh run
os.makedirs(output_path, exist_ok=True)
for pub_num, data in tqdm(processed_files.items()):
    with open(os.path.join(output_path, pub_num + ".pkl"), "wb") as out:
        pickle.dump(data, out)

HBox(children=(FloatProgress(value=0.0, max=1545.0), HTML(value='')))




In [22]:
# list every publication together with its number of supplementary files
pprint(
    [
        (pub_id, len(record["suppl_files"]))
        for pub_id, record in processed_files.items()
    ]
)

[('sb7b00050', 1),
 ('sb6b00301', 1),
 ('sb300068g', 0),
 ('sb6b00050', 2),
 ('sb6b00034', 1),
 ('sb5b00187', 1),
 ('sb7b00088', 1),
 ('sb8b00471', 1),
 ('sb400072v', 0),
 ('sb300069k', 1),
 ('sb8b00194', 1),
 ('sb500331x', 1),
 ('sb7b00016', 2),
 ('sb8b00059', 0),
 ('sb8b00040', 1),
 ('sb9b00020', 5),
 ('sb5003295', 0),
 ('sb5b00202', 6),
 ('sb3001003', 1),
 ('sb500241e', 1),
 ('sb4000367', 0),
 ('sb9b00208', 1),
 ('sb6b00057', 47),
 ('sb500368w', 1),
 ('sb8b00493', 1),
 ('sb8b00218', 1),
 ('sb5001565', 2),
 ('sb8b00213', 1),
 ('sb5b00077', 1),
 ('sb5b00009', 0),
 ('sb300025d', 0),
 ('sb9b00195', 1),
 ('sb7b00407', 1),
 ('sb400058n', 1),
 ('sb3000194', 1),
 ('sb7b00154', 1),
 ('sb9b00291', 1),
 ('sb8b00479', 1),
 ('sb5b00104', 0),
 ('sb6b00199', 1),
 ('sb5b00046', 0),
 ('sb500053j', 2),
 ('sb5003136', 1),
 ('sb7b00087', 1),
 ('sb8b00016', 1),
 ('sb7b00077', 1),
 ('sb6b00219', 1),
 ('sb8b00429', 0),
 ('sb300044r', 0),
 ('sb8b00488', 1),
 ('sb400126a', 1),
 ('sb6b00359', 1),
 ('sb9b0001

In [23]:
# check one pickle: read a single saved record back from disk and show it
pkl_path = os.path.join(output_path, "sb500234s.pkl")
with open(pkl_path, "rb") as ifile:
    restored = pickle.load(ifile)
pprint(restored)

{'abstract': ['Synthetic biology applications in biosensing, bioremediation, '
              'and biomining envision the use of engineered microbes beyond a '
              'contained laboratory. Deployment of such microbes in the '
              'environment raises concerns of unchecked cellular proliferation '
              'or unwanted spread of synthetic genes. While '
              'antibiotic-resistant plasmids are the most utilized vectors for '
              'introducing synthetic genes into bacteria, they are also '
              'inherently insecure, acting naturally to propagate DNA from one '
              'cell to another. To introduce security into bacterial synthetic '
              'biology, we here took on the task of completely reformatting '
              'plasmids to be dependent on their intended host strain and '
              'inherently disadvantageous for others. Using conditional '
              'origins of replication, rich-media compatible auxotrophies, and 