# Construction of draft GEM

# Preparation of input files (blastp_homol.xlsx, edgar_homol.xlsx)

**1. Download RefSeq genome files.**
- Target strain: Pseudomonas putida S12 (NZ_CP009974.1)
- Reference strain 1: Pseudomonas putida KT2440 (NC_002947.4)
- Reference strain 2: Pseudomonas aeruginosa PAO1 (NC_002516.2)

**2. Download sbml files of GEMs of reference strains from BiGG database (http://bigg.ucsd.edu/).**
- P. putida KT2440: iJN1463.xml
- P. aeruginosa PAO1: iPAE1146.xml

**3. Homolog searches of S12 genes in the reference GEMs**
- BLASTP search using 'BLAST+' (standalone BLAST) 
- Database search for the EDGAR server with default settings (https://edgar3.computational.bio.uni-giessen.de/) 


In [None]:
# import packages

import openpyxl
import cobra
from Bio import SeqIO
import os
import copy
from string import ascii_uppercase

# Alphabet used to label spreadsheet columns and to name per-column dynamic
# variables: 26 upper-case letters ('A'..'Z').
apb = ascii_uppercase

# Pre-create one empty module-level dictionary per letter (A, B, ..., Z).
globals().update((letter, {}) for letter in apb)

# set PATH: absolute path of the current working directory
path = os.path.abspath(os.getcwd())

In [None]:
# Function definition

# Function that extracts the locus tag of CDSs from the genome file into a list
def extract_cds_lt_in_genome(x):  # x : genome file PATH
    """Collect the locus tags of the CDS features of a GenBank genome file.

    Only the first record yielded by ``SeqIO.parse`` is inspected.

    Args:
        x: path of the genome file (parsed with the "genbank" format).

    Returns:
        list of locus tags, in the order the CDS features appear.
    """
    print ('<RUNNING>\nextract locus tag of CDSs from genome({0})\n: Extracting CDS for target organism...\n'.format(x))

    record = next(SeqIO.parse(x, "genbank"))

    # Keep CDS features only; a feature whose "pseudo" qualifier equals [""]
    # (pseudogene) is left out.
    coding_features = [
        feature
        for feature in record.features
        if feature.qualifiers.get("pseudo") != [""] and feature.type == 'CDS'
    ]

    cds_lt = []
    for feature in coding_features:
        cds_lt.extend(feature.qualifiers['locus_tag'])

    print ('<DONE>\nextract_cds_lt_in_genome({0})\n'.format(x))

    return cds_lt


'''
Both the EDGAR and the BLASTP results are used as input after being converted to '.xlsx' format.

The columns of the BLASTP file must be in this order : Query_ID, Subject_ID, Query_coverage, Identity
Query_ID : locus tag of the target organism
Subject_ID : locus tag of the counterpart (reference) organism
Query_coverage : query coverage
Identity : percent identity

All BLASTP results are assembled into one file, one spreadsheet per result. ('n' = BLASTP result number, spreadsheet 'n' : blastp_result 'n')

BLASTP criteria : Query coverage > 90% and Identity > 90%

BLASTP maximum number of target sequences : 5
'''

# Convert EDGAR homologs file to dictionary type
def edgar_homolog (x):  # x : EDGAR file path
    """Parse an EDGAR homolog table into one dictionary per reference column.

    Column A of the first worksheet is treated as the target organism and
    every following column as one reference organism. For each reference
    column a dictionary ``{target locus tag: reference locus tag}`` is built.
    When the reference cell contains ',IDENTICAL PARALOGS:,' the value is a
    list of locus tags; when the target cell contains it, every listed target
    paralog becomes its own key. Keys that are not in the module-level
    ``target_cds_lt`` are dropped.

    Side effects:
        Each dictionary is stored as the module-level variable
        ``edgar_A_vs_B``, ``edgar_A_vs_C``, ... because other functions in
        this file look them up by that name. Intermediate column lists are
        kept local.

    Args:
        x: path of the EDGAR '.xlsx' file.

    Returns:
        list of the dictionaries, in column order (B, C, ...).

    Raises:
        ValueError: if the first header cell is empty (no organism columns).
    """
    print ('<RUNNING>\nedgar_homolog({0})\n: Extracting EDGAR homolog file...\n'.format(x))

    paralog_sep = ',IDENTICAL PARALOGS:,'

    edgar_wb = openpyxl.load_workbook(x)  # load EDGAR EXCEL file
    edgar = edgar_wb[edgar_wb.sheetnames[0]]  # first sheet holds the EDGAR homolog data

    # if EDGAR 3.0 data, run this script.---------
    edgar.delete_rows(1)

    # Number of organism columns = position of the first empty header cell.
    # Fall back to the full row width when no header cell is empty, so that
    # max_col is always defined.
    header_row = edgar['1']
    max_col = len(header_row)
    for i, cell in enumerate(header_row):
        if not cell.value:
            max_col = i
            break
    if max_col == 0:
        raise ValueError('No organism columns found in EDGAR file: {0}'.format(x))

    # Read every organism column as a list of cleaned cell strings.
    columns = []
    for i in range(max_col):
        columns.append([cell.value.replace(', ,psos,', ',') for cell in edgar[apb[i]]])

    # Rows from the first ' - , -' entry of column A onwards are not related to
    # the target organism. Without such a row, every row is kept.
    try:
        cut_row = columns[0].index(' - , -')  # determining upper limit index of row
    except ValueError:
        cut_row = len(columns[0])

    # Remove headers and parts not related to the target organism.
    columns = [column[1:cut_row] for column in columns]
    target_col = columns[0]  # column A is the target organism

    allowed_keys = set(target_cds_lt)  # built once for O(1) membership tests

    result = []
    for i in range(1, max_col):
        mapping = {}
        for target_cell, ref_cell in zip(target_col, columns[i]):
            # Locus tag = first comma-separated field of each paralog entry.
            ref_tags = [part.split(',')[0].strip() for part in ref_cell.split(paralog_sep)]
            # A list value only when the reference cell lists paralogs,
            # otherwise a plain string.
            value = ref_tags if paralog_sep in ref_cell else ref_tags[0]
            for part in target_cell.split(paralog_sep):
                key = part.split(',')[0]
                # Keep only CDS locus tags of the target organism genome file.
                if key in allowed_keys:
                    mapping[key] = value

        globals()['edgar_A_vs_{}'.format(apb[i])] = mapping
        result.append(mapping)

    print ('<DONE>\nedgar_homolog({0})\n'.format(x))

    return result
    


# Convert BLASTP homolog file to dictionary type
def blast_homolog (x, min_q_cov = 90, min_ident = 90):  # x : BLASTP file path
    """Parse a BLASTP result workbook into one dictionary per worksheet.

    Each worksheet is read from row 2 onwards with columns A-D holding
    Query_ID, Subject_ID, Query_coverage and Identity. Hits are kept only
    when both coverage and identity are strictly above their thresholds.
    For every query the value is the subject ID (a string) when exactly one
    hit remains, otherwise a list of subject IDs in row order. Hits are
    grouped by query ID, so rows of one query need not be adjacent.

    Side effects:
        The dictionary of the n-th worksheet is stored as the module-level
        variable ``blast_sheet_n`` (n starting at 1) because other functions
        in this file look them up by that name.

    Args:
        x: path of the BLASTP '.xlsx' file.
        min_q_cov: query coverage threshold (exclusive), default 90.
        min_ident: percent identity threshold (exclusive), default 90.

    Returns:
        list of the dictionaries, in worksheet order.
    """
    print ('<RUNNING>\nblast_homolog({0})\n: Extracting BLASTP homolog file...\n'.format(x))

    blast_wb = openpyxl.load_workbook(x)

    result = []
    for serial, sheet in enumerate(blast_wb.sheetnames, start=1):
        blast = blast_wb[sheet]

        # Walk the four columns in lockstep, skipping the header row.
        rows = zip(blast['A'][1:], blast['B'][1:], blast['C'][1:], blast['D'][1:])

        hits = {}  # query ID -> subject IDs that pass both thresholds
        for q_cell, s_cell, q_cov_cell, ident_cell in rows:
            # Skip rows with no query or subject ID (e.g. trailing blank rows).
            if q_cell.value is None or s_cell.value is None:
                continue
            q_id = q_cell.value.split(',')[0]
            s_id = s_cell.value.split(',')[0]
            if (float(q_cov_cell.value) > min_q_cov) and (float(ident_cell.value) > min_ident):
                hits.setdefault(q_id, []).append(s_id)

        # blast_sheet_{} (dict)
        # matching results between query (target organism) and subject (others)
        mapping = {}
        for q_id, subjects in hits.items():
            mapping[q_id] = subjects[0] if len(subjects) == 1 else subjects

        globals()['blast_sheet_{}'.format(serial)] = mapping
        result.append(mapping)

    print ('<DONE>\nblast_homolog({0})\n'.format(x))

    return result
        
        

# Combine the homologs from two input files (priority : EDGAR > BLASTP)
def combine_homolog (file_name_homol_edgar, file_name_homol_blast):
    """Merge the EDGAR and BLASTP homolog dictionaries into one dictionary.

    The two workbooks are opened only to find out how many dictionaries
    exist: the EDGAR header row gives the number of organism columns and the
    BLASTP workbook gives the number of worksheets. The dictionaries are read
    from the module-level variables ``edgar_A_vs_B``, ... and
    ``blast_sheet_1``, ..., which must already exist.

    For every target locus tag, the homologs of all EDGAR dictionaries are
    pooled, de-duplicated, sorted and stripped of the '-' placeholder; the
    same is done for BLASTP. When a locus tag has EDGAR homologs they are
    used, otherwise the BLASTP homologs are used.

    Args:
        file_name_homol_edgar: EDGAR file name inside '<path>/input/'.
        file_name_homol_blast: BLASTP file name inside '<path>/input/'.

    Returns:
        dict mapping each target locus tag to a sorted list of homolog IDs.
    """
    print ('<RUNNING>\ncombine_homolog()\n: Combine the homologs data...\n')

    edgar_wb = openpyxl.load_workbook('{0}/input/{1}'.format(path, file_name_homol_edgar))
    edgar = edgar_wb[edgar_wb.sheetnames[0]]

    edgar.delete_rows(1)

    # Number of EDGAR organism columns = position of the first empty header
    # cell; fall back to the full row width when no header cell is empty.
    header_row = edgar['1']
    edgar_sample_num = len(header_row)
    for i, cell in enumerate(header_row):
        if not cell.value:
            edgar_sample_num = i
            break

    blast_wb = openpyxl.load_workbook('{0}/input/{1}'.format(path, file_name_homol_blast))
    blast_sample_num = len(blast_wb.sheetnames)  # The number of pairwise-alignment on BLASTP

    # Fetch the dynamic variables of EDGAR and BLASTP by name.
    edgar_dicts = [globals()['edgar_A_vs_{}'.format(apb[i + 1])] for i in range(edgar_sample_num - 1)]
    blast_dicts = [globals()['blast_sheet_{}'.format(i + 1)] for i in range(blast_sample_num)]

    def _pooled_homologs(locus_tag, homolog_dicts):
        # Sorted, de-duplicated homologs of locus_tag over all dicts, without '-'.
        pooled = set()
        for homolog_dict in homolog_dicts:
            if locus_tag in homolog_dict:
                value = homolog_dict[locus_tag]
                if isinstance(value, str):
                    pooled.add(value)
                else:
                    pooled.update(value)
        pooled.discard('-')
        return sorted(pooled)

    # generate a complete list of keys for the locus_tag of the target organism
    total_keys = set()
    for homolog_dict in edgar_dicts + blast_dicts:
        total_keys.update(homolog_dict)

    edgar_homolog_result = {}
    blast_homolog_result = {}
    for locus_tag in sorted(total_keys):  # sorted for a reproducible key order
        lst_bla = _pooled_homologs(locus_tag, blast_dicts)
        if lst_bla:
            blast_homolog_result[locus_tag] = lst_bla
        lst_ed = _pooled_homologs(locus_tag, edgar_dicts)
        if lst_ed:
            edgar_homolog_result[locus_tag] = lst_ed

    # put BLASTP in total_homolog (empty dict) first and then overwrite EDGAR.
    # (If the dict of EDGAR and BLASTP has the same key (locus tag of target organism), select EDGAR.)
    total_homolog = {}
    total_homolog.update(blast_homolog_result)
    total_homolog.update(edgar_homolog_result)

    print ('<DONE>\ncombine_homolog()\n')

    return total_homolog


# Obtain reactions from reference models with gene-protein-reaction (GPR) relationships.
# gem_lst : List of reference GEM files
# gene_remain_lst: List of genes to keep even if they are not homolog (e.g., virtual gene of spontaneous reaction)
def get_rxn_from_gem (gem_lst, gene_remain_lst = False):
    """Build a model from the reactions of reference GEMs that keep their genes.

    For every reference GEM, all genes that are neither a homolog value in
    the module-level ``total_homolog`` nor listed in ``gene_remain_lst`` are
    passed to ``cobra.manipulation.delete.remove_genes``; the reactions that
    remain in the reference model afterwards are added to a new model.

    Args:
        gem_lst: file names of the reference GEMs inside '<path>/input/',
            processed in the given order.
        gene_remain_lst: gene IDs that are never removed; any falsy value
            means "no protected genes".

    Returns:
        cobra.Model with id 'incomplete_model' holding the collected reactions.
    """
    print ('<RUNNING>\nget_rxn_from_gem([{0}])\n: Obtain reactions from reference GEMs with GPR relationships....\n'.format(', '.join(gem_lst)))

    incomplete_model = cobra.Model('incomplete_model')  # Create an empty model

    # All reference genes that are a homolog of some target gene (built once).
    homolog_genes = set()
    for homologs in total_homolog.values():
        homolog_genes.update(homologs)

    protected_genes = set(gene_remain_lst) if gene_remain_lst else set()

    for models in gem_lst:
        model = cobra.io.read_sbml_model('{0}/input/{1}'.format(path, models))

        # remove_genes : genes of this GEM without a homolog in the target
        # organism. Protected genes are filtered out here instead of being
        # removed from the list while it is iterated, which skipped the
        # element following each removal.
        remove_genes = sorted(
            gene.id
            for gene in model.genes
            if gene.id not in homolog_genes and gene.id not in protected_genes
        )

        # It eliminates inactivated reactions without the corresponding genes according to GPR.
        cobra.manipulation.delete.remove_genes(model, remove_genes)

        # Add reactions to incomplete_model
        incomplete_model.add_reactions(list(model.reactions))

    print ('<DONE>\nget_rxn_from_gem({0})\n'.format(', '.join(gem_lst)))

    return incomplete_model



# Find S12 locus tags corresponding to the metabolic reactions
def rename (incomplete_model, total_homolog):
    """Rewrite gene reaction rules from reference gene IDs to target locus tags.

    For every gene of the model, the target locus tags that list this gene
    among their homologs are looked up. One match replaces the gene ID
    directly; several matches replace it by '(tag1 or tag2 ...)'. Only whole
    gene IDs are replaced (delimited by whitespace, parentheses or the ends
    of the rule), so an ID that is a prefix of another ID is not touched
    inside the longer one.

    Args:
        incomplete_model: model whose reactions carry ``genes`` and a
            writable ``gene_reaction_rule``; it is modified in place.
        total_homolog: dict mapping each target locus tag to an iterable of
            reference gene IDs.

    Returns:
        The same model object that was passed in.
    """
    import re  # needed only for the whole-token replacement below

    print ('<RUNNING>\nrename({0}, total_homolog)\n: Find S12 locus tags corresponding to the metabolic reactions...\n'.format(incomplete_model))

    # Reverse index built once: reference gene ID -> target locus tags.
    targets_by_ref_gene = {}
    for target_lt, ref_genes in total_homolog.items():
        for ref_gene in ref_genes:
            targets_by_ref_gene.setdefault(ref_gene, []).append(target_lt)

    # Find the paralog and set it according to gene_reaction_rule format
    # rename_dict[id][0] = gene_reaction_rule format, rename_dict[id][1] = gene_list
    rename_dict = {}
    for gene in incomplete_model.genes:
        para_list = targets_by_ref_gene.get(gene.id)
        if not para_list:
            continue
        if len(para_list) > 1:
            rename_dict[gene.id] = ['(' + ' or '.join(para_list) + ')', para_list]
        else:
            rename_dict[gene.id] = [para_list[0], para_list]

    # Change genes ID of model to locus tag of target organism
    for rxn in incomplete_model.reactions:
        # Snapshot of the IDs, because the rule is rewritten inside the loop.
        for old_id in [gene.id for gene in rxn.genes]:
            if old_id not in rename_dict:
                continue
            new_rule, new_genes = rename_dict[old_id]
            current_ids = {gene.id for gene in rxn.genes}
            # NOTE(review): when one of the new locus tags is already in the
            # reaction, the reference gene ID is left in the rule unchanged -
            # confirm that this is intended.
            if current_ids.isdisjoint(new_genes):
                whole_id = r'(?<![^\s()])' + re.escape(old_id) + r'(?![^\s()])'
                # A function replacement keeps new_rule literal (no escape handling).
                rxn.gene_reaction_rule = re.sub(whole_id, lambda _match: new_rule, rxn.gene_reaction_rule)

    print ('<DONE>\nrename({0}, total_homolog)\n'.format(incomplete_model))

    return incomplete_model


# Function that extracts CDSs with both locus tag and old locus tag from genome file and matches locus tag with old locus tag
def match_lt_old_new(x):  # x : genome file path. (e.g. A.gb)
    """Map the locus tag of each CDS to its old locus tag.

    Only the first record yielded by ``SeqIO.parse`` is inspected. Features
    whose "pseudo" qualifier equals [""] and features that are not of type
    'CDS' are ignored, as are CDS features without an 'old_locus_tag'.

    Args:
        x: path of the genome file (parsed with the "genbank" format).

    Returns:
        dict {first locus_tag: first old_locus_tag} of the remaining CDSs.
    """
    print ('<RUNNING>\nmatch_lt_old_new({0})\n: Matching locus tag and old locus tag from CDS type of genome...\n'.format(x))

    record = next(SeqIO.parse(x, "genbank"))

    cds_old_lt = {}
    for feature in record.features:
        qualifiers = feature.qualifiers
        if qualifiers.get("pseudo") == [""]:
            continue
        if feature.type != 'CDS':
            continue
        if 'old_locus_tag' in qualifiers:
            cds_old_lt[qualifiers['locus_tag'][0]] = qualifiers['old_locus_tag'][0]

    print ('<DONE>\nmatch_lt_old_new({0})\n'.format(x))

    return cds_old_lt



# Changes the locus tag of a gene to an old locus tag
# args : Genome file of RefSeq DB based on the same sequencing data
# file_name_homol_edgar: file name of EDGAR homolog data
def change_locus_tag (args, file_name_homol_edgar):
    """Replace locus tags by old locus tags in the matching EDGAR dictionary.

    For every genome file, ``match_lt_old_new`` supplies the mapping
    {locus tag: old locus tag}. The EDGAR dictionary to rewrite is the first
    module-level ``edgar_A_vs_*`` whose values use the same locus-tag prefix
    (the characters before the first digit) as the genome file. In that
    dictionary every value is replaced by its old locus tag; values without
    an old locus tag are dropped, and a key whose values are all dropped is
    removed. The dictionary object is modified in place.

    If no dictionary uses the prefix of a genome file, nothing is changed for
    that file and a message is printed.

    Args:
        args: list of genome file names inside '<path>/input/'.
        file_name_homol_edgar: EDGAR file name inside '<path>/input/'; it is
            opened only to count the organism columns.

    Returns:
        None.

    Raises:
        ValueError: if a genome file yields no CDS with an old locus tag.
    """
    print ('<RUNNING>\nchange_locus_tag({0})\n: Changes the locus tag of a gene to an old locus tag...\n'.format(args))

    def _tag_prefix(tag):
        # Part of a locus tag before its first digit; the whole tag if it has no digit.
        for position, char in enumerate(tag):
            if char.isdecimal():
                return tag[:position]
        return tag

    edgar_wb = openpyxl.load_workbook('{0}/input/{1}'.format(path, file_name_homol_edgar))
    edgar = edgar_wb[edgar_wb.sheetnames[0]]

    # if EDGAR 3.0 data, run this script.---------
    edgar.delete_rows(1)

    # Number of organism columns = position of the first empty header cell;
    # fall back to the full row width when no header cell is empty.
    header_row = edgar['1']
    max_col = len(header_row)
    for i, cell in enumerate(header_row):
        if not cell.value:
            max_col = i
            break

    for genome_file in args:
        cds_old_lt = match_lt_old_new('{0}/input/{1}'.format(path, genome_file))
        if not cds_old_lt:
            raise ValueError('No CDS with an old_locus_tag found in {0}'.format(genome_file))

        target_chr = _tag_prefix(next(iter(cds_old_lt)))

        print ('target_chr:', target_chr)

        # Find the EDGAR dictionary whose values carry the same prefix.
        a_name_index = None
        for i in range(max_col-1):
            a = globals()['edgar_A_vs_{}'.format(apb[i + 1])]

            # First value that is a real locus tag (not the '-' placeholder).
            sample = next((value for value in a.values() if value != '-'), None)
            if sample is None:
                continue
            if not isinstance(sample, str):
                sample = sample[0]  # paralog list: judge by its first tag
            a_chr = _tag_prefix(sample)

            print ('a_chr:', a_chr)

            if a_chr == target_chr:

                print ('chrs:', a_chr, target_chr)

                a_name_index = i
                break

        if a_name_index is None:
            # Rewriting an unrelated dictionary would only empty it.
            print ('No EDGAR column uses the locus-tag prefix {0}; nothing changed for {1}'.format(target_chr, genome_file))
            continue

        print (a_name_index)

        homolog_dict = globals()['edgar_A_vs_{}'.format(apb[a_name_index + 1])]
        converted = {}
        for k, v in homolog_dict.items():
            if isinstance(v, str):
                if v in cds_old_lt:
                    converted[k] = cds_old_lt[v]
            else:
                lst = [cds_old_lt[j] for j in v if j in cds_old_lt]
                if len(lst) == 1:
                    # Use the tag that was actually converted, not v[0].
                    converted[k] = lst[0]
                elif len(lst) > 1:
                    converted[k] = lst

        # Refill the same dict object so existing references stay valid.
        homolog_dict.clear()
        homolog_dict.update(converted)

    print ('<DONE>\nchange_locus_tag({0})\n'.format(args))


        
# Function to output the model to SBML and EXCEL format
def output_excel_sbml_file(model, model_id):  # model : target model, model_id : Storage name and model name of the output file of the model
    """Write a model to '<path>/output/<model_id>.xml' and '.xlsx'.

    The model id is set to ``model_id`` first. The Excel workbook gets two
    sheets, 'Reaction List' and 'Metabolite List', each with a header row
    followed by one row per reaction / metabolite.

    Args:
        model: model to export; its ``id`` attribute is overwritten.
        model_id: new model id, also used as the output file name.

    Returns:
        None.
    """
    print ('<RUNNING>\noutput_excel_sbml_file({0}, {1})\n: Outputing model to excel and sbml format...\n'.format(model, model_id))

    model.id = model_id

    cobra.io.write_sbml_model(model, '{0}/output/{1}.xml'.format(path, model.id))

    # Compartment codes with a readable name; any other code is written as ''.
    compartment_names = {'c': 'Cytosol', 'p': 'Periplasm', 'e': 'Extracellular'}

    rxn_rows = [
        [rxn.id , rxn.name, rxn.reaction, rxn.gene_reaction_rule, rxn.lower_bound, rxn.upper_bound, rxn.objective_coefficient, rxn.subsystem]
        for rxn in model.reactions
    ]
    mt_rows = [
        [met.id , met.name, met.formula, met.charge, compartment_names.get(met.compartment, '')]
        for met in model.metabolites
    ]

    header_rxn = ['Reaction ID', 'Description', 'Reaction', 'Gene-protein-reaction (GPR) rules', 'Lower bound', 'Upper bound', 'Objective', 'Subsystem']
    header_mt = ['Metabolite ID', 'Description', 'Charged formula', 'Charge', 'Compartment']

    # Create both sheets, then drop the default sheet named 'Sheet'.
    write_wb = openpyxl.Workbook()
    rxn_sheet = write_wb.create_sheet('Reaction List')
    mt_sheet = write_wb.create_sheet('Metabolite List')
    write_wb.remove(write_wb['Sheet'])

    for row in [header_rxn] + rxn_rows:
        rxn_sheet.append(row)

    for row in [header_mt] + mt_rows:
        mt_sheet.append(row)

    write_wb.save('{0}/output/{1}.xlsx'.format(path, model.id))

    print ('<DONE>\noutput_excel_sbml_file({0}, {1})\n'.format(model, model_id))

# 1. Find homologs in the input files
- Criteria : > 90% in query coverage & > 90% in percent identity
- Combine the homologs from two input files (priority : EDGAR > BLASTP)

In [None]:
# Genome file of the target organism: collect the locus tags of its CDSs
file_name_target_gb = 'NZ_CP009974.1.gb'
target_cds_lt = extract_cds_lt_in_genome ('/'.join((path, 'input', file_name_target_gb)))

# EDGAR homolog file
file_name_homol_edgar = 'edgar_homol.xlsx'
edgar_homolog ('/'.join((path, 'input', file_name_homol_edgar)))

# BLASTP homolog file
file_name_homol_blast = 'blastp_homol.xlsx'
blast_homolog ('/'.join((path, 'input', file_name_homol_blast)))

# In the homolog data, change the "locus_tag" of KT2440 to the "old_locus_tag" used in the GEM file (iJN1463.xml).
file_name_change_gb_lst = ['NC_002947.4.gb']
change_locus_tag(file_name_change_gb_lst, file_name_homol_edgar)

# Combine the homolog data (priority : EDGAR > BLASTP)
total_homolog = combine_homolog(file_name_homol_edgar, file_name_homol_blast)

# 2. Retrieve metabolic reactions from the reference GEMs

In [None]:
# Get reactions from reference GEMs

# gem_lst : List of reference GEM file names (looked up in the 'input' folder)
# gene_remain_lst: List of genes to keep even if they have no homolog (e.g., virtual gene of spontaneous reaction)
gem_lst = ['iJN1463.xml', 'iPAE1146.xml'] # write model file names in order of highest priority.
gene_remain_lst = ['PP_s0001','SPONTANEOUS','PA2366']


# Build the draft ("incomplete") model from the reactions of the reference GEMs.
incomplete_model = get_rxn_from_gem(gem_lst, gene_remain_lst)

# 3. Automated metabolic reconstruction of S12 using the RAVEN toolbox (https://github.com/SysBioChalmers/RAVEN/)
- Input files: NZ_CP009974.1.faa (amino acid sequences of S12 genome)
- Running RAVEN with default settings

**Matlab script**

raven_model = getMetaCycModelForOrganism('raven_model', 'NZ_CP009974.1.faa')

# 4. Manually add metabolic reactions and genes from the RAVEN model to the incomplete model generated in step 2

In [None]:
# Load the RAVEN model from the input folder and work on a copy of it
raven_model_ori = cobra.io.read_sbml_model('{0}/input/raven_model.xml'.format(path))
raven_model = raven_model_ori.copy()

# Add all reactions of the RAVEN model to incomplete_model
add_raven_rxns = list(raven_model.reactions)
incomplete_model.add_reactions(add_raven_rxns)


# 5. Manual gap-filling

In [None]:
# Load the KT2440 GEM (iJN1463.xml) and work on a copy of it;
# the gap-filling reactions below are taken from this model.
iJN1463_ori = cobra.io.read_sbml_model('{0}/input/iJN1463.xml'.format(path))
iJN1463 = iJN1463_ori.copy()

# gap-filling: take reactions 'NADS1' and 'CLt3_2pp' from iJN1463, clear their
# gene reaction rules and add them to incomplete_model
nads1 = iJN1463.reactions.get_by_id('NADS1')
nads1.gene_reaction_rule = ''
clt3_2pp = iJN1463.reactions.get_by_id('CLt3_2pp')
clt3_2pp.gene_reaction_rule = ''
incomplete_model.add_reactions([nads1,clt3_2pp])

# 6. Find S12 locus tags corresponding to the metabolic reactions

In [None]:
# Find S12 locus tags corresponding to the metabolic reactions:
# rename() rewrites the gene reaction rules using total_homolog and returns the model it was given.
incomplete_model = rename(incomplete_model, total_homolog)

# 7. Save the information as sbml/excel file in GEM format
- Code for retrieving biomass equation of KT2440 GEM (iJN1463.xml)

In [None]:
# Objective expression of the KT2440 GEM as text (kept for inspection only).
pre_objective_id = str(iJN1463.objective.expression)

# Take the objective (biomass) reaction from the objective coefficients rather
# than slicing the printed expression, whose term order is not guaranteed.
objective_rxns = list(cobra.util.solver.linear_reaction_coefficients(iJN1463))
if not objective_rxns:
    raise ValueError('iJN1463 has no reaction with a non-zero objective coefficient')
objective_id = objective_rxns[0].id

incomplete_model.objective = objective_id

# Set oxygen uptake rate to 18.5
medium = incomplete_model.medium
medium['EX_o2_e'] = 18.5
incomplete_model.medium = medium

# Export draft_model to output folder
output_excel_sbml_file (incomplete_model, model_id = 'draft_model')