In [1]:
import cobra
import gempipe

from memote.support.consistency import find_unconserved_metabolites

# define useful functions

In [2]:
from helper_functions import *

# 01_klebsiella

In [3]:
# load reference model
# (read the SBML file into a cobra model, then print gempipe's sanity report for it)
iYL1228 = cobra.io.read_sbml_model('01_klebsiella/reference/iYL1228.xml')
gempipe.sanity_report(iYL1228)

gempipe v1.37.6 - sanity_report
model ID: iYL1228
G: 1229 R: 2262 M: 1658 uM: 1055 groups: 0
Compartments: ['c', 'e', 'p']
Biomass assemblies: 1 ['BIOMASS_']
Objectives: 1 ['BIOMASS_']
Optimization: 1.0426374979773743 optimal (cplex)
Unconstrained LB-UB: (-1000.0, 1000.0)
Bad EX_change notation: 0
Sink/demand: 2
Constrained metabolic: 15
With 'artificial' atoms: 0
Missing formulas - charges: 1658 - 0
Mass - charge unbalances: 0 - 0


## get proteomes

In [4]:
# get FASTA sequences of modeled genes
# (fasta_to_modeled comes from helper_functions; judging by the printed report it
#  looks up the model's gene IDs in the given FASTA files -- TODO confirm against the helper)
# nucleotide sequences
_ = fasta_to_modeled(iYL1228, ['01_klebsiella/reference/CP000647.1.fna'], '01_klebsiella/reference/iYL1228.fna')
# amino acid sequences
_ = fasta_to_modeled(iYL1228, ['01_klebsiella/reference/CP000647.1.faa'], '01_klebsiella/reference/iYL1228.faa')

Recovered 1228 / 1229.
Missing 1 : {'KPN_SPONT'}
Recovered 1228 / 1229.
Missing 1 : {'KPN_SPONT'}


## remove strain-specific biomass precursors

In [5]:
# Strip the strain-specific precursors (udpgal_c, udpgalur_c, dtdprmn_c) from the
# biomass equation: each term is cut out of the reaction string, and the reaction
# is then rebuilt from the shortened string.
r_string = iYL1228.reactions.BIOMASS_.reaction
for strain_specific_term in (
    ' + 0.076 udpgal_c',
    ' + 0.001 udpgalur_c',
    ' + 0.142 dtdprmn_c',
):
    r_string = r_string.replace(strain_specific_term, '')
iYL1228.reactions.BIOMASS_.build_reaction_from_string(r_string)

gempipe.sanity_report(iYL1228)

gempipe v1.37.6 - sanity_report
model ID: iYL1228
G: 1229 R: 2262 M: 1658 uM: 1055 groups: 0
Compartments: ['c', 'e', 'p']
Biomass assemblies: 1 ['BIOMASS_']
Objectives: 1 ['BIOMASS_']
Optimization: 1.084689665727891 optimal (cplex)
Unconstrained LB-UB: (-1000.0, 1000.0)
Bad EX_change notation: 0
Sink/demand: 2
Constrained metabolic: 15
With 'artificial' atoms: 0
Missing formulas - charges: 1658 - 0
Mass - charge unbalances: 0 - 0


## relax diffusions

In [6]:
# Make reversible (-1000, 1000) every two-metabolite, two-compartment reaction
# (diffusion / facilitated diffusion / flipping) that touches a Biolog substrate.

biolog_mappings = gempipe.get_biolog_mappings()

# compartment-free metabolite IDs of the Biolog substrates having a BiGG exchange
biolog_pure_mids = {
    ex_id.replace('EX_', '').rsplit('_', 1)[0]
    for ex_id in biolog_mappings['BiGG_exchange'].dropna()
}

for r in iYL1228.reactions:

    # skip reactions that involve no Biolog substrate:
    pure_mids = {m.id.rsplit('_', 1)[0] for m in r.metabolites}
    if pure_mids.isdisjoint(biolog_pure_mids):
        continue

    # keep only reactions with exactly 2 metabolites sitting in 2 different compartments:
    compartment_tags = {m.id.rsplit('_', 1)[-1] for m in r.metabolites}
    if len(r.metabolites) != 2 or len(compartment_tags) != 2:
        continue

    # already fully reversible: nothing to relax
    if r.bounds == (-1000, 1000):
        continue

    # report the original state before overwriting the bounds
    print(r.id, r.reaction, r.name, r.bounds)
    r.bounds = (-1000, 1000)

ACMANAtex acmana_e <-- acmana_p N-acetyl-D-mannosamine transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
ABUTtex 4abut_e <-- 4abut_p 4-aminobutyrate transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
AKGtex akg_e <-- akg_p Alpha-ketoglutarate transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
CYStpp cys__L_c --> cys__L_p L-cysteine export via facilitated transport (0.0, 1000.0)
DCAtex dca_e <-- dca_p Decanoate transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
FUMtex fum_e <-- fum_p Fumarate transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
GLCtex_copy2 glc__D_e --> glc__D_p Glucose transport via diffusion (extracellular to periplasm) (0.0, 1000.0)
HXAtex hxa_e <-- hxa_p Hexanoate transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
FORtex for_e <-- for_p Formate transport via diffusion (extracellular to periplasm) (-1000.0, 0.0)
FORtppi for_c --> for_p Formate transport via diffusion 

## save medium recipe

In [7]:
# set the main growth medium
# (apply_medium is provided by helper_functions; the recipe itself is not visible here)
apply_medium(iYL1228)
# last expression of the cell: displays the objective value reached on this medium
iYL1228.slim_optimize()

1.0630380418232899

In [8]:
# Save the medium recipe for gap-filling, once per downstream tool
# (gempipe, carveme, bactabolize), all in the same reference folder.
for flavour, medium_id in (
    ('gempipe', 'medium_gempipe'),
    ('carveme', 'medium_carveme'),
    ('bactabolize', 'medium_bactabolize'),
):
    save_recipe(iYL1228, flavour=flavour, medium_id=medium_id, outpath='01_klebsiella/reference/')

## check ATPM

In [9]:
# check the maintenance:
# (displaying the reaction shows its stoichiometry and its lower/upper bounds)
iYL1228.reactions.get_by_id('ATPM')

0,1
Reaction identifier,ATPM
Name,ATP maintenance requirement
Memory address,0x7f88d1893670
Stoichiometry,atp_c + h2o_c --> adp_c + h_c + pi_c  ATP C10H12N5O13P3 + H2O H2O --> ADP C10H12N5O10P2 + H+ + Phosphate
GPR,
Lower bound,6.8
Upper bound,6.8


## save edited version

In [10]:
# save reference:
# (written to a new "_edited" file; the original iYL1228.xml is not overwritten)
cobra.io.write_sbml_model(iYL1228, '01_klebsiella/reference/iYL1228_edited.xml')

# 02_ralstonia

In [11]:
%%capture --no-stdout
# NOTE(review): the cell magic above presumably hides warnings emitted while parsing
# the SBML and still lets the printed sanity report through -- confirm.

# load reference model
# cobrapy-compatible version obtained from carveme's suppl.mat.
iRP1476 = cobra.io.read_sbml_model('02_ralstonia/reference/iRP1476.xml')
gempipe.sanity_report(iRP1476)

gempipe v1.37.6 - sanity_report
model ID: iRP1476
G: 2600 R: 2644 M: 2121 uM: 1420 groups: 253
Compartments: ['c', 'e', 'p']
Biomass assemblies: 1 ['BIOMASS']
Objectives: 1 ['BIOMASS']
Optimization: 96.36243618282448 optimal (cplex)
Unconstrained LB-UB: (-1000.0, 1000.0)
Bad EX_change notation: 2
Sink/demand: 14
Constrained metabolic: 0
With 'artificial' atoms: 0
Missing formulas - charges: 2121 - 2121
Mass - charge unbalances: 0 - 0


## restore stoichiometric consistency


In [12]:
# launch the memote function:
# (collect the IDs of the metabolites memote reports as unconserved, and print them)

un_mids = [m.id for m in find_unconserved_metabolites(iRP1476)]
print(un_mids)

['cro4_c', 'doxrbcn_e', 'zn2_e', 'hg2_p', 'fusar_e', 'acfv_e', 'naldx_p', 'cro4_e', 'cafe_e', 'mincyc_p', 'fusa_p', 'fusar_p', 'mincyc_e', 'resoc_e', 'ttrcyc_p', 'rfamp_p', 'cm_p', 'ttrcyc_e', 'ampi_e', 'rfamp_e', 'novbcn_p', 'esclt_e', 'tom_p', 'cm_e', 'cd2_p', 'cafe_p', 'ag_p', 'berb_e', 'novbcn_e', 'esclt_p', 'cd2_e', 'cd2_c', 'tom_e', 'ampi_p', 'cana__L_c', 'cana__L_e', 'ni2_c', 'ni2_e', 'hg2_c', 'fusa_e', 'ni2_p', 'zn2_p', 'cro4_p', 'hg2_e', 'acfv_p', 'cana__L_p', 'doxrbcn_p', 'resoc_p', 'zn2_c', 'berb_p', 'ag_c', 'ag_e', 'naldx_e']


In [13]:
# Print the current equations of the transporters to inspect, in three groups:
# proton-coupled transporters, ABC transporters, and K+/H+-coupled transporters.
transporters_to_inspect = [
    'Naldxteff', 'CD2teff_e', 'NI2t3pp', 'CD2t3pp', 'AGt3pp',
    'CMtpp', 'Escltteff', 'cafeteff', 'RFAMPtpp', 'MINCYCtpp',
    'Resocteff', 'ZN2teff_e', 'CRO4t3pp', 'DOXRBCNtpp', 'FUSAtpp',
    'acfvteff', 'ampiteff', 'NOVBCNtpp', 'TTRCYCtpp', 'ZN2t3pp',
    'Berbteff', 'Tomteff', 'Fusarteff', 'CANAt2pp', 'HG2t3pp',
    # ABC transporters
    'HG2abcpp', 'ZN2abcpp', 'CD2abcpp', 'NI2abcpp',
    # K+/H+ coupled
    'CD2t4pp', 'ZN2t4pp',
]
for rid in transporters_to_inspect:
    print(iRP1476.reactions.get_by_id(rid).reaction)


h_p + naldx_p --> h_c
cd2_p + h_p --> h_c
h_p + ni2_c --> h_c
cd2_c + h_p --> h_c
ag_c + h_p --> h_c
cm_p + h_p --> h_c
esclt_p + h_p --> h_c
cafe_p + h_p --> h_c
h_p + rfamp_p --> h_c
h_p + mincyc_p --> h_c
h_p + resoc_p --> h_c
h_p + zn2_p --> h_c
cro4_c + h_p --> h_c
doxrbcn_p + h_p --> h_c
fusa_p + h_p --> h_c
acfv_p + h_p --> h_c
ampi_p + h_p --> h_c
h_p + novbcn_p --> h_c
h_p + ttrcyc_p --> h_c
h_p + zn2_c --> h_c
berb_p + h_p --> h_c
h_p + tom_p --> h_c
fusar_p + h_p --> h_c
cana__L_c + h_p --> h_c
h_p + hg2_c --> h_c
atp_c + h2o_c + hg2_c --> adp_c + h_c + pi_c
atp_c + h2o_c + zn2_c --> adp_c + h_c + pi_c
atp_c + cd2_c + h2o_c --> adp_c + h_c + pi_c
atp_c + h2o_c + ni2_c --> adp_c + h_c + pi_c
cd2_c + h_p + k_p --> h_c + k_c
h_p + k_p + zn2_c --> h_c + k_c


In [14]:
# Rewrite each transporter so that the transported species also appears on the
# product side, in the destination compartment (it was missing in the original
# equations). Reactions are rebuilt in the order listed.
corrected_equations = {
    # proton-coupled transporters
    'Naldxteff': 'h_p + naldx_p --> h_c + naldx_c',
    'CD2teff_e': 'cd2_p + h_p --> h_c + cd2_c',
    'NI2t3pp': 'h_p + ni2_c --> h_c + ni2_p',
    'CD2t3pp': 'cd2_c + h_p --> h_c + cd2_p',
    'AGt3pp': 'ag_c + h_p --> h_c + ag_p',
    'CMtpp': 'cm_p + h_p --> h_c + cm_c',
    'Escltteff': 'esclt_p + h_p --> h_c + esclt_c',
    'cafeteff': 'cafe_p + h_p --> h_c + cafe_c',
    'RFAMPtpp': 'h_p + rfamp_p --> h_c + rfamp_c',
    'MINCYCtpp': 'h_p + mincyc_p --> h_c + mincyc_c',
    'Resocteff': 'h_p + resoc_p --> h_c + resoc_c',
    'ZN2teff_e': 'h_p + zn2_p --> h_c + zn2_c',
    'CRO4t3pp': 'cro4_c + h_p --> h_c + cro4_p',
    'DOXRBCNtpp': 'doxrbcn_p + h_p --> h_c + doxrbcn_c',
    'FUSAtpp': 'fusa_p + h_p --> h_c + fusa_c',
    'acfvteff': 'acfv_p + h_p --> h_c + acfv_c',
    'ampiteff': 'ampi_p + h_p --> h_c + ampi_c',
    'NOVBCNtpp': 'h_p + novbcn_p --> h_c + novbcn_c',
    'TTRCYCtpp': 'h_p + ttrcyc_p --> h_c + ttrcyc_c',
    'ZN2t3pp': 'h_p + zn2_c --> h_c + zn2_p',
    'Berbteff': 'berb_p + h_p --> h_c + berb_c',
    'Tomteff': 'h_p + tom_p --> h_c + tom_c',
    'Fusarteff': 'fusar_p + h_p --> h_c + fusar_c',
    'CANAt2pp': 'cana__L_c + h_p --> h_c + cana__L_p',
    'HG2t3pp': 'h_p + hg2_c --> h_c + hg2_p',
    # ABC transporters
    'HG2abcpp': 'atp_c + h2o_c + hg2_c --> hg2_p + adp_c + h_c + pi_c',
    'ZN2abcpp': 'atp_c + h2o_c + zn2_c --> zn2_p + adp_c + h_c + pi_c',
    'CD2abcpp': 'atp_c + h2o_c + cd2_c --> cd2_p + adp_c + h_c + pi_c',
    'NI2abcpp': 'atp_c + h2o_c + ni2_c --> ni2_p + adp_c + h_c + pi_c',
    # K+/H+ coupled transporters
    'CD2t4pp': 'cd2_c + h_p + k_p --> cd2_p + h_c + k_c',
    'ZN2t4pp': 'zn2_c + h_p + k_p --> zn2_p + h_c + k_c',
}
for rid, equation in corrected_equations.items():
    iRP1476.reactions.get_by_id(rid).build_reaction_from_string(equation)


unknown metabolite 'naldx_c' created
unknown metabolite 'cm_c' created
unknown metabolite 'esclt_c' created
unknown metabolite 'cafe_c' created
unknown metabolite 'rfamp_c' created
unknown metabolite 'mincyc_c' created
unknown metabolite 'resoc_c' created
unknown metabolite 'doxrbcn_c' created
unknown metabolite 'fusa_c' created
unknown metabolite 'acfv_c' created
unknown metabolite 'ampi_c' created
unknown metabolite 'novbcn_c' created
unknown metabolite 'ttrcyc_c' created
unknown metabolite 'berb_c' created
unknown metabolite 'tom_c' created
unknown metabolite 'fusar_c' created


In [15]:
# Assign the cytosol compartment to the cytosolic metabolites that were created
# on the fly while rebuilding the transporter equations.
new_cytosolic_mids = [
    'naldx_c', 'cm_c', 'esclt_c', 'cafe_c',
    'rfamp_c', 'mincyc_c', 'resoc_c', 'doxrbcn_c',
    'fusa_c', 'acfv_c', 'ampi_c', 'novbcn_c',
    'ttrcyc_c', 'berb_c', 'tom_c', 'fusar_c',
]
for mid in new_cytosolic_mids:
    iRP1476.metabolites.get_by_id(mid).compartment = 'c'

In [16]:
# launch the memote function (again)
# check if the stoichiometric consistency is restored:
# (an empty list means memote no longer reports any unconserved metabolite)

un_mids = [m.id for m in find_unconserved_metabolites(iRP1476)]
print(un_mids)

[]


## correct GPRs

In [17]:
# correct gprs:
# (overwrite the rule of ARBTNLSYN, then resync the reaction's gene set with the new rule)

ARBTNLSYN = iRP1476.reactions.ARBTNLSYN
ARBTNLSYN.gene_reaction_rule = "RSp0419 and RSp0421 and RSp0422"  # correct RSp042 to RSp0421.
ARBTNLSYN.update_genes_from_gpr()


## remove anti-convention genes

In [18]:
# rename genes, adjust gprs:
#   - 'G_xxx'         -> 'xxx' (drop the prefix)
#   - IDs starting with 'e' (exchanges) and 'NoAssignment' -> removed from the rule
#   - IDs starting with 'd' (diffusions) or 's' (spontaneous) -> 'spontaneous'
for r in iRP1476.reactions:
    gpr = r.gene_reaction_rule

    # each gene must be surrounded by spaces, so that the whole-token patterns
    # below (' <gene id> ') can match it. Both sides of every parenthesis are
    # padded: padding only after ')' would leave a gene written right before a
    # closing parenthesis (e.g. "(a and G_b)") unmatched and silently skipped.
    gpr = ' ' + gpr.replace('(', ' ( ').replace(')', ' ) ') + ' '
    for g in r.genes:
        if g.id.startswith('G_'): gpr = gpr.replace(f' {g.id} ', f' {g.id[2:]} ')
        if g.id.startswith('e'):  gpr = gpr.replace(f' {g.id} ', f'  ')              # exchanges
        if g.id.startswith('d'):  gpr = gpr.replace(f' {g.id} ', f' spontaneous ')   # diffusions
        if g.id.startswith('s'):  gpr = gpr.replace(f' {g.id} ', f' spontaneous ')   # spontaneous
        if g.id == 'NoAssignment':  gpr = gpr.replace(f' {g.id} ', f'  ')

    # collapse the padding: single spaces between tokens, none at the edges
    gpr = ' '.join(gpr.split())
    r.gene_reaction_rule = gpr
    r.update_genes_from_gpr()


# remove genes remained without associated reactions:
to_remove = [g for g in iRP1476.genes if len(g.reactions) == 0]
cobra.manipulation.remove_genes(iRP1476, to_remove, remove_reactions=False)

## remove strain-specific biomass precursors

In [19]:
# Precursors stripped from the biomass equation:
#   spmd_c    Spermidine
#   adocbl_c  Adenosylcobalamin
# Each term is cut out of the reaction string, then the reaction is rebuilt from it.

BIOMASS = iRP1476.reactions.BIOMASS
ba = BIOMASS.reaction
for strain_specific_term in (' + 0.006744 spmd_c', ' + 0.000223 adocbl_c'):
    ba = ba.replace(strain_specific_term, '')
iRP1476.reactions.BIOMASS.build_reaction_from_string(ba)

gempipe.sanity_report(iRP1476)

gempipe v1.37.6 - sanity_report
model ID: iRP1476
G: 1477 R: 2644 M: 2137 uM: 1420 groups: 253
Compartments: ['c', 'e', 'p']
Biomass assemblies: 1 ['BIOMASS']
Objectives: 1 ['BIOMASS']
Optimization: 96.373688558369 optimal (cplex)
Unconstrained LB-UB: (-1000.0, 1000.0)
Bad EX_change notation: 2
Sink/demand: 14
Constrained metabolic: 0
With 'artificial' atoms: 0
Missing formulas - charges: 2137 - 2137
Mass - charge unbalances: 0 - 0


## add Ca2+ uptake

In [20]:
# Import the Ca2+ exchange and its transporter from gempipe's 'neg' universe.
uni = gempipe.get_universe('neg')

for universe_rid in ('EX_ca2_e', 'CAt4'):
    gempipe.import_from_universe(iRP1476, uni, universe_rid)

# The import brought formula/charge for Ca2+ along, but the reference model
# carries neither, so both are blanked for consistency.
for ca2_mid in ('ca2_e', 'ca2_c'):
    ca2 = iRP1476.metabolites.get_by_id(ca2_mid)
    ca2.formula = None
    ca2.charge = None

## save medium recipe

In [21]:
# set the main growth medium (apply_medium is provided by helper_functions),
# then override two exchanges: glucose uptake is closed and L-glutamine uptake
# is allowed down to a lower bound of -10
apply_medium(iRP1476)
iRP1476.reactions.get_by_id('EX_glc__D_e').lower_bound = 0
iRP1476.reactions.get_by_id('EX_gln__L_e').lower_bound = -10
# last expression of the cell: displays the objective value reached on this medium
iRP1476.slim_optimize()

0.999991187577652

In [22]:
# Save the medium recipe for gap-filling, once per downstream tool
# (gempipe, carveme, bactabolize), all in the same reference folder.
for flavour, medium_id in (
    ('gempipe', 'medium_gempipe'),
    ('carveme', 'medium_carveme'),
    ('bactabolize', 'medium_bactabolize'),
):
    save_recipe(iRP1476, flavour=flavour, medium_id=medium_id, outpath='02_ralstonia/reference/')

## get proteomes

In [23]:
# get FASTA sequences of modeled genes
# (two input files per call: one for each of the AL646052.1 / AL646053.1 records)
# nucleotide sequences
added = fasta_to_modeled(iRP1476, ['02_ralstonia/reference/AL646052.1.fna', '02_ralstonia/reference/AL646053.1.fna'], '02_ralstonia/reference/iRP1476.fna')
# amino acid sequences
added = fasta_to_modeled(iRP1476, ['02_ralstonia/reference/AL646052.1.faa', '02_ralstonia/reference/AL646053.1.faa'], '02_ralstonia/reference/iRP1476.faa')

Recovered 1427 / 1476.
Missing 49 : {'RS03939', 'RS00235', 'RS04005', 'RS03937', 'RS03940', 'RS04001', 'RS05382', 'RS02662', 'RS05586', 'RS05732', 'RS04716', 'RS03460', 'RS04914', 'RS00330', 'RS05381', 'RS03325', 'RS04383', 'RS04532', 'RS05585', 'RS03461', 'RS04003', 'RS02400', 'RSc3445', 'RS05795', 'RS01162', 'RS03726', 'RS02886', 'RS05584', 'RS01507', 'RS03938', 'RS04528', 'RS01505', 'RS04531', 'RS04002', 'RS04530', 'RS01509', 'RS03760', 'RS02241', 'RS04004', 'RS00484', 'RS05583', 'RS04829', 'RALSOc_3512', 'RS00236', 'RS02887', 'RS02877', 'RS05733', 'RS03462', 'RS04529'}
Recovered 1427 / 1476.
Missing 49 : {'RS03939', 'RS00235', 'RS04005', 'RS03937', 'RS03940', 'RS04001', 'RS05382', 'RS02662', 'RS05586', 'RS05732', 'RS04716', 'RS03460', 'RS04914', 'RS00330', 'RS05381', 'RS03325', 'RS04383', 'RS04532', 'RS05585', 'RS03461', 'RS04003', 'RS02400', 'RSc3445', 'RS05795', 'RS01162', 'RS03726', 'RS02886', 'RS05584', 'RS01507', 'RS03938', 'RS04528', 'RS01505', 'RS04531', 'RS04002', 'RS04530'

In [24]:
# parsing GenBank files, more sequences are recovered:
# (same output paths as the FASTA-based cell, so those files are written again here)
gb_files = ['02_ralstonia/reference/AL646052.1.gb', '02_ralstonia/reference/AL646053.1.gb', '02_ralstonia/reference/NC_003295.1.gb', '02_ralstonia/reference/NC_003296.1.gb']
# nucleotide sequences
added = gb_to_modeled(iRP1476, gb_files, '02_ralstonia/reference/iRP1476.fna')
# amino acid sequences
added = gb_to_modeled(iRP1476, gb_files, '02_ralstonia/reference/iRP1476.faa')



Recovered 1474 / 1476.
Missing 2 : {'RALSOc_3512', 'RSc3445'}
Recovered 1474 / 1476.
Missing 2 : {'RALSOc_3512', 'RSc3445'}


## remove missing genes

In [25]:
# Drop the two genes for which no sequence could be recovered, while keeping
# the reactions they were associated with.

to_remove = [iRP1476.genes.get_by_id(gid) for gid in ('RSc3445', 'RALSOc_3512')]
cobra.manipulation.remove_genes(iRP1476, to_remove, remove_reactions=False)

# number of genes left in the model
print(len(iRP1476.genes))

1475


## check ATPM

In [26]:
# check the maintenance reaction (named NGAME in this model):
# displaying the reaction shows its stoichiometry and its lower/upper bounds
iRP1476.reactions.get_by_id('NGAME')

0,1
Reaction identifier,NGAME
Name,Non_growth_associated_maintenance_energy
Memory address,0x7f88af12ee80
Stoichiometry,atp_c + h2o_c --> adp_c + h_c + pi_c  ATP + H2O --> ADP + proton_H_ + Phosphate
GPR,
Lower bound,0.0
Upper bound,1000.0


In [27]:
# force the maintenance flux to a fixed value by setting both bounds to 8.38
# NOTE(review): the source of the 8.38 value is not stated in this notebook
iRP1476.reactions.get_by_id('NGAME').bounds = (8.38, 8.38)

gempipe.sanity_report(iRP1476)

gempipe v1.37.6 - sanity_report
model ID: iRP1476
G: 1475 R: 2646 M: 2139 uM: 1421 groups: 253
Compartments: ['c', 'e', 'p']
Biomass assemblies: 1 ['BIOMASS']
Objectives: 1 ['BIOMASS']
Optimization: 0.999991187577652 optimal (cplex)
Unconstrained LB-UB: (-1000.0, 1000.0)
Bad EX_change notation: 2
Sink/demand: 14
Constrained metabolic: 1
With 'artificial' atoms: 0
Missing formulas - charges: 2139 - 2139
Mass - charge unbalances: 0 - 0


## save edited version

In [28]:
# save reference:
# (written to a new "_edited" file; the original iRP1476.xml is not overwritten)
cobra.io.write_sbml_model(iRP1476, '02_ralstonia/reference/iRP1476_edited.xml')

# 03_pseudomonas

In [29]:
# load reference model
# (read the SBML file into a cobra model, then print gempipe's sanity report for it)
iJN1463 = cobra.io.read_sbml_model('03_pseudomonas/reference/iJN1463.xml')
gempipe.sanity_report(iJN1463)

gempipe v1.37.6 - sanity_report
model ID: iJN1463
G: 1462 R: 2927 M: 2153 uM: 1432 groups: 0
Compartments: ['c', 'e', 'p']
Biomass assemblies: 2 ['BIOMASS_KT2440_Core2', 'BIOMASS_KT2440_WT3']
Objectives: 1 ['BIOMASS_KT2440_WT3']
Optimization: 0.5861175448479812 optimal (cplex)
Unconstrained LB-UB: (-999999.0, 999999.0)
Bad EX_change notation: 0
Sink/demand: 33
Constrained metabolic: 2449
With 'artificial' atoms: 199
Missing formulas - charges: 0 - 0
Mass - charge unbalances: 4 - 3


## restore stoichiometric consistency


In [30]:
# launch the memote function:
# (collect the IDs of the metabolites memote reports as unconserved, and print them)

un_mids = [m.id for m in find_unconserved_metabolites(iJN1463)]
print(un_mids)

['h_e', 'h_p', 'h_c']


In [31]:
# Rebuild seven reactions from manually revised equations (applied in listed order).
revised_equations = {
    'HDH': 'h2o_c + histd_c + 2.0 nad_c --> 3.0 h_c + his__L_c + 2.0 nadh_c',
    'APPAT': 'atp_c + h_c + pan4p_c --> dpcoa_c + ppi_c',
    'ARGDI': 'arg__L_c + h2o_c --> citr__L_c + nh4_c',
    'PPRGL': 'atp_c + gly_c + pram_c --> adp_c + gar_c + h_c + pi_c',
    'REPHACCOAT': 'rephaccoa_c --> coa_c + rephac_c',
    'IDPh_1': 'h2o_c + ppi_c --> 2.0 pi_c + h_c',
    'MEPCT_1': '2me4p_c + ctp_c + h_c --> 4c2me_c + ppi_c',
}
for rid, equation in revised_equations.items():
    iJN1463.reactions.get_by_id(rid).build_reaction_from_string(equation)


# Reactions dropped from the model (equations and bounds as they were before removal):
#   FE3PYOVDDR  fe3pyovd_p <=> fe2_p + pyovd_p   (-999999.0, 999999.0)
#   FE3PYOVDL   fe3pyovd_e <=> fe3_e + pyovd_e   (-999999.0, 999999.0)
siderophore_rxns = [iJN1463.reactions.get_by_id(rid) for rid in ('FE3PYOVDDR', 'FE3PYOVDL')]
iJN1463.remove_reactions(siderophore_rxns)

# Related reactions, shown for context (PQQFEP is the one rewritten below):
#   PQQAC   pqqA_kt_c --> 2.0 h_c + pqqAc_kt_c                    (0.0, 999999.0)
#   PQQFEP  4.0 h2o_c + pqqAc_kt_c --> 4pqq_c + tripeptide_c      (0.0, 999999.0)
# The tripeptide formula is overwritten and PQQFEP is rebuilt with 5 waters instead of 4.
iJN1463.metabolites.get_by_id('tripeptide_c').formula = 'RHO'
iJN1463.reactions.get_by_id('PQQFEP').build_reaction_from_string(
    '5.0 h2o_c + pqqAc_kt_c --> 4pqq_c + tripeptide_c')


In [32]:
# launch the memote function (again)
# check if the stoichiometric consistency is restored:
# (an empty list means memote no longer reports any unconserved metabolite)

un_mids = [m.id for m in find_unconserved_metabolites(iJN1463)]
print(un_mids)

[]


## remove strain-specific biomass precursors

In [33]:
# set the unconstrained lb/ub to -1000/1000:
gempipe.reset_unconstrained_bounds(iJN1463)

# select the core biomass assembly:
# the WT3 assembly is blocked (both bounds at 0), the Core2 assembly is opened
# and becomes the objective
iJN1463.reactions.BIOMASS_KT2440_WT3.bounds = (0,0 )
iJN1463.reactions.BIOMASS_KT2440_Core2.bounds = (0,1000 )
iJN1463.objective = 'BIOMASS_KT2440_Core2'

gempipe.sanity_report(iJN1463)

gempipe v1.37.6 - sanity_report
model ID: iJN1463
G: 1462 R: 2925 M: 2153 uM: 1432 groups: 0
Compartments: ['c', 'e', 'p']
Biomass assemblies: 2 ['BIOMASS_KT2440_Core2', 'BIOMASS_KT2440_WT3']
Objectives: 1 ['BIOMASS_KT2440_Core2']
Optimization: 0.5867762516977003 optimal (cplex)
Unconstrained LB-UB: (-1000, 1000)
Bad EX_change notation: 0
Sink/demand: 33
Constrained metabolic: 5
With 'artificial' atoms: 199
Missing formulas - charges: 0 - 0
Mass - charge unbalances: 0 - 0


## save medium recipe

In [34]:
# set the main growth medium
# (apply_medium is provided by helper_functions), then additionally open the
# uptake of na1 and ni2 by setting their exchange lower bounds to -1000
apply_medium(iJN1463)
iJN1463.reactions.get_by_id('EX_na1_e').lower_bound = -1000
iJN1463.reactions.get_by_id('EX_ni2_e').lower_bound = -1000

# last expression of the cell: displays the objective value reached on this medium
iJN1463.slim_optimize()

0.980737772748016

In [35]:
# save the recipe for gap-filling
# for gempipe
save_recipe(iJN1463, flavour='gempipe', medium_id='medium_gempipe', outpath='03_pseudomonas/reference/')
# for carveme and gempipe-rf
# The na1/ni2 uptakes are closed only inside this `with` block.
# NOTE(review): this relies on the cobra model context reverting the bound changes on exit -- confirm.
with iJN1463:  # na1 and ni2 are not in CarveMe's universal biomass
    iJN1463.reactions.get_by_id('EX_na1_e').lower_bound = 0
    iJN1463.reactions.get_by_id('EX_ni2_e').lower_bound = 0
    save_recipe(iJN1463, flavour='carveme', medium_id='medium_carveme', outpath='03_pseudomonas/reference/')
    save_recipe(iJN1463, flavour='gempipe', medium_id='medium_gempipe_rf', outpath='03_pseudomonas/reference/')
# for bactabolize
save_recipe(iJN1463, flavour='bactabolize', medium_id='medium_bactabolize', outpath='03_pseudomonas/reference/')

## get proteomes

In [36]:
# parsing GenBank files, more sequences are recovered:
gb_files = ['03_pseudomonas/reference/NC_002947.4.gb']
# nucleotide sequences
added = gb_to_modeled(iJN1463, gb_files, '03_pseudomonas/reference/iJN1463.fna')
# amino acid sequences
# (`added` is overwritten here; the value from this second call is the one used
#  later when removing the genes without a recovered sequence)
added = gb_to_modeled(iJN1463, gb_files, '03_pseudomonas/reference/iJN1463.faa')

Recovered 1441 / 1462.
Missing 21 : {'pWW0_130', 'pWW0_094', 'PP_s0001', 'pWW0_099', 'pWW0_100', 'pWW0_090', 'pWW0_097', 'pWW0_101', 'pWW0_091', 'PP_3462', 'pWW0_095', 'pWW0_102', 'pWW0_128', 'pWW0_096', 'pWW0_129', 'pWW0_093', 'PP_2519', 'PP_3465', 'pWW0_127', 'pWW0_092', 'pWW0_131'}
Recovered 1441 / 1462.
Missing 21 : {'pWW0_130', 'pWW0_094', 'PP_s0001', 'pWW0_099', 'pWW0_100', 'pWW0_090', 'pWW0_097', 'pWW0_101', 'pWW0_091', 'PP_3462', 'pWW0_095', 'pWW0_102', 'pWW0_128', 'pWW0_096', 'pWW0_129', 'pWW0_093', 'PP_2519', 'PP_3465', 'pWW0_127', 'pWW0_092', 'pWW0_131'}


## remove missing genes

In [37]:
# Remove, without touching their reactions, every gene that is neither in
# `added` (genes with a recovered sequence) nor the explicitly kept 'PP_s0001'.
ids_to_keep = set(added)
ids_to_keep.add('PP_s0001')
to_remove = [g for g in iJN1463.genes if g.id not in ids_to_keep]
cobra.manipulation.remove_genes(iJN1463, to_remove, remove_reactions=False)

# number of genes left in the model
print(len(iJN1463.genes))

1442


## check ATPM

In [38]:
# check the maintenance:
# displaying the reaction shows its stoichiometry and its lower/upper bounds
iJN1463.reactions.get_by_id('ATPM')

0,1
Reaction identifier,ATPM
Name,ATP maintenance requirement
Memory address,0x7f885ecaf8b0
Stoichiometry,atp_c + h2o_c --> adp_c + h_c + pi_c  ATP C10H12N5O13P3 + H2O H2O --> ADP C10H12N5O10P2 + H+ + Phosphate
GPR,
Lower bound,0.92
Upper bound,0.92


## save edited version

In [39]:
# save reference:
# (written to a new "_edited" file; the original iJN1463.xml is not overwritten)
cobra.io.write_sbml_model(iJN1463, '03_pseudomonas/reference/iJN1463_edited.xml')