In [1]:
import symengine
import cobra
from cobra.io import load_json_model
from copy import deepcopy
import pickle
import pandas as pd
import glob
import json
from optlang.symbolics import Zero
from cobra.core import Reaction
import sys
import operator
from cobra.util.util import format_long_string

#curate universal, remove poorly annotated things, sink rxns, remove mass generating loops, second step of model creation

In [2]:
universal = load_json_model('universal_mundy.json')
universal2 = deepcopy(universal)

FileNotFoundError: [Errno 2] No such file or directory: 'universal_mundy.json'

In [6]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

20351


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,20351
Objective expression,0.0
Compartments,"c, e"


In [7]:
# Number of extracellular metabolites
len([met.id for met in universal2.metabolites if met.id.endswith('_e')])

1837

In [8]:
ec_patric = cobra.io.read_sbml_model('511145.12.xml')

In [9]:
# EC biomass reaction
biomass_rxn = deepcopy(ec_patric.reactions.get_by_id('bio1'))
biomass_snk = deepcopy(ec_patric.reactions.get_by_id('SK_cpd11416_c'))
universal2.add_reactions([biomass_rxn,biomass_snk])

# Fix water issue
universal2.reactions.get_by_id('rxn05319_c').name = "Water transport"
universal2.reactions.get_by_id('rxn05319_c').bounds = (-1000., 1000.)

In [10]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

20353


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,20353
Objective expression,0.0
Compartments,"c, e"


In [11]:
# Remove mass imbalanced reactions
balanced_rxns = []
charge_only_rxns = []
mass_imbalanced = []

for rxn in universal2.reactions:
    try:
        if not rxn.check_mass_balance(): #Check is the dictionary is empty
            balanced_rxns.append(rxn.id)
        elif (list(rxn.check_mass_balance().keys())[0] == 'charge') & (len(list(rxn.check_mass_balance().keys())) == 1):
            charge_only_rxns.append(rxn.id)
        else:
            mass_imbalanced.append(rxn.id)
    except:
        mass_imbalanced.append(rxn.id)
        
print(len(balanced_rxns))
print(len(charge_only_rxns))
print(len(mass_imbalanced))



14978
1129
4246




In [12]:
# Save imbalanced reaction bounds and then set them all to zero.
mass_imbalanced_bounds = {}
for rxn_id in mass_imbalanced:
    rxn = universal2.reactions.get_by_id(rxn_id)
    mass_imbalanced_bounds[rxn_id] = [rxn.lower_bound, rxn.upper_bound]
    rxn.lower_bound = 0.0
    rxn.upper_bound = 0.0

In [13]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

16107


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,20353
Objective expression,0.0
Compartments,"c, e"


In [14]:
# Save imbalanced reaction bounds and then set them all to zero.

charge_imbalanced_bounds = {}
for rxn_id in charge_only_rxns:
    rxn = universal2.reactions.get_by_id(rxn_id)
    charge_imbalanced_bounds[rxn_id] = [rxn.lower_bound, rxn.upper_bound]
#     rxn.lower_bound = 0.0
#     rxn.upper_bound = 0.0

In [15]:
# Remove reactions with no reactants
for rxn in universal2.reactions:
    if (len(rxn.reactants) == 0) & (not rxn.id.startswith('EX_')):
        rxn.lower_bound = 0.0
        rxn.upper_bound = 0.0

# Essential reactions for biomass reaction: 'bio1'
universal2.reactions.get_by_id('rxn13783_c').upper_bound = 1000.
universal2.reactions.get_by_id('rxn13784_c').upper_bound = 1000.
universal2.reactions.get_by_id('rxn13782_c').upper_bound = 1000.

In [16]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

16087


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,20353
Objective expression,0.0
Compartments,"c, e"


In [17]:
# Add intracellular sink reactions
for met in universal2.metabolites:
    if met.id.endswith('_c'):
        snk_rxn = Reaction('SNK_' + met.id)
        snk_rxn.name = "Sink reaction for " + met.id
        snk_rxn.lower_bound = 0.0
        snk_rxn.upper_bound = 0.0
        snk_rxn.add_metabolites({met:-1})
        universal2.add_reactions([snk_rxn])

In [18]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

16087


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,33594
Objective expression,0.0
Compartments,"c, e"


In [19]:
# for rxn in universal2.reactions:
#     if rxn.id.startswith('SNK_'):
#         rxn.lower_bound = 0.0
#         rxn.upper_bound = 0.0

In [20]:
for met in universal2.metabolites:
    if met.name.startswith('CPD'):
        for rxn in met.reactions:
            universal2.reactions.get_by_id(rxn.id).lower_bound = 0.0
            universal2.reactions.get_by_id(rxn.id).upper_bound = 0.0

In [21]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

16030


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,33594
Objective expression,0.0
Compartments,"c, e"


In [22]:
# Reaction BLACKLIST
blacklist = []

for met in universal2.metabolites:
    if met.name.startswith('CPD'):
        for rxn in met.reactions:
            blacklist.append(rxn.id)
print(len(blacklist))
for rxn in universal2.reactions:
    if (len(rxn.reactants) == 0) & (not rxn.id.startswith('EX_')): # reactions with no reactants
        if rxn.id != 'rxn13782_c' or rxn.id != 'rxn13783_c' or rxn.id != 'rxn13784_c': # BS biomass rxns
            blacklist.append(rxn.id)
print(len(blacklist))
for rxn_id in mass_imbalanced_bounds.keys():
    if not rxn_id.startswith('EX_'): # Some exchange reactions come up as mass imbalanced
        blacklist.append(rxn_id)
print(len(blacklist))

449
488
4734


In [23]:
# Mediocre Reactions
print(len(list(charge_imbalanced_bounds.keys())))
# Remove blacklist reactions from this set. 
print(len(list(set(list(charge_imbalanced_bounds.keys())) - set(blacklist))))
mediocre = list(set(list(charge_imbalanced_bounds.keys())) - set(blacklist))

1129
1126


In [24]:
# Set all mediocre reactions to have bounds they previously had

# for rxn_id in mediocre:
#     bounds = charge_imbalanced_bounds[rxn_id]
#     rxn = universal2.reactions.get_by_id(rxn_id)
#     rxn.lower_bound = bounds[0]
#     rxn.upper_bound = bounds[1]

In [25]:
exchanges = []
for rxn in universal2.reactions:
    if rxn.id.startswith('EX'):
        exchanges.append(rxn.id)
print(len(exchanges))

0


In [27]:
all_ex_rxns = []
for met in universal2.metabolites:
    if met.id.endswith('_e'):
        ex_rxn = Reaction('EX_' + met.id)
        ex_rxn.name = "Exchange reaction for " + met.id
        ex_rxn.lower_bound = 0.0
        ex_rxn.upper_bound = 1000.0
        ex_rxn.add_metabolites({met:-1})
        all_ex_rxns.append(ex_rxn)
universal2.add_reactions(all_ex_rxns)

for rxn in universal2.reactions:
    if rxn.id.startswith('EX'):
        rxn.upper_bound = 1000.0

In [28]:
exchanges = []
for rxn in universal2.reactions:
    if rxn.id.startswith('EX'):
        exchanges.append(rxn.id)
print(len(exchanges))

1837


In [29]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound < 0 or rxn.upper_bound > 0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

17867


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,35431
Objective expression,0.0
Compartments,"c, e"


In [None]:
# for rxn in universal2.reactions:
#     if rxn.id.startswith('EX'):
#         print(str(rxn.id)+': '+str(rxn.lower_bound)+': '+str(rxn.upper_bound))

In [35]:
# set_media_to_base_biolog(universal2)
print('Beginning')

base_media=['cpd00001_e','cpd00009_e','cpd00011_e','cpd00013_e','cpd00030_e','cpd00034_e','cpd00048_e','cpd00058_e','cpd00063_e',
            'cpd00067_e','cpd00099_e','cpd00149_e','cpd00205_e','cpd00254_e','cpd00971_e','cpd10515_e','cpd10516_e']

print('Start')
# Set all exchanges to zero flux in and full flux out
for rxn in universal2.reactions:
    if rxn.id.startswith('EX_'):
        rxn.lower_bound = 0.0
        rxn.upper_bound = 1000.0

# Set all sink reactions to zero out
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        rxn.upper_bound = 0.0

print('Set Base')
# Open base media exchanges
for met in base_media:
    # Search for exchange reactions
    temp_exchange = universal2.reactions.get_by_id('EX_'+ met)
    temp_exchange.lower_bound = -1000.0
    temp_exchange.upper_bound = 1000.0

print('Set Carbon')
rxn = universal2.reactions.get_by_id('EX_cpd00027_e') # Turn on glucose exchange
rxn.lower_bound = -1000.
rxn.upper_bound = 1000.

# Print out media condition
for rxn in universal2.reactions:
    if rxn.id.startswith('EX'):
        if rxn.lower_bound < 0:
            print(str(rxn.id) +': '+str(rxn.lower_bound) +': '+ str(rxn.upper_bound))

universal2.reactions.get_by_id('bio1').upper_bound = 1000.
universal2.objective = universal2.reactions.get_by_id('bio1')

# universal2.reactions.get_by_id('SNK_cpd00002_c').upper_bound = 1000.
# universal2.objective = universal2.reactions.get_by_id('SNK_cpd00002_c')

universal2.solver.update()
solution = universal2.optimize()
print(solution)
active_rxns = set([rxn.id for rxn in universal2.reactions if abs(solution.fluxes[rxn.id]) > 1e-6])
len(active_rxns)

Beginning
Start
Set Base
Set Carbon
EX_cpd00009_e: -1000.0: 1000.0
EX_cpd00030_e: -1000.0: 1000.0
EX_cpd00067_e: -1000.0: 1000.0
EX_cpd00013_e: -1000.0: 1000.0
EX_cpd00048_e: -1000.0: 1000.0
EX_cpd00205_e: -1000.0: 1000.0
EX_cpd00001_e: -1000.0: 1000.0
EX_cpd00058_e: -1000.0: 1000.0
EX_cpd00034_e: -1000.0: 1000.0
EX_cpd00149_e: -1000.0: 1000.0
EX_cpd00971_e: -1000.0: 1000.0
EX_cpd00063_e: -1000.0: 1000.0
EX_cpd00254_e: -1000.0: 1000.0
EX_cpd00027_e: -1000.0: 1000.0
EX_cpd00099_e: -1000.0: 1000.0
EX_cpd10516_e: -1000.0: 1000.0
EX_cpd10515_e: -1000.0: 1000.0
EX_cpd00011_e: -1000.0: 1000.0
<Solution 116.673 at 0x7f8967ab7e48>


796

In [36]:
# Check Universal2 for mass generating loops with semi-novel algorithm 

# Set exchanges to allow all extracellular metabolites leave the system
for rxn in universal2.reactions:
    if rxn.id.startswith('EX_'):
        rxn.lower_bound = 0.0    # Don't allow mets in
        rxn.upper_bound = 1000.0 # Allow mets out

# Set all intracellular sink reactions open to allow mets out
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        rxn.upper_bound = 1000.0

expr = Zero
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        if rxn.upper_bound != 0.0:
            expr += 1.0 * rxn.forward_variable
            expr += 1.0 * rxn.reverse_variable

print('Expression Done.')

universal2.objective = universal2.problem.Objective(expr, direction='max', sloppy=True)
universal2.solver.update()
solution = universal2.optimize()

# Save reactions that carry flux
active_rxns_loops = set([rxn.id for rxn in universal2.reactions if abs(solution.fluxes[rxn.id]) > 1e-6])
print(solution.objective_value)

# Turn off all sinks
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        rxn.upper_bound = 0.0

Expression Done.
10075.915405211552


In [38]:
for rxn_id in active_rxns_loops:
    if rxn_id.startswith('SNK'):
        print(rxn_id)

SNK_cpd17043_c
SNK_cpd30058_c
SNK_cpd30509_c
SNK_cpd11416_c
SNK_cpd28299_c
SNK_cpd30508_c
SNK_cpd22167_c
SNK_cpd17042_c
SNK_cpd11632_c
SNK_cpd12713_c
SNK_cpd17041_c
SNK_cpd27228_c


In [79]:
# Compounds to remove
remove_cpds = ['cpd30058_c','cpd30509_c','cpd28299_c','cpd30508_c','cpd22167_c','cpd11632_c','cpd27228_c','cpd12713_c']

In [80]:
# Set zero bounds for all reactions that are associated with bad metabolites
for met_id in remove_cpds:
    met = universal2.metabolites.get_by_id(met_id)
    for rxn in met.reactions:
        rxn.lower_bound = 0.0
        rxn.upper_bound = 0.0
        blacklist.append(rxn.id)

In [81]:
# Check Universal2 for mass generating loops with semi-novel algorithm 

# Set exchanges to allow all extracellular metabolites leave the system
for rxn in universal2.reactions:
    if rxn.id.startswith('EX_'):
        rxn.lower_bound = 0.0    # Don't allow mets in
        rxn.upper_bound = 1000.0 # Allow mets out

# Set all intracellular sink reactions open to allow mets out
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        rxn.upper_bound = 1000.0

expr = Zero
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        if rxn.upper_bound != 0.0:
            expr += 1.0 * rxn.forward_variable
            expr += 1.0 * rxn.reverse_variable

print('Expression Done.')

universal2.objective = universal2.problem.Objective(expr, direction='max', sloppy=True)
universal2.solver.update()
solution = universal2.optimize()

# Save reactions that carry flux
active_rxns_loops = set([rxn.id for rxn in universal2.reactions if abs(solution.fluxes[rxn.id]) > 1e-6])
print(solution.objective_value)

# Turn off all sinks
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        rxn.upper_bound = 0.0

Expression Done.
4000.0


In [84]:
for rxn_id in active_rxns_loops:
    if rxn_id.startswith('SNK'):
        rxn = universal2.reactions.get_by_id(rxn_id)
        mets = rxn.reactants
        print(mets[0].name)
        print(rxn_id)

Biomass
SNK_cpd11416_c
Protein biosynthesis
SNK_cpd17041_c
DNA replication
SNK_cpd17042_c
RNA transcription
SNK_cpd17043_c


In [85]:
# set_media_to_base_biolog(universal2)
print('Beginning')

base_media=['cpd00001_e','cpd00009_e','cpd00011_e','cpd00013_e','cpd00030_e','cpd00034_e','cpd00048_e','cpd00058_e','cpd00063_e',
            'cpd00067_e','cpd00099_e','cpd00149_e','cpd00205_e','cpd00254_e','cpd00971_e','cpd10515_e','cpd10516_e']

print('Start')
# Set all exchanges to zero flux in and full flux out
for rxn in universal2.reactions:
    if rxn.id.startswith('EX_'):
        rxn.lower_bound = 0.0
        rxn.upper_bound = 1000.0

# Set all sink reactions to zero out
for rxn in universal2.reactions:
    if rxn.id.startswith('SNK_'):
        rxn.upper_bound = 0.0

print('Set Base')
# Open base media exchanges
for met in base_media:
    # Search for exchange reactions
    temp_exchange = universal2.reactions.get_by_id('EX_'+ met)
    temp_exchange.lower_bound = -1000.0
    temp_exchange.upper_bound = 1000.0

print('Set Carbon')
rxn = universal2.reactions.get_by_id('EX_cpd00027_e') # Turn on glucose exchange
rxn.lower_bound = -1000.
rxn.upper_bound = 1000.

# Print out media condition
for rxn in universal2.reactions:
    if rxn.id.startswith('EX'):
        if rxn.lower_bound < 0:
            print(str(rxn.id) +': '+str(rxn.lower_bound) +': '+ str(rxn.upper_bound))

universal2.reactions.get_by_id('bio1').upper_bound = 1000.
universal2.objective = universal2.reactions.get_by_id('bio1')

# universal2.reactions.get_by_id('SNK_cpd00002_c').upper_bound = 1000.
# universal2.objective = universal2.reactions.get_by_id('SNK_cpd00002_c')

universal2.solver.update()
solution = universal2.optimize()
print(solution)
active_rxns = set([rxn.id for rxn in universal2.reactions if abs(solution.fluxes[rxn.id]) > 1e-6])
len(active_rxns)

Beginning
Start
Set Base
Set Carbon
EX_cpd00009_e: -1000.0: 1000.0
EX_cpd00030_e: -1000.0: 1000.0
EX_cpd00067_e: -1000.0: 1000.0
EX_cpd00013_e: -1000.0: 1000.0
EX_cpd00048_e: -1000.0: 1000.0
EX_cpd00205_e: -1000.0: 1000.0
EX_cpd00001_e: -1000.0: 1000.0
EX_cpd00058_e: -1000.0: 1000.0
EX_cpd00034_e: -1000.0: 1000.0
EX_cpd00149_e: -1000.0: 1000.0
EX_cpd00971_e: -1000.0: 1000.0
EX_cpd00063_e: -1000.0: 1000.0
EX_cpd00254_e: -1000.0: 1000.0
EX_cpd00027_e: -1000.0: 1000.0
EX_cpd00099_e: -1000.0: 1000.0
EX_cpd10516_e: -1000.0: 1000.0
EX_cpd10515_e: -1000.0: 1000.0
EX_cpd00011_e: -1000.0: 1000.0
<Solution 116.673 at 0x7f896773d860>


755

In [88]:
# Number of active reactions
temp_rxns = []
for rxn in universal2.reactions:
    if rxn.lower_bound <0 or rxn.upper_bound >0:
        temp_rxns.append(rxn.id)
print(len(temp_rxns))
universal2

17827


0,1
Name,MicrobialNegative
Memory address,0x07f896a5108d0
Number of metabolites,15078
Number of reactions,35431
Objective expression,1.0*bio1 - 1.0*bio1_reverse_b18f7
Compartments,"c, e"


In [86]:
# Save universal2 in it's current state
pickle.dump(universal2, open('universal_moutinho.pickle',"wb"))

In [89]:
# Save blacklist as set
pickle.dump(set(blacklist), open('rxn_blacklist.pickle',"wb"))
len(set(blacklist))

4533

In [94]:
used_mets = set()
for rxn in universal2.reactions:
    if not rxn.id.startswith('EX') and not rxn.id.startswith('SNK') and not rxn.id.startswith('bio') and not rxn.id.startswith('SK'):
        if rxn.lower_bound < 0.0 or rxn.upper_bound > 0.0:
            if rxn.id.endswith('_c'):
                used_mets = used_mets | set([met.id for met in rxn.metabolites])
len(used_mets)

12318

In [101]:
import numpy as np
rxn_degrees = []
for met_id in used_mets:
    met = universal2.metabolites.get_by_id(met_id)
    rxn_degrees.append(len(met.reactions))
np.average(rxn_degrees)

8.254099691508362

In [95]:
pickle.dump(set(used_mets), open('used_mets.pickle',"wb"))