# 4.2 Balancing of reactions

According to Memote, several reactions are unbalanced, both in mass and in charge. In this notebook each imbalanced reaction is checked, and the imbalance is fixed wherever the discrepancy can be explained. The model is checked for:
- Proton balance
- Missing formulas
- Other mass/charge imbalances
- Remaining imbalances

In [1]:
#import package needed
import cobra
import pandas as pd
import numpy as np
from cobra.io import load_json_model
from cobra import Reaction
from glob import glob

In [2]:
# Load the EcN model from the '_cur_4.1' JSON file; all edits below are made on a copy
EcN_ID = 'CP022686.1'
model_path = '../data/models/%s_cur_4.1.json' % EcN_ID
model = load_json_model(model_path)
modelCopy = model.copy()

In [3]:
def imbalance_rxn(modelCopy):
    """Tabulate the mass/charge imbalance of every non-boundary reaction.

    Each reaction in ``modelCopy.reactions`` is checked with its
    ``check_mass_balance()`` method. Reactions whose id contains ``'EX_'``
    (exchange), ``'DM_'`` (sinks) or ``'BIOMASS_'`` (biomass) are left out.

    Parameters
    ----------
    modelCopy : model object
        Object exposing an iterable ``reactions`` attribute whose items have an
        ``id`` and a ``check_mass_balance()`` method returning a dict.

    Returns
    -------
    pandas.DataFrame
        One row per imbalanced reaction (indexed by reaction id) and one column
        per imbalanced quantity. Quantities that a reaction did not report are 0.
        The frame is empty when no reaction is imbalanced.
    """
    # Substring match (not prefix match), identical to the original filter
    excluded_tags = ('EX_', 'DM_', 'BIOMASS_')

    imbalance = {}
    for rxn in modelCopy.reactions:
        if any(tag in rxn.id for tag in excluded_tags):
            continue
        # The reaction is already in hand; no need to look it up again by id
        balance = rxn.check_mass_balance()
        if balance:  # an empty dict means the reaction is balanced
            imbalance[rxn.id] = balance

    # Create a dataframe based on the imbalance dict
    imbalance_df = pd.DataFrame.from_dict(imbalance, orient='index')
    return imbalance_df.fillna(value=0)

In [4]:
# Collect the imbalanced reactions of the working copy and report how many there are
imbalance_df = imbalance_rxn(modelCopy)
n_imbalanced = imbalance_df.shape[0]
print('Number of imbalanced reactions:', n_imbalanced)
imbalance_df

Number of imbalanced reactions: 51


Unnamed: 0,R,charge,H,O,C,N,P,S
PUACGAMS,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLB6PH,0.0,-2.0,2.0,1.0,0.0,0.0,0.0,0.0
CLBptspp,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
SALCptspp,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0
SALSYN,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4HOXPACt2pp,0.0,1.0,-7.0,-3.0,-8.0,0.0,0.0,0.0
OXPTNDH,0.0,1.0,1.0,-1.0,0.0,0.0,0.0,0.0
MMM2,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
ACGALptspp,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0
GALAMPTSpp,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Read the reaction-origin table (indexed by reaction id) and flag every
# reaction found by imbalance_rxn in its 'notes' column
origin_columns = ['reaction', 'origin', 'added', 'notebook', 'notes']
rxn_origin_df = pd.read_csv('../tables/rxn_origin.csv', usecols=origin_columns).set_index('reaction')

imbalance_list = list(imbalance_df.index)

# Assign label by label: .loc with a single label also works for a reaction
# that is not yet a row of the table
for rxn in imbalance_list:
    rxn_origin_df.loc[rxn, 'notes'] = 'Reaction imbalanced'

In [6]:
# Read the gene-origin table and use its 'gene' column as the index
gene_origin_df = pd.read_csv('../tables/gene_origin.csv').set_index('gene')

In [7]:
gene_origin_df

Unnamed: 0_level_0,EcN_gene,origin,added,notebook,notes
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
b2551,CIW80_06560,iML1515,automatic,3.1,none
b0870,CIW80_22360,iML1515,automatic,3.1,none
b3368,CIW80_11380,iML1515,automatic,3.1,none
b2436,CIW80_06055,iML1515,automatic,3.1,none
b3500,CIW80_12105,iML1515,automatic,3.1,none
...,...,...,...,...,...
SbBS512_E0724,CIW80_05020,iSbBS512_1146,automatic,3.1,none
SFxv_2588,CIW80_05275,iSFxv_1172,automatic,3.1,none
SFxv_4366,CIW80_14645,iSFxv_1172,automatic,3.1,none
SFxv_0967,CIW80_21840,iSFxv_1172,automatic,3.1,none


# 1. Fixing proton balance

In [8]:
# Select reactions where only H and charge are off (all other elements balanced);
# these are the candidates for a missing or excess proton
other_elements = ['C', 'N', 'O', 'P', 'S']
only_h_and_charge_off = (
    imbalance_df['H'].ne(0)
    & imbalance_df['charge'].ne(0)
    & imbalance_df[other_elements].eq(0).all(axis=1)
)
proton_imbalance = imbalance_df.loc[only_h_and_charge_off]
proton_imbalance

Unnamed: 0,R,charge,H,O,C,N,P,S
SALCptspp,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0
ACGALptspp,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0
GALAMPTSpp,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0
GALAM6PISO,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0
HEPKA2,0.0,10.0,-20.0,0.0,0.0,0.0,0.0,0.0
ENTERH,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0
OPETDC,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
3HPAOX,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0
PHACTE,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0
35PHACTE,0.0,-4.0,-4.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Number of h_c to add to each reaction (negative values remove protons
# from the product side / add them to the reactant side)
proton_corrections = {
    'SALCptspp': -2.0,
    'ACGALptspp': -2.0,
    'GALAMPTSpp': -2.0,
    'GALAM6PISO': 1.0,
    'ENTERH': 1.0,
    'OPETDC': -1.0,
    '3HPAOX': 1.0,
    '35PHACTE': 4.0,
    'PHACTE': 1.0,
    '3PHACTE': 4.0,
}

# NOTE: add_metabolites is cumulative, so this cell must be run only once per model copy
for corrected_rxn_id, h_coefficient in proton_corrections.items():
    modelCopy.reactions.get_by_id(corrected_rxn_id).add_metabolites({'h_c': h_coefficient})

In [10]:
# Re-check every reaction of the proton-imbalance table; '{}' means nothing is left to balance
for rxn in proton_imbalance.index:
    remaining = modelCopy.reactions.get_by_id(rxn).check_mass_balance()
    print(rxn, remaining)

SALCptspp {}
ACGALptspp {}
GALAMPTSpp {}
GALAM6PISO {}
HEPKA2 {'charge': 10.0, 'H': -20.0}
ENTERH {}
OPETDC {}
3HPAOX {}
PHACTE {}
35PHACTE {}
3PHACTE {}


# 2. Missing formula

In [11]:
# Identify which metabolites are missing a formula.
# Result: {metabolite id: metabolite name}; only a formula of None counts as missing.
missing_formula = {
    mtb.id: mtb.name
    for mtb in modelCopy.metabolites
    if mtb.formula is None
}

missing_formula

{'4hphac_c': '4-Hydroxyphenylacetate',
 'glutar_c': 'Glutarate',
 'oxptn_c': '5-Oxopentanoate',
 '5aptn_c': '5-Aminopentanoate',
 'pprdn_c': 'Piperideine',
 'LptA_p': 'Periplasmic lipid A binding protein ',
 'im4ac_c': 'Imidazole-4-acetate',
 'im4act_c': 'Imidazole-4-acetaldehyde',
 '2ombz_c': '2-Octaprenyl-6-methoxy-1,4-benzoquinone',
 '2ommb_c': '2-Octaprenyl-3-methyl-6-methoxy-1,4-benzoquinone',
 'nicrns_c': 'Nicotinate D-ribonucleoside',
 'srb1p_c': 'Sorbose 1-phosphate',
 'indpyr_c': 'Indolepyruvate C11H8NO3',
 'dhlam_c': 'Dihydrolipoamide C8H17NOS2',
 'lpam_c': 'Lipoamide C8H15NOS2',
 '3hibutcoa_c': '(S)-3-Hydroxyisobutyryl-CoA',
 '3hmp_c': '3-Hydroxy-2-methylpropanoate',
 'glutcoa_c': 'Glutaryl-CoA mitochondria',
 'ah6p__D_c': 'Arabino-3-hexulose-6-P',
 '2hyoxplac_c': '2-Hydroxyphenylacetate',
 'dkfp_c': '6-deoxy-5-ketofructose 1-phosphate',
 'raffin_c': 'Raffinose C18H32O16',
 '2dhphaccoa_c': '1,2-Dihydroxy-1,2-dihydrophenylacetyl-CoA',
 'cdpglyc_c': 'CDPglycerol C12H19N3O13P2'

### ENLIPIDAt1ex > LptA_p
Phosphoethanolamine lipid IVa transport via vector periplasm to extracellular

In [12]:
# Check mass balance
modelCopy.reactions.ENLIPIDAt1ex.check_mass_balance()

{}

In [13]:
modelCopy.reactions.ENLIPIDAt2ex.reaction

'enlipidA_LptA_p --> LptA_p + enlipidA_e'

In [14]:
# A lipid binding protein that does not have a mass
modelCopy.metabolites.LptA_p

0,1
Metabolite identifier,LptA_p
Name,Periplasmic lipid A binding protein
Memory address,0x01ab9948b9c8
Formula,
Compartment,p
In 2 reaction(s),"ENLIPIDAt2ex, ENLIPIDAt1ex"


LptA_p is a protein that can bind lipids, but it is not consumed itself. Therefore the lack of a formula is not a problem.

### GLCOAS > glutar_c/glutcoa_c

In [15]:
modelCopy.reactions.GLCOAS.check_mass_balance()

{'charge': 3.0,
 'C': -21.0,
 'H': -31.0,
 'N': -7.0,
 'O': -15.0,
 'P': -3.0,
 'S': -1.0}

In [16]:
modelCopy.reactions.GLCOAS.reaction

'atp_c + coa_c + glutar_c --> adp_c + glutcoa_c + pi_c'

In [17]:
# Both glutar_c and glutcoa_c are missing formula and charge

# There is only one formula for glutcoa_c in BiGG
# http://bigg.ucsd.edu/models/universal/metabolites/glutcoa
modelCopy.metabolites.glutcoa_c.formula = 'C26H37N7O19P3S'

# Based on the atoms that are missing in the mass balance,
# ...one of the two molecules on BiGG was chosen for glutar_c
# http://bigg.ucsd.edu/universal/reactions/GLCOAS
modelCopy.metabolites.glutar_c.formula = 'C5H6O4'

# Charge imbalance of 3. Both compounds can be uncharged, or charged -5 (glutcoa_c) and -2 (glutar_c),
# ...which would balance out the reaction
modelCopy.metabolites.glutcoa_c.charge = -5
modelCopy.metabolites.glutar_c.charge = -2

In [18]:
modelCopy.reactions.GLCOAS.check_mass_balance()

{}

### 4HOXPACt2pp > 4hphac_c
4 hydroxyphenylacetate transport in via proton symport periplasm

In [19]:
modelCopy.reactions.get_by_id('4HOXPACt2pp').check_mass_balance()

{'charge': 1.0, 'C': -8.0, 'H': -7.0, 'O': -3.0}

In [20]:
modelCopy.reactions.get_by_id('4HOXPACt2pp').reaction

'4hphac_p + h_p --> 4hphac_c + h_c'

In [21]:
# 4hphac_c has neither formula nor charge; both are set to the same values as 4hphac_p
cytosolic_4hphac = modelCopy.metabolites.get_by_id('4hphac_c')
cytosolic_4hphac.formula = 'C8H7O3'
cytosolic_4hphac.charge = -1

In [22]:
modelCopy.reactions.get_by_id('4HOXPACt2pp').check_mass_balance()

{}

### OXPTNDH > oxptn_c
Glutarate-semialdehyde:NAD+ oxidoreductase

In [23]:
modelCopy.reactions.OXPTNDH.check_mass_balance()

{'charge': -1.0, 'C': 5.0, 'H': 7.0, 'O': 3.0}

In [24]:
modelCopy.reactions.OXPTNDH.reaction

'h2o_c + nad_c + oxptn_c <=> glutar_c + 2.0 h_c + nadh_c'

In [25]:
# oxptn_c does not have a formula and charge
# The formula on BiGG is similar to the missing elements
# http://bigg.ucsd.edu/models/universal/metabolites/oxptn
modelCopy.metabolites.oxptn_c.formula = 'C5H7O3'
modelCopy.metabolites.oxptn_c.charge = -1

In [26]:
modelCopy.reactions.OXPTNDH.check_mass_balance()

{}

### PPRDNDH > 5aptn_c/pprdn_c
Piperideine dehydrogenase

In [27]:
modelCopy.reactions.PPRDNDH.check_mass_balance()

{'H': -2.0, 'O': -2.0}

In [28]:
modelCopy.reactions.PPRDNDH.reaction

'2.0 h2o_c + nad_c + pprdn_c --> 5aptn_c + h_c + nadh_c'

In [29]:
# Both pprdn_c and 5aptn_c don't have a formula. The formulas were taken from BiGG
# http://bigg.ucsd.edu/models/universal/metabolites/pprdn
# http://bigg.ucsd.edu/models/universal/metabolites/5aptn
modelCopy.metabolites.pprdn_c.formula = 'C5H9N'
modelCopy.metabolites.get_by_id('5aptn_c').formula = 'C5H11NO2'

In [30]:
modelCopy.reactions.PPRDNDH.check_mass_balance()

{}

### IMACTD > im4ac_c/im4act_c
Imidazole acetaldehyde dehydrogenase

In [31]:
modelCopy.reactions.IMACTD.check_mass_balance()

{'charge': 1.0, 'H': 1.0, 'O': -1.0}

In [32]:
modelCopy.reactions.IMACTD.reaction

'h2o_c + im4act_c + nad_c --> 2.0 h_c + im4ac_c + nadh_c'

In [33]:
# im4act_c and im4ac_c are both missing the formula and charge
# im4act_c: http://bigg.ucsd.edu/models/universal/metabolites/im4act
# im4ac_c: http://bigg.ucsd.edu/models/universal/metabolites/im4ac
modelCopy.metabolites.im4act_c.formula = 'C5H6N2O'
modelCopy.metabolites.im4ac_c.formula = 'C5H5N2O2'

# im4ac_c can have a charge of both 0 and -1. 
# Based on the charge imbalance, -1 was chosen
modelCopy.metabolites.im4ac_c.charge = -1

In [34]:
modelCopy.reactions.IMACTD.check_mass_balance()

{}

### URFGTT > 2ombz_c/2ommb_c
UDP-L-rhamnose:flavonol-3-O-D-glucoside L-rhamnosyltransferase

In [35]:
modelCopy.reactions.URFGTT.check_mass_balance()

{'C': -1.0, 'H': -2.0}

In [36]:
modelCopy.reactions.URFGTT.reaction

'2ombz_c + amet_c --> 2ommb_c + ahcys_c + h_c'

In [37]:
# 2ombz_c and 2ommb_c are both missing the formula
modelCopy.metabolites.get_by_id('2ombz_c').formula = 'C47H70O3'
modelCopy.metabolites.get_by_id('2ommb_c').formula = 'C48H72O3'

In [38]:
modelCopy.reactions.URFGTT.check_mass_balance()

{}

### NICRNS > nicrns_c
Nicotinate D-ribonucleotide synthase (ATP)

In [39]:
modelCopy.reactions.NICRNS.check_mass_balance()

{'C': 11.0, 'H': 13.0, 'N': 1.0, 'O': 6.0}

In [40]:
modelCopy.reactions.NICRNS.reaction

'atp_c + nicrns_c --> adp_c + h_c + nicrnt_c'

In [41]:
# The formula is C11H14NO6+ according to Pubchem. One H is removed here (C11H13NO6),
# ...which matches the atoms that are missing in the mass balance; no charge is set
# https://pubchem.ncbi.nlm.nih.gov/compound/Nicotinate-D-ribonucleoside
modelCopy.metabolites.nicrns_c.formula = 'C11H13NO6'

In [42]:
modelCopy.reactions.NICRNS.check_mass_balance()

{}

### SBDH > srb1p_c
L-sorbosone dehydrogenase

In [43]:
modelCopy.reactions.SBDH.check_mass_balance()

{'charge': 2.0, 'H': -11.0, 'C': -6.0, 'O': -9.0, 'P': -1.0}

In [44]:
modelCopy.reactions.SBDH.reaction

'nadp_c + sbt6p_c --> h_c + nadph_c + srb1p_c'

In [45]:
# The formula is C6H13O9P according to PubChem.
# Here two H fewer (C6H11O9P) are used together with a charge of -2
# https://pubchem.ncbi.nlm.nih.gov/compound/L-Sorbose-1-phosphate
modelCopy.metabolites.srb1p_c.formula = 'C6H11O9P'
modelCopy.metabolites.srb1p_c.charge = -2

In [46]:
modelCopy.metabolites.srb1p_c

0,1
Metabolite identifier,srb1p_c
Name,Sorbose 1-phosphate
Memory address,0x01ab98fd4f48
Formula,C6H11O9P
Compartment,c
In 1 reaction(s),SBDH


In [47]:
modelCopy.reactions.SBDH.check_mass_balance()

{}

### TRPTA > indpyr_c
Tryptophan transaminase

In [48]:
modelCopy.reactions.TRPTA.check_mass_balance()

{'charge': 1.0, 'C': -11.0, 'H': -8.0, 'O': -3.0, 'N': -1.0}

In [49]:
modelCopy.reactions.TRPTA.reaction

'akg_c + trp__L_c <=> glu__L_c + indpyr_c'

In [50]:
# The formula of Indolepyruvate is missing (present in name)
# According to BiGG, charge can be 0 or -1. Based on charge balance, -1 was assumed
# http://bigg.ucsd.edu/models/universal/metabolites/indpyr
modelCopy.metabolites.indpyr_c.formula = 'C11H8NO3'
modelCopy.metabolites.indpyr_c.charge = -1

In [51]:
modelCopy.reactions.TRPTA.check_mass_balance()

{}

### PDHcr > dhlam_c/lpam_c
Pyruvate dehydrogenase

In [52]:
modelCopy.reactions.PDHcr.check_mass_balance()

{'H': 2.0}

In [53]:
modelCopy.reactions.PDHcr.reaction

'dhlam_c + nad_c --> h_c + lpam_c + nadh_c'

In [54]:
# both dhlam_c and lpam_c are missing formula. These can be taken from their names
modelCopy.metabolites.dhlam_c.formula = 'C8H17NOS2'
modelCopy.metabolites.lpam_c.formula = 'C8H15NOS2'

In [55]:
modelCopy.reactions.PDHcr.check_mass_balance()

{}

### 3HBCOAHL > 3hibutcoa_c/3hmp_c
3-hydroxyisobutyryl-CoA hydrolase

In [56]:
modelCopy.reactions.get_by_id('3HBCOAHL').check_mass_balance()

{'charge': -3.0, 'C': 21.0, 'H': 31.0, 'N': 7.0, 'O': 15.0, 'P': 3.0, 'S': 1.0}

In [57]:
modelCopy.reactions.get_by_id('3HBCOAHL').reaction

'3hibutcoa_c + h2o_c --> 3hmp_c + coa_c + h_c'

In [58]:
# Neither 3hibutcoa_c nor 3hmp_c has a formula or charge yet.
# 3hibutcoa_c, based on BiGG: http://bigg.ucsd.edu/models/universal/metabolites/3hibutcoa
hibutcoa = modelCopy.metabolites.get_by_id('3hibutcoa_c')
hibutcoa.formula = 'C25H38N7O18P3S'
hibutcoa.charge = -4

# 3hmp_c, based on BiGG: http://bigg.ucsd.edu/models/universal/metabolites/3hmp
hmp = modelCopy.metabolites.get_by_id('3hmp_c')
hmp.formula = 'C4H7O3'
hmp.charge = -1

In [59]:
modelCopy.reactions.get_by_id('3HBCOAHL').check_mass_balance()

{}

### AH6PI > ah6p__D_c
Arabino-3-hexulose-6-P Isomerase

In [60]:
modelCopy.reactions.AH6PI.check_mass_balance()

{'charge': -2.0, 'C': 6.0, 'H': 11.0, 'O': 9.0, 'P': 1.0}

In [61]:
modelCopy.reactions.AH6PI.reaction

'ah6p__D_c --> f6p_c'

In [62]:
# The formula of Arabino-3-hexulose-6-P is missing
# Based on the formula on PubChem (C6H13O9P), a charge of -2 was assumed
# https://pubchem.ncbi.nlm.nih.gov/compound/D-Arabino-hex-3-ulose-6-phosphate
modelCopy.metabolites.ah6p__D_c.formula = 'C6H11O9P'
modelCopy.metabolites.ah6p__D_c.charge = -2

In [63]:
modelCopy.reactions.AH6PI.check_mass_balance()

{}

### PPOR > 2hyoxplac_c
Phenylpyruvate:oxygen oxidoreductase (hydroxylating,decarboxylating)

In [64]:
# Check mass balance
modelCopy.reactions.PPOR.check_mass_balance()

{'charge': 1.0, 'C': -8.0, 'O': -3.0, 'H': -7.0}

In [65]:
# Check reaction
modelCopy.reactions.PPOR.reaction

'o2_c + phpyr_c --> 2hyoxplac_c + co2_c'

In [66]:
# The formula of Hydroxyphenylacetate  is missing
# According to BiGG, charge can be 0 or -1. Based on charge balance, -1 was assumed
# http://bigg.ucsd.edu/universal/metabolites/2hyoxplac
modelCopy.metabolites.get_by_id('2hyoxplac_c').formula = 'C8H7O3'
modelCopy.metabolites.get_by_id('2hyoxplac_c').charge = -1

In [67]:
# Check mass balance
modelCopy.reactions.PPOR.check_mass_balance()

{}

### DKFPS2 > dkfp_c
6 deoxy 5 ketofructose 1 phosphate synthase 2 

In [68]:
modelCopy.reactions.DKFPS2.check_mass_balance()

{'charge': 2.0, 'C': -6.0, 'H': -9.0, 'O': -8.0, 'P': -1.0}

In [69]:
modelCopy.reactions.DKFPS2.reaction

'fdp_c + mthgxl_c --> dkfp_c + g3p_c'

In [70]:
# dkfp_c is missing the formula
# Based on the formula on PubChem (C6H11O8P), a charge of -2 was assumed
# https://pubchem.ncbi.nlm.nih.gov/compound/25137933
modelCopy.metabolites.dkfp_c.formula = 'C6H9O8P'
modelCopy.metabolites.dkfp_c.charge = -2

In [71]:
modelCopy.reactions.DKFPS2.check_mass_balance()

{}

### G3PCT > cdpglyc_c
Glycerol-3-phosphate cytidylyltransferase

In [72]:
modelCopy.reactions.G3PCT.check_mass_balance()

{'charge': 2.0, 'C': -12.0, 'H': -19.0, 'N': -3.0, 'O': -13.0, 'P': -2.0}

In [73]:
modelCopy.reactions.G3PCT.reaction

'ctp_c + glyc3p_c + h_c --> cdpglyc_c + ppi_c'

In [74]:
# CDPglycerol is missing the formula > present in name
# Based on the formula on PubChem (C12H19N3O13P2-2), a charge of -2 was assumed
# https://pubchem.ncbi.nlm.nih.gov/compound/90659090
modelCopy.metabolites.cdpglyc_c.formula = 'C12H19N3O13P2'
modelCopy.metabolites.cdpglyc_c.charge = -2

In [75]:
modelCopy.reactions.G3PCT.check_mass_balance()

{}

### RAFH > raffin_c
Raffinose hydrolyzing enzyme

In [76]:
modelCopy.reactions.RAFH.check_mass_balance()

{'C': 18.0, 'H': 32.0, 'O': 16.0}

In [77]:
modelCopy.reactions.RAFH.reaction

'h2o_c + raffin_c --> fru_c + melib_c'

In [78]:
# raffin_c is missing the formula > present in name
# Based on the formula on PubChem (C18H32O16), no charge was assumed
# https://pubchem.ncbi.nlm.nih.gov/compound/439242
modelCopy.metabolites.raffin_c.formula = 'C18H32O16'

In [79]:
modelCopy.reactions.RAFH.check_mass_balance()

{}

### PHACOAOR > 2dhphaccoa_c
Phenylacetyl-CoA oxygenase/reductase

In [80]:
modelCopy.reactions.PHACOAOR.check_mass_balance()

{'charge': 4.0,
 'H': -40.0,
 'O': -19.0,
 'C': -29.0,
 'N': -7.0,
 'P': -3.0,
 'S': -1.0}

In [81]:
modelCopy.reactions.PHACOAOR.reaction

'h2_c + o2_c + phaccoa_c --> 2dhphaccoa_c'

In [82]:
# 2dhphaccoa_c is missing the formula
# Based on different formulas this chemical structure on PubChem (C29H40O19N7P3S), a charge of -4 was assumed
# https://pubchem.ncbi.nlm.nih.gov/#query=C29H40O19N7P3S
modelCopy.metabolites.get_by_id('2dhphaccoa_c').formula = 'C29H40N7O19P3S'
modelCopy.metabolites.get_by_id('2dhphaccoa_c').charge = -4

In [83]:
modelCopy.reactions.PHACOAOR.check_mass_balance()

{}

# 3. Other imbalances

In [84]:
imbalance_df = imbalance_rxn(modelCopy)
imbalance_df

Unnamed: 0,R,charge,H,O,C,N
PUACGAMS,-1.0,0.0,0.0,0.0,0.0,0.0
CLB6PH,0.0,-2.0,2.0,1.0,0.0,0.0
CLBptspp,0.0,2.0,0.0,0.0,0.0,0.0
SALSYN,0.0,1.0,0.0,0.0,0.0,0.0
MMM2,0.0,5.0,0.0,0.0,0.0,0.0
ACGAL6PI,0.0,-2.0,-5.0,0.0,-2.0,-1.0
ACGAL6PISO,0.0,1.0,0.0,-2.0,-2.0,1.0
GALR1TRA2,0.0,-11.0,43.0,12.0,15.0,0.0
HEPKB2,0.0,2.0,12.0,6.0,7.0,0.0
HEPKA2,0.0,10.0,-20.0,0.0,0.0,0.0


### PUACGAMS
Poly-β-1,6-N-acetyl-D-glucosamine synthase

In [85]:
modelCopy.reactions.PUACGAMS

0,1
Reaction identifier,PUACGAMS
Name,"Poly-?-1,6-N-acetyl-D-glucosamine synthase"
Memory address,0x01ab9a4bea48
Stoichiometry,"puacgam_c + udp_c <=> uacgam_c  Poly-?-1,6-N-acetyl-D-glucosamine + UDP C9H11N2O12P2 <=> UDP-N-acetyl-D-glucosamine"
GPR,CIW80_23080 and CIW80_23085
Lower bound,-1000.0
Upper bound,1000.0


In [86]:
modelCopy.reactions.PUACGAMS.check_mass_balance()

{'R': -1.0}

The reaction involves an R-group and therefore cannot be balanced.

### UAG4Ei

In [87]:
modelCopy.reactions.UAG4Ei.reaction

'uacgam_c --> udpacgal_c'

In [88]:
modelCopy.metabolites.udpacgal_c.charge

0

In [89]:
# udpacgal_c has the same formula as uacgam_c, but is missing the charge
modelCopy.metabolites.get_by_id('udpacgal_c').charge = -2

In [90]:
modelCopy.reactions.UAG4Ei.check_mass_balance()

{}

### CLB6PH & CLBptspp
Cellobiose hydrolase

In [91]:
# Looks like there is a water missing in the reaction 
# and something is wrong in charge
modelCopy.reactions.CLB6PH.check_mass_balance()

{'charge': -2.0, 'H': 2.0, 'O': 1.0}

In [92]:
modelCopy.reactions.CLB6PH.reaction

'cellb6p_c --> g6p_c + glc__D_c'

In [93]:
# Based on KEGG, cellb6p_c should have a charge of -2 with the current formula
# https://www.genome.jp/dbget-bin/www_bget?C04534+C21152
# Additionally a water molecule is consumed in this reaction
modelCopy.metabolites.cellb6p_c.charge = -2
modelCopy.reactions.CLB6PH.add_metabolites({'h2o_c': -1.0})

In [94]:
modelCopy.reactions.CLBptspp.check_mass_balance()

{}

### SALSYN
Salicylate synthase

In [95]:
modelCopy.reactions.SALSYN.check_mass_balance()

{'charge': 1.0}

In [96]:
modelCopy.reactions.SALSYN.reaction

'ichor_c --> pyr_c + salc_c'

In [97]:
# Both pyruvate and salicylate can have no and -1 charge
# Since pyruvate is properly balanced in other reactions, while salicylate
# ...is only in this reaction, the charge of salc_c is changed
modelCopy.metabolites.salc_c.charge = -1

In [98]:
modelCopy.reactions.SALSYN.check_mass_balance()

{}

### MMM2
Methylmalonyl-CoA mutase

In [99]:
modelCopy.reactions.MMM2.check_mass_balance()

{'charge': 5.0}

In [100]:
modelCopy.reactions.MMM2.reaction

'succoa_c --> mmcoa__R_c'

In [101]:
# mmcoa__R_c can be either uncharged or charged -5.
# http://bigg.ucsd.edu/models/universal/metabolites/mmcoa__R

# Based on the reaction, here it should be -5 (succoa_c is properly balanced in the other reactions it is involved in)
modelCopy.metabolites.mmcoa__R_c.charge = -5

In [102]:
modelCopy.reactions.MMM2.check_mass_balance()

{}

### ACGAL6PI & ACGAL6PISO
ACGAL6PI: Galactosamine-6-phosphate isomerase <br>
ACGAL6PISO: D Galactosamine 6 phosphate isomerase

In [103]:
# Check the mass balance
modelCopy.reactions.ACGAL6PI
# modelCopy.reactions.ACGAL6PI.check_mass_balance()

0,1
Reaction identifier,ACGAL6PI
Name,Galactosamine-6-phosphate isomerase
Memory address,0x01ab9a49fd88
Stoichiometry,acgal6p_c --> tag6p__D_c  N Acetyl D galactosamine 6 phosphate C8H16NO9P --> D-Tagatose 6-phosphate
GPR,CIW80_10195
Lower bound,0.0
Upper bound,1000.0


In [104]:
# modelCopy.reactions.ACGAL6PI.reaction
modelCopy.reactions.ACGAL6PISO.check_mass_balance()

{'charge': 1.0, 'C': -2.0, 'N': 1.0, 'O': -2.0}

In [105]:
modelCopy.metabolites.acgal6p_c
# modelCopy.metabolites.tag6p__D_c
# modelCopy.metabolites.galam6p_c
# modelCopy.metabolites.galam6p_c

0,1
Metabolite identifier,acgal6p_c
Name,N Acetyl D galactosamine 6 phosphate C8H16NO9P
Memory address,0x01ab9948b748
Formula,C8H16NO9P
Compartment,c
In 3 reaction(s),"ACGALptspp, ACGAL6PISO, ACGAL6PI"


The conversion of GalNAc-6-P (N-Acetyl D-Galactosamine-6-phosphate) to Tag-6-P (D-Tagatose 6-phosphate) was originally described as a two-step reaction via GalN-6-P (D-Galactosamine-6-phosphate), catalysed by the products of the genes AgaA (GalNAc-6-phosphate deacetylase) and AgaS (GalN-6-P deaminase). AgaI, which is currently linked to this reaction in the model, was found not to be involved in the deamination and isomerization of D-Galactosamine-6-phosphate.
- https://pubmed.ncbi.nlm.nih.gov/8932697/
- https://www.sciencedirect.com/science/article/pii/S0021925820478275?via%3Dihub
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3668189/

The two reactions are as follows:
- GalNAc-6-P + H2O --> GalN-6-P + Acetate (AgaA) > ACGAL6PISO, but with acetate
- GalN-6-P + H2O --> Tag-6-P + Ammonium (AgaS) > GALAM6PISO

The AgaS gene needs to be added to the model. AgaA (and AgaE & AgaF) are present, but missing a name in the model.

In [106]:
# Remove the one-step reaction
modelCopy.remove_reactions(['ACGAL6PI'])

### First reaction
# Adapt ACGAL6PISO: subtract one nh4_c, add one ac_c and additionally
# add one h_c (the proton is needed to get the mass balance right)
deacetylation = modelCopy.reactions.ACGAL6PISO
deacetylation.add_metabolites({'nh4_c': -1.0, 'ac_c': 1.0, 'h_c': 1.0})

# Rename the reaction id and name, and correct the associated gene
deacetylation.id = 'ACGAL6PDA'
deacetylation.name = 'N-Acetyl D-galactosamine-6-phosphate deacetylation'
deacetylation.gene_reaction_rule = 'CIW80_10165'

### Second reaction
# Rename the reaction and correct the associated gene
deamination = modelCopy.reactions.GALAM6PISO
deamination.name = 'Galactosamine-6-phosphate deaminase'
deamination.gene_reaction_rule = 'CIW80_10170'

### Gene names
aga_gene_names = {
    'CIW80_10155': 'AgaE',
    'CIW80_10160': 'AgaF',
    'CIW80_10165': 'AgaA',
    'CIW80_10170': 'AgaS',
}
for aga_gene_id, aga_gene_name in aga_gene_names.items():
    modelCopy.genes.get_by_id(aga_gene_id).name = aga_gene_name

In [107]:
modelCopy.reactions.ACGAL6PDA.check_mass_balance()

{}

In [108]:
modelCopy.reactions.ACGAL6PDA

0,1
Reaction identifier,ACGAL6PDA
Name,N-Acetyl D-galactosamine-6-phosphate deacetylation
Memory address,0x01ab9a49fac8
Stoichiometry,acgal6p_c + h2o_c --> ac_c + galam6p_c + h_c  N Acetyl D galactosamine 6 phosphate C8H16NO9P + H2O H2O --> Acetate + D Galactosamine 6 phosphate C6H14NO8P + H+
GPR,CIW80_10165
Lower bound,0.0
Upper bound,1000.0


In [109]:
# Update rxn_origin table
rxn_origin_df.loc['ACGAL6PI', 'added'] = 'removed'
rxn_origin_df.loc['ACGAL6PI', 'notes'] = 'Reaction removed in 4.2. Replaced by two other reactions, ACGAL6PDA and GALAM6PISO'

rxn_origin_df.loc['ACGAL6PISO', 'added'] = 'removed'
rxn_origin_df.loc['ACGAL6PISO', 'notes'] = 'Reaction was changed to ACGAL6PDA in 4.2'

rxn_origin_df.loc['ACGAL6PDA', 'origin'] = 'New Reaction'
rxn_origin_df.loc['ACGAL6PDA', 'added'] = 'manual'
rxn_origin_df.loc['ACGAL6PDA', 'notebook'] = '4.2'
rxn_origin_df.loc['ACGAL6PDA', 'notes'] = 'Reaction added in 4.2. Replaces one step of the reaction ACGAL6PISO'

rxn_origin_df.loc['GALAM6PISO', 'notes'] = 'Associated gene corrected to "CIW80_10170"  in 4.2'

# Update gene_origin table
# Row masks for the EcN genes touched in this section
is_agaI = gene_origin_df.EcN_gene == 'CIW80_10195'
is_agaA = gene_origin_df.EcN_gene == 'CIW80_10165'
is_agaS = gene_origin_df.EcN_gene == 'CIW80_10170'
is_agaE = gene_origin_df.EcN_gene == 'CIW80_10155'
is_agaF = gene_origin_df.EcN_gene == 'CIW80_10160'

gene_origin_df.loc[is_agaI, 'added'] = 'removed'
gene_origin_df.loc[is_agaI, 'notes'] = 'Associated reaction "ACGAL6PI" was removed. Associated reaction "ACGAL6PISO" was changed to "ACGAL6PDA" with GPR to "CIW80_10165". Associated reaction GALAM6PISO was changed to GPR CIW80_10170 in notebook 4.2'

# NOTE(review): 'gene' is the index of gene_origin_df, so the two assignments to
# column 'gene' below create a separate 'gene' column instead of changing the
# index label - confirm that this is intended before the table is written out.
gene_origin_df.loc[is_agaA, 'gene'] = 'agaA'
gene_origin_df.loc[is_agaA, 'origin'] = 'New reaction'
gene_origin_df.loc[is_agaA, 'added'] = 'manual'
gene_origin_df.loc[is_agaA, 'notebook'] = '4.2'
gene_origin_df.loc[is_agaA, 'notes'] = 'ACGAL6PDA was associated to this gene in notebook 4.2'

gene_origin_df.loc[is_agaS, 'gene'] = 'agaS'
# Copy the origin of CIW80_10195 as a plain value. Assigning the selected Series
# directly makes pandas align it on the index labels, which writes NaN for rows
# whose label differs from the CIW80_10195 row.
agaI_origin = gene_origin_df.loc[is_agaI, 'origin']
if not agaI_origin.empty:
    gene_origin_df.loc[is_agaS, 'origin'] = agaI_origin.iloc[0]
gene_origin_df.loc[is_agaS, 'added'] = 'manual'
gene_origin_df.loc[is_agaS, 'notebook'] = '4.2'
gene_origin_df.loc[is_agaS, 'notes'] = 'GALAM6PISO was associated to this gene in notebook 4.2'

# Notes use the same spelling as the gene names set in the model (AgaE / AgaF)
gene_origin_df.loc[is_agaE, 'notes'] = 'Name updated to AgaE in notebook 4.2'
gene_origin_df.loc[is_agaF, 'notes'] = 'Name updated to AgaF in notebook 4.2'

# Remove from imbalance list (guarded, so re-running the cell does not raise ValueError)
for fixed_rxn in ('ACGAL6PI', 'ACGAL6PISO', 'GALAM6PISO'):
    if fixed_rxn in imbalance_list:
        imbalance_list.remove(fixed_rxn)

In [110]:
gene_origin_df

Unnamed: 0_level_0,EcN_gene,origin,added,notebook,notes,gene
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
b2551,CIW80_06560,iML1515,automatic,3.1,none,
b0870,CIW80_22360,iML1515,automatic,3.1,none,
b3368,CIW80_11380,iML1515,automatic,3.1,none,
b2436,CIW80_06055,iML1515,automatic,3.1,none,
b3500,CIW80_12105,iML1515,automatic,3.1,none,
...,...,...,...,...,...,...
SbBS512_E0724,CIW80_05020,iSbBS512_1146,automatic,3.1,none,
SFxv_2588,CIW80_05275,iSFxv_1172,automatic,3.1,none,
SFxv_4366,CIW80_14645,iSFxv_1172,automatic,3.1,none,
SFxv_0967,CIW80_21840,iSFxv_1172,automatic,3.1,none,


In [111]:
modelCopy.genes.CIW80_10195

0,1
Gene identifier,CIW80_10195
Name,agaI
Memory address,0x01ab9a679ac8
Functional,True
In 0 reaction(s),


### GALR1TRA2, HEPKA2 & HEPKB2
GALR1TRA2: Galactosyltransferase II LPS R1 core synthesis
<br>HEPKA2: LPS heptose kinase II LPS core synthesis
<br>HEPKB2: LPS heptose kinase II LPS core synthesis

In [112]:
modelCopy.reactions.GALR1TRA2.check_mass_balance()

{'charge': -11.0, 'C': 15.0, 'H': 43.0, 'O': 12.0}

In [113]:
modelCopy.reactions.GALR1TRA2.reaction

'gagggicolipaAR1_c + udpgal_c --> colipa_c + h_c + udp_c'

In [114]:
modelCopy.metabolites.colipa_c.formula
modelCopy.metabolites.colipa_c  

0,1
Metabolite identifier,colipa_c
Name,Core oligosaccharide lipid A
Memory address,0x01ab99381908
Formula,C176H303N2O100P4
Compartment,c
In 2 reaction(s),"COLIPAabcpp, GALR1TRA2"


All three reactions are part of LPS synthesis; no correction is applied to them in this section.

### 4PHACTE & 2PHACTE

In [115]:
modelCopy.reactions.get_by_id('4PHACTE').check_mass_balance()

{'charge': -5.0, 'H': -5.0}

In [116]:
modelCopy.reactions.get_by_id('4PHACTE').reaction

'h2o_c + hphaccoa_c --> 4hphac_c + coa_c'

In [117]:
# 4hphac_c (a product of 4PHACTE) had no formula during the proton addition round,
# ...so this imbalance was not identified there.
# NOTE(review): this comment originally named hphaccoa_c as the metabolite without a formula - confirm
# Add protons to both reactions (2PHACTE gets the same correction)
modelCopy.reactions.get_by_id('4PHACTE').add_metabolites({'h_c': 5.0})
modelCopy.reactions.get_by_id('2PHACTE').add_metabolites({'h_c': 5.0})

In [118]:
modelCopy.reactions.get_by_id('4PHACTE').check_mass_balance()

{}

In [119]:
modelCopy.reactions.get_by_id('2PHACTE').check_mass_balance()

{}

### 34PHACTE

In [120]:
modelCopy.reactions.get_by_id('34PHACTE').check_mass_balance()

{'charge': -4.0, 'H': -4.0, 'O': -1.0}

In [121]:
modelCopy.reactions.get_by_id('34PHACTE')

0,1
Reaction identifier,34PHACTE
Name,Phenylacetyl CoA thioesterase
Memory address,0x01ab9a48cbc8
Stoichiometry,"34dhphaccoa_c + h2o_c --> 34dhpac_c + coa_c  3 4 Dihydroxyphenylacetyl CoA C29H42N7O19P3S + H2O H2O --> 3,4-Dihydroxyphenylacetaldehyde + Coenzyme A"
GPR,CIW80_04380
Lower bound,0.0
Upper bound,1000.0


In [122]:
modelCopy.reactions.get_by_id('34DHALDD')

0,1
Reaction identifier,34DHALDD
Name,Aldehyde dehydrogenase 3 4 dihydroxyphenylacetaldehyde NAD
Memory address,0x01ab9a48cd88
Stoichiometry,"34dhpac_c + h2o_c + nad_c --> 34dhpha_c + 2.0 h_c + nadh_c  3,4-Dihydroxyphenylacetaldehyde + H2O H2O + Nicotinamide adenine dinucleotide --> 3-4-Dihydroxyphenylacetate + 2.0 H+ + Nicotinamide adenine dinucleotide - reduced"
GPR,CIW80_04440
Lower bound,0.0
Upper bound,1000.0


<font color = 'red'> Was not changed </font>

The K-12 strain has a Phenylacetyl-CoA thioesterase (PaaI), which converts 3,4-dihydroxyphenylacetyl-CoA directly to 3,4-dihydroxyphenylacetate, without the generation of NADH. 
- https://biocyc.org/META/NEW-IMAGE?type=REACTION&object=RXN0-5065

The reaction 34DHALDD is described for Homo Sapiens.
- https://biocyc.org/META/NEW-IMAGE?type=REACTION&object=RXN6666-5

Could not find literature that clearly describes this reaction in E. coli. Not changed for the moment

### 3PHACOAOR

In [123]:
modelCopy.reactions.get_by_id('3PHACOAOR').check_mass_balance()

{'charge': 4.0}

In [124]:
modelCopy.reactions.get_by_id('3PHACOAOR').reaction

'h2_c + o2_c + phaccoa_c --> 3dhphaccoa_c'

In [125]:
modelCopy.metabolites.get_by_id('3dhphaccoa_c').charge

0

In [126]:
# Phenylacetyl-CoA has a charge of -4
# As a result, 3dhphaccoa_c should also have a -4 charge
modelCopy.metabolites.get_by_id('3dhphaccoa_c').charge = -4

In [127]:
modelCopy.reactions.get_by_id('3PHACOAOR').check_mass_balance()

{}

### 3NTYROXDApp & 4H3NALDD & 4H3NPACAM

In [128]:
modelCopy.reactions.get_by_id('3NTYROXDApp').reaction

'3ntym_p + h2o_p + o2_p --> 4h3npacald_p + h2o2_p + nh4_p'

In [129]:
modelCopy.reactions.get_by_id('4H3NALDD').reaction

'4h3npacald_c + h2o_c + nad_c --> 4h3npac_c + 2.0 h_c + nadh_c'

In [130]:
modelCopy.reactions.get_by_id('4H3NPACAM').reaction

'4h3npac_c + h2o_c --> 4hphac_c + nh4_c'

In [131]:
# The formula of this compound is incorrect
modelCopy.metabolites.get_by_id('3ntym_p')

0,1
Metabolite identifier,3ntym_p
Name,3 Nitrotyramine C8H15N2
Memory address,0x01ab9a76f548
Formula,C8H15N2
Compartment,p
In 1 reaction(s),3NTYROXDApp


In [132]:
# The formula of this compound is incorrect
# (the same wrong formula is also appended to the metabolite name)
modelCopy.metabolites.get_by_id('4h3npacald_p')

0,1
Metabolite identifier,4h3npacald_p
Name,4 Hydroxy 3 Nitro Phenylacetaldehyde C8H11NO
Memory address,0x01ab9a76f288
Formula,C8H11NO
Compartment,p
In 1 reaction(s),3NTYROXDApp


In [133]:
# The formula of this compound is incorrect
# (the same wrong formula is also appended to the metabolite name)
modelCopy.metabolites.get_by_id('4h3npac_c')

0,1
Metabolite identifier,4h3npac_c
Name,4 Hydroxy 3 Nitro Phenylacetic acid C8H10NO2
Memory address,0x01ab9a76f0c8
Formula,C8H10NO2
Compartment,c
In 2 reaction(s),"4H3NPACAM, 4H3NALDD"


In [134]:
# The charge and formula of 4-Hydroxyphenylacetate are correct,
# so this end product of the pathway does not need to be changed
modelCopy.metabolites.get_by_id('4hphac_c')

0,1
Metabolite identifier,4hphac_c
Name,4-Hydroxyphenylacetate
Memory address,0x01ab9947c108
Formula,C8H7O3
Compartment,c
In 6 reaction(s),"HPA3MO, 4HOXPACt2pp, 4H3NPACAM, 4HOXPACMOF, 4HALDD, 4PHACTE"


<font color = 'red'> '4H3NPACAM' not corrected </font>

### Compounds
All three compounds have a formula in the model that differs from the formula in PubChem (listed below as model vs. PubChem):
- 3-Nitrotyramine (3ntym_p) has a completely different formula (C8H15N2 vs. C8H10N2O3) https://pubchem.ncbi.nlm.nih.gov/compound/3-Nitrotyramine
- 4-Hydroxy-3-Nitro-Phenylacetaldehyde (4h3npacald_p) has a different formula (C8H11NO vs. C8H7NO4) https://pubchem.ncbi.nlm.nih.gov/compound/4-Hydroxy-3-nitrophenylacetaldehyde
- 4-Hydroxy-3-Nitro-Phenylacetic acid (4h3npac_c) has a different formula (C8H10NO2 vs. C8H7NO5) https://pubchem.ncbi.nlm.nih.gov/compound/4-Hydroxy-3-nitrophenylacetic-acid

### Reactions
The reactions are described in this paper > https://journals.asm.org/doi/10.1128/jb.00508-08?permanently=true
- In the paper, the first reaction (3NTYROXDApp) releases ammonia. The model uses ammonium instead, so a proton was added to the reactant side to keep the reaction balanced.
- The first reaction takes place in the periplasm, the others in the cytoplasm. The transport of the compound from the periplasm to the cytoplasm is not linked to a gene in other models and was therefore not yet part of the model. It is added here as a new reaction (4H3NPACALDt2rpp).
- No clear description of the last reaction in this pathway, 4-hydroxy-3-nitrophenylacetate amidase (4H3NPACAM), was found in the literature. Denitration of 4-Hydroxy-3-Nitro-Phenylacetaldehyde (HNPA) to 3,4-dihydroxyphenylacetate (HPC) is described in Burkholderia sp., and involves the NADH-dependent stoichiometric release of nitrite from HNPA. Since no information for E. coli was found, this reaction was not changed. > https://journals.asm.org/doi/full/10.1128/AEM.72.2.1040-1044.2006#F2

In [135]:
### Correction of reactions
# Formula & name
# Replace the incorrect formulas with the PubChem formulas. The aldehyde is
# present in two compartments (_p and _c), so both copies are corrected.
modelCopy.metabolites.get_by_id('3ntym_p').formula = 'C8H10N2O3'
modelCopy.metabolites.get_by_id('4h3npacald_p').formula = 'C8H7NO4'
modelCopy.metabolites.get_by_id('4h3npacald_c').formula = 'C8H7NO4'
modelCopy.metabolites.get_by_id('4h3npac_c').formula = 'C8H7NO5'

# The old names carried the wrong formula as a suffix. Every corrected
# metabolite gets a clean name, including the cytoplasmic aldehyde, so that the
# two compartment copies of the aldehyde stay consistent.
modelCopy.metabolites.get_by_id('3ntym_p').name = '3-Nitrotyramine'
modelCopy.metabolites.get_by_id('4h3npacald_p').name = '4-Hydroxy-3-Nitro-Phenylacetaldehyde'
modelCopy.metabolites.get_by_id('4h3npacald_c').name = '4-Hydroxy-3-Nitro-Phenylacetaldehyde'
modelCopy.metabolites.get_by_id('4h3npac_c').name = '4-Hydroxy-3-Nitro-Phenylacetic acid'

# Correcting reactions
# add_metabolites adds the given coefficient to the existing one:
# - 3NTYROXDApp gains one periplasmic proton on the reactant side
# - 4H3NALDD goes from 2.0 h_c to 1.0 h_c on the product side
modelCopy.reactions.get_by_id('3NTYROXDApp').add_metabolites({'h_p': -1.0})
modelCopy.reactions.get_by_id('4H3NALDD').add_metabolites({'h_c': -1.0})

### Periplasm to cytoplasm
# New transport reaction that connects the periplasmic first step to the
# cytoplasmic remainder of the pathway.
# NOTE(review): the id and name describe a reversible proton symport, but the
# reaction defined here moves only the aldehyde (no proton) and keeps the
# default irreversible bounds - confirm which of the two is intended.
reaction = Reaction('4H3NPACALDt2rpp')
reaction.name = '4 hydroxy 3 nitrophenylacetaldehyde reversible transport via proton symport periplasm'
reaction.subsystem = 'Transport, Inner Membrane'
reaction.add_metabolites({
    modelCopy.metabolites.get_by_id('4h3npacald_p'): -1.0,
    modelCopy.metabolites.get_by_id('4h3npacald_c'): 1.0})
modelCopy.add_reactions([reaction])

In [136]:
# Display the newly added periplasm-to-cytoplasm transport reaction
modelCopy.reactions.get_by_id('4H3NPACALDt2rpp')

0,1
Reaction identifier,4H3NPACALDt2rpp
Name,4 hydroxy 3 nitrophenylacetaldehyde reversible transport via proton symport periplasm
Memory address,0x01ab9bd6a7c8
Stoichiometry,4h3npacald_p --> 4h3npacald_c  4-Hydroxy-3-Nitro-Phenylacetaldehyde --> 4 Hydroxy 3 Nitro Phenylacetaldehyde C8H11NO
GPR,
Lower bound,0.0
Upper bound,1000.0


In [137]:
# Re-check 4H3NALDD after the formula and proton corrections (expected: empty dict)
modelCopy.reactions.get_by_id('4H3NALDD').check_mass_balance()

{}

In [138]:
# 4H3NPACAM itself was deliberately not corrected, so an imbalance is still expected here
modelCopy.reactions.get_by_id('4H3NPACAM').check_mass_balance()

{'H': 2.0, 'O': -3.0}

In [139]:
# Register the manually added transport reaction in the rxn_origin table,
# filling in one column at a time
new_rxn_record = {
    'origin': 'New Reaction',
    'added': 'manual',
    'notebook': '4.2',
    'notes': 'Reaction added in 4.2. Facilitates transport over inner membrane to enable pathway',
}
for column, value in new_rxn_record.items():
    rxn_origin_df.loc['4H3NPACALDt2rpp', column] = value

### SALCHS2FEexs

In [140]:
# Check the mass/charge balance of SALCHS2FEexs (an empty dict means balanced)
modelCopy.reactions.get_by_id('SALCHS2FEexs').check_mass_balance()

{'charge': -3.0}

In [141]:
# Show the reaction equation to see which metabolites take part
modelCopy.reactions.get_by_id('SALCHS2FEexs').reaction

'fe3_e + salchs2_e --> salchs2fe_e'

In [142]:
# Inspect the charge stored for the reactant salchs2_e
modelCopy.metabolites.get_by_id('salchs2_e').charge

0

<font color = 'red'> Pathway will be updated in a later notebook. </font> 

### CLBH
Cellobiose hydrolase

In [143]:
# Looks like there is a water missing in the reaction:
# the imbalance (H: 2, O: 1) corresponds to exactly one H2O
modelCopy.reactions.CLBH.check_mass_balance()

{'H': 2.0, 'O': 1.0}

In [144]:
# Show the reaction equation: no water is present on the reactant side
modelCopy.reactions.CLBH.reaction

'cellb_c --> 2.0 glc__D_c'

In [145]:
# MetaCyc confirms that the hydrolysis consumes one water molecule:
# https://biocyc.org/META/NEW-IMAGE?type=PATHWAY&object=PWY-6812
# A coefficient of -1 puts h2o_c on the reactant side of CLBH.
clbh = modelCopy.reactions.CLBH
clbh.add_metabolites({'h2o_c': -1.0})
clbh

0,1
Reaction identifier,CLBH
Name,Cellobiose hydrolase
Memory address,0x01ab9a4b4608
Stoichiometry,cellb_c + h2o_c --> 2.0 glc__D_c  Cellobiose + H2O H2O --> 2.0 D-Glucose
GPR,CIW80_00615
Lower bound,0.0
Upper bound,1000.0


In [146]:
# Re-check CLBH after adding the water molecule (expected: empty dict)
modelCopy.reactions.CLBH.check_mass_balance()

{}

### PENAM

In [147]:
# Check the mass/charge balance of PENAM before any correction (an empty dict means balanced)
modelCopy.reactions.get_by_id('PENAM').check_mass_balance()

{'H': -1.0}

In [148]:
# Show the reaction equation; all metabolites of PENAM are periplasmic (_p)
modelCopy.reactions.PENAM.reaction

'h2o_p + peng_p <=> 6apa_p + pac_p'

In [149]:
# Phenylacetic acid is missing one hydrogen
# https://pubchem.ncbi.nlm.nih.gov/compound/Phenylacetic-acid
# Give it a charge of -1 in the periplasm (pac_p, used by PENAM) and apply the
# same update to the external copy (pac_e).
for pac_id in ('pac_p', 'pac_e'):
    modelCopy.metabolites.get_by_id(pac_id).charge = -1

In [150]:
# Re-check PENAM after the charge update of phenylacetate; a proton is still unaccounted for
modelCopy.reactions.get_by_id('PENAM').check_mass_balance()

{'charge': -1.0, 'H': -1.0}

In [151]:
# Add a proton to the product side to correct the remaining charge and H imbalance.
# PENAM takes place entirely in the periplasm (all of its metabolites are *_p),
# so the periplasmic proton h_p is used. A cytoplasmic h_c would make this
# periplasmic reaction produce a proton in a different compartment.
modelCopy.reactions.get_by_id('PENAM').add_metabolites({'h_p': 1.0})

In [152]:
# Re-check PENAM after adding the proton (expected: empty dict)
modelCopy.reactions.get_by_id('PENAM').check_mass_balance()

{}

# 4. Remaining inconsistencies

In [153]:
# Recompute the imbalance table on the corrected model and report how many
# reactions are still imbalanced relative to the total number of reactions
imbalance_df = imbalance_rxn(modelCopy)
n_imbalanced, n_reactions = len(imbalance_df), len(modelCopy.reactions)
print('Remaining number of imbalanced reactions:', n_imbalanced, 'out of', n_reactions, 'reactions')
imbalance_df

Remaining number of imbalanced reactions: 7 out of 2806 reactions


Unnamed: 0,R,charge,C,H,O
PUACGAMS,-1.0,0.0,0.0,0.0,0.0
GALR1TRA2,0.0,-11.0,15.0,43.0,12.0
HEPKB2,0.0,2.0,7.0,12.0,6.0
HEPKA2,0.0,10.0,0.0,-20.0,0.0
34PHACTE,0.0,-4.0,0.0,-4.0,-1.0
SALCHS2FEexs,0.0,-3.0,0.0,0.0,0.0
4H3NPACAM,0.0,0.0,0.0,2.0,-3.0


<font color = 'red'> SALCHS2FEexs will be updated in a later notebook and is therefore not counted as a remaining imbalanced reaction. </font> 

In [154]:
# Reactions listed in imbalance_list (defined earlier in the notebook) that no
# longer appear in the remaining-imbalance table are considered solved.
imbalance_solved = set(imbalance_list) - set(imbalance_df.index)

# Iterate in sorted order: a set of strings has no stable order between runs,
# and .loc appends a new row for any reaction id that is not yet in the table,
# so sorting keeps the saved table reproducible.
for rxn in sorted(imbalance_solved):
    rxn_origin_df.loc[rxn, 'notes'] = 'Reaction imbalanced solved in 4.2'

# 5. Save updated model

In [155]:
# Write the corrected model of this notebook (4.2) to JSON
model_path = '../data/models/%s_cur_4.2.json' % EcN_ID
cobra.io.json.save_json_model(modelCopy, model_path, pretty=False)

In [156]:
# Write the reaction and gene origin tables back to their CSV files
origin_tables = (
    (rxn_origin_df, '../tables/rxn_origin.csv'),
    (gene_origin_df, '../tables/gene_origin.csv'),
)
for origin_table, csv_path in origin_tables:
    origin_table.to_csv(csv_path)

In [157]:
#Establish a definition that initializes models to an in silico representation of M9 media

def m9(model):
    """Set the exchange bounds of ``model`` to an in silico M9 medium.

    The lower bound of every reaction whose id contains 'EX_' is first set to
    0. The lower bounds of the medium components listed below are then set to
    their M9 values (glucose and oxygen: -20, cbl1: -0.01, all others: -1000).

    Parameters
    ----------
    model
        Object whose ``reactions`` attribute can be iterated and gives access
        to a reaction by id through attribute lookup. It is modified in place.

    Returns
    -------
    The same ``model`` object that was passed in.
    """
    # Close all exchange reactions before opening the medium components
    for rxn in model.reactions:
        if 'EX_' in rxn.id:
            rxn.lower_bound = 0

    # Lower bound per medium component, applied in this order
    medium_lower_bounds = {
        'EX_ca2_e': -1000,
        'EX_cl_e': -1000,
        'EX_co2_e': -1000,
        'EX_cobalt2_e': -1000,
        'EX_cu2_e': -1000,
        'EX_fe2_e': -1000,
        'EX_fe3_e': -1000,
        'EX_h_e': -1000,
        'EX_h2o_e': -1000,
        'EX_k_e': -1000,
        'EX_mg2_e': -1000,
        'EX_mn2_e': -1000,
        'EX_mobd_e': -1000,
        'EX_na1_e': -1000,
        'EX_tungs_e': -1000,
        'EX_zn2_e': -1000,
        'EX_ni2_e': -1000,
        'EX_sel_e': -1000,
        'EX_slnt_e': -1000,
        'EX_glc__D_e': -20,
        'EX_so4_e': -1000,
        'EX_nh4_e': -1000,
        'EX_pi_e': -1000,
        'EX_cbl1_e': -.01,
        'EX_o2_e': -20,
    }
    for exchange_id, bound in medium_lower_bounds.items():
        # getattr keeps the attribute-style lookup on model.reactions
        getattr(model.reactions, exchange_id).lower_bound = bound

    return model

In [158]:
# Test whether the EcN model can grow on glucose (EX_glc__D_e).
# The M9 bounds are applied inside the model context and the model is
# optimized there; m9 returns the model it was given.
with modelCopy:
    solution = m9(modelCopy).optimize()

solution

Unnamed: 0,fluxes,reduced_costs
ALATA_D2,0.00000,-2.947634e-02
SHCHD2,0.00025,-5.551115e-17
CPPPGO,0.00025,5.551115e-17
GTHOr,0.00000,0.000000e+00
DHORD5,0.00000,-3.469447e-18
...,...,...
MALt5,0.00000,-3.469447e-18
SALCHS2FEexs,0.00000,0.000000e+00
DHPTDCs,0.00000,0.000000e+00
SALCHS4FEexs,0.00000,0.000000e+00
