In [67]:
import cobra
import numpy as np
import pandas as pd
import os
from copy import deepcopy
import re

# Read the Pseudomonas putida model with the rhamnolipid biosynthesis pathway

In [68]:
def use_glyMM(model):
    """Constrain ``model`` to the glycerol minimal medium defined below.

    All exchange reactions first get ``lower_bound = 0``. Each exchange
    listed in the medium then gets its listed lower bound and an upper bound
    of 1000.

    Parameters
    ----------
    model : object
        Must expose ``exchanges`` (iterable of reactions) and
        ``reactions.get_by_id``; it is modified in place.

    Returns
    -------
    The same ``model`` object that was passed in.

    Notes
    -----
    A medium component whose exchange reaction is not in the model is
    reported on stdout and skipped. Only the ``KeyError`` raised by a failed
    lookup is handled; any other error propagates to the caller.
    """
    # Block all uptake before opening the medium components.
    for rxn in model.exchanges:
        rxn.lower_bound = 0

    glyc_min_media = {
        'EX_glyc_e_':-10,        # glycerol
        'EX_nh4_e_': -1000,      # ammonium
        'EX_pi_e_':  -1000,      # phosphate
        'EX_so4_e_': -1000,      # sulfate
        'EX_fe3_e_': -1000,      # fe3+
        'EX_mn2_e_': -1000,      # mn2+
        'EX_zn2_e_': -1000,      # zn2+
        'EX_cu2_e_': -1000,      # cu2+
        'EX_ca2_e_': -1000,      # ca2+
        'EX_cl_e_' : -1000,      # cl-
        'EX_cobalt2_e_': -1000,  # cobalt
        'EX_k_e_':   -1000,      # k
        'EX_mobd_e_'   : -1000,  # mo6+
        'EX_na1_e_'    : -1000,     # na+
        'EX_ni2_e_'    : -1000,     # ni2+
        'EX_mg2_e_': -1000,      # mg2+
        'EX_o2_e_':  -1000
    }

    for rxn_id, uptake_bound in glyc_min_media.items():
        # Only the lookup is guarded, so a failure while assigning bounds is
        # never mistaken for a missing reaction.
        try:
            rxn = model.reactions.get_by_id(rxn_id)
        except KeyError:
            print("The reaction {} does not exist in model.".format(str(rxn_id)))
            continue
        rxn.lower_bound = uptake_bound
        rxn.upper_bound = 1000

    return model

In [69]:
# Load the genome-scale model from JSON (per the file name: iJN1411 with the
# rhamnolipid biosynthesis pathway added).
df_pp = cobra.io.load_json_model('SI3_iJN1411final_flux_w_rhamnolipid_biosynthesis.json')
# Restrict uptake to the glycerol minimal medium defined in use_glyMM.
df_pp = use_glyMM(df_pp)
# NOTE(review): this requires a working CPLEX installation — confirm it is
# available before re-running the notebook.
df_pp.solver = 'cplex'
# Print the optimal value of the current objective on this medium.
print(df_pp.slim_optimize())

0.6015473096312804


In [70]:
# Display the symbolic expression of the model's current objective.
df_pp.objective.expression

0.0 + 1.0*BiomassKT2440_WT3 - 1.0*BiomassKT2440_WT3_reverse_2cf2b

# Find genes in the Pseudomonas putida model that are missing in all Pseudomonas aeruginosa strains

In [71]:
# Load the rhamnolipid table; the first spreadsheet column becomes the index,
# which is later iterated as the list of strain names.
df_rhl = pd.read_excel('../data/rhamnolipids/rhamnMat.xlsx', index_col=0)
# Relabel index entry 'PA14' as 'UCBPP-PA14' (a dict passed to rename maps
# index labels by default). Presumably this matches the strain name used in
# the BLAST result file names — TODO confirm.
df_rhl = df_rhl.rename({'PA14':'UCBPP-PA14'})
df_rhl.head()

Unnamed: 0_level_0,rhamn3cats,rhamn2cats
strain,Unnamed: 1_level_1,Unnamed: 2_level_1
F22031,2,1
F23197,2,1
F30658,1,1
F34365,2,1
F5677,0,0


In [72]:
# Start by assuming every model gene is missing from P. aeruginosa. A gene is
# cleared as soon as one strain shows a bidirectional best BLAST hit for it.
missing_genes = {g.id: True for g in df_pp.genes}

for strain in df_rhl.index:
    filename = '../reciprocal_blast_ref_PP_KT2440/PP_KT2440_vs_PA_'+strain+'.txt'
    # Strains without a BLAST result file are skipped.
    if not os.path.isfile(filename):
        continue

    # `sep` must be given by keyword: pandas >= 2.0 rejects it positionally.
    df_bbh = pd.read_csv(filename, sep=',', index_col=0)

    # if it's not bidirectional then remove
    df_bbh = df_bbh[df_bbh.BBH == '<=>']
    assert len(df_bbh.subject) == len(np.unique(df_bbh.subject)), \
        'duplicate subject genes among bidirectional hits in ' + filename

    # Build the lookup once per strain instead of once per gene.
    matched_subjects = set(df_bbh.subject)
    for g in df_pp.genes:
        if g.id in matched_subjects:
            missing_genes[g.id] = False

In [74]:
# Split the model genes by the result of the ortholog screen (missing_genes).
# Both lists are built here so this cell runs on a fresh kernel; previously
# it printed `genes_to_keep` before that name had been defined.

# genes that are present in at least one Pseudomonas aeruginosa strain
genes_to_keep = [k for k, v in missing_genes.items() if not v]
# genes without a bidirectional best hit in any Pseudomonas aeruginosa strain
genes_to_remove = [k for k, v in missing_genes.items() if v]
print(len(genes_to_keep))

1160

In [75]:
# Genes with a bidirectional best hit in at least one Pseudomonas aeruginosa
# strain (missing_genes value is False); these are the genes that are kept.
genes_to_keep = [k for k, v in missing_genes.items() if v==False]
# Report how many genes are specific to Pseudomonas putida, i.e. the size of
# the `genes_to_remove` list built in the preceding cell.
print(len(genes_to_remove))

255

# Remove these genes and associated reactions

In [76]:
# Genes treated as present: those with a P. aeruginosa ortholog plus the
# heterologous rhamnolipid biosynthesis genes.
present_genes = set(genes_to_keep) | {'PA3479', 'PA3478', 'PA1130'}

# A token is any run of characters that is neither whitespace nor a parenthesis.
# Substituting whole tokens (rather than str.replace on substrings) prevents a
# short gene ID from corrupting a longer ID that happens to contain it.
rule_token = re.compile(r"[^\s()]+")


def _gene_state(match):
    """Map one rule token to '1' (gene present) or '0'; keep 'and'/'or' as is."""
    token = match.group(0)
    if token in ('and', 'or'):
        return token
    return '1' if token in present_genes else '0'


for rxn in df_pp.reactions:
    rule = rxn.gene_reaction_rule

    # spontaneous reaction
    if rule == '':
        continue

    # enzymatic reaction: present genes become 1, every other gene becomes 0
    rule = rule_token.sub(_gene_state, rule)

    # After substitution the rule holds only 0, 1, and, or and parentheses.
    # eval is still run without builtins so a malformed rule cannot execute
    # anything else.
    is_keep = eval(rule, {'__builtins__': {}}, {})
    if not is_keep:
        bounds_old = rxn.bounds
        # Set both bounds at once so that lower <= upper holds throughout.
        rxn.bounds = (0, 0)
        max_mu = df_pp.slim_optimize()
        if np.isnan(max_mu) or max_mu < 1e-5:
            # Blocking this reaction abolishes growth: restore it.
            rxn.bounds = bounds_old
            print(rxn.id, 'growth essential, cannot be removed')
        else:
            print(rxn.id, 'removed')

cobra.io.save_json_model(df_pp, 'pruned_SI3_iJN1411final_flux_w_rhamnolipid_biosynthesis.json')

1P2CBXLCYCL removed
1PY4h3cAH removed
3HLYTCL removed
3MBZALDH removed
3MBZDH removed
3MCAT23DOX removed
3OADPCOAT removed
4ABUTabcpp removed
4CMCOAS removed
4HPRODCabcpp removed
4HPROLTabcpp removed
4MBZALDH removed
4MBZDH removed
4MCAT23DOX removed
4OD removed
4OD2 removed
5DH4DGLCD removed
6HNACt1pp removed
AADSACYCL removed
AALDCDLsi removed
ACALD removed
ADOCBLtonex removed
AGM3Pt2pp removed
AGM4Pt2pp removed
AGMT removed
AGMt2pp removed
ALAt2pp removed
ALDD19x removed
ALLTNRA removed
ALPATE160pp removed
ALPATE161pp removed
ALPATE180pp removed
ALPATE181pp removed
ALPATG160pp removed
ALPATG161pp removed
ALPATG180pp removed
ALPATG181pp removed
ALTRH removed
AOBUTDs growth essential, cannot be removed
AOXHEXCYCL removed
APENTAMAH2 removed
ARBTNexs removed
ARBTNtonex removed
ASPSALy removed
ATPHs removed
BZALDH removed
BZDH removed
BZtex removed
CACOAHA removed
CAFFCOA removed
CARBSARAH removed
CAT23DOX removed
CAt6pp growth essential, cannot be removed
CBItonex removed
CBL1tonex remo

# Make sure that the model grows and produces rhamnolipid

In [78]:
# make sure the model grows
# Reload the pruned model from disk so that the check runs on exactly what was saved.
df_pp = cobra.io.load_json_model('pruned_SI3_iJN1411final_flux_w_rhamnolipid_biosynthesis.json')
# Print the optimal value of the loaded model's objective.
print(df_pp.slim_optimize())

0.4423766580515935


In [79]:
# test if the new model can produce rhamnolipids
# Replace the objective with the flux through EX_llrhh_e_ and maximise it.
# NOTE(review): EX_llrhh_e_ is presumably the rhamnolipid exchange reaction — confirm.
df_pp.objective = df_pp.reactions.EX_llrhh_e_.flux_expression
df_pp.objective.direction = 'max'
print('max rhamnolipid production = %2.2f'% (df_pp.slim_optimize()))

max rhamnolipid production = 1.50


# Remove reactions that form futile cycles for NADH/NADPH/GSH production

In [80]:
def fix_NADH_NADPH_flux_loop(model):
    """Make a fixed set of reactions forward-only to break NADH/NADPH loops.

    Each reaction listed in ``forward_only`` gets ``lower_bound = 0``. Upper
    bounds and all other reactions are left untouched. The model is modified
    in place and the same object is returned.
    """
    forward_only = (
        # NADTRHD, FMNRx and FMNR2r form a loop
        #   NADTRHD: nad_c + nadph_c --> nadh_c + nadp_c
        #   FMNRx:   fmn_c + h_c + nadh_c --> fmnh2_c + nad_c       (missing in iMO1053)
        #   FMNR2r:  fmn_c + h_c + nadph_c <=> fmnh2_c + nadp_c     (missing in iMO1053)
        'FMNR2r',

        # GLYD:   h_c + hpyr_c + nadh_c <==> glyc_R_c + nad_c
        #         (found in iMO1053; Pseudocyc: forward; Metacyc: bidirectional)
        # TRSARr: 2h3oppan_c + h_c + nadh_c <=> glyc_R_c + nad_c
        #         (missing in iMO1053 and Pseudocyc; Metacyc: forward)
        'GLYD',
        'TRSARr',

        # No evidence was found in Pseudocyc or iMO1053 that NAD/NADH is a
        # substrate of fdxr_42_c. Only the reverse direction is blocked here;
        # the upper bounds are deliberately left alone.
        'FRDO6r',
        'FRDO7r',

        # GLUDx and GLUDy go in the forward direction in Pseudocyc
        #   GLUDx: glu__L_c + h2o_c + nad_c  <=> akg_c + h_c + nadh_c  + nh4_c
        #   GLUDy: glu__L_c + h2o_c + nadp_c <=> akg_c + h_c + nadph_c + nh4_c
        # Only GLUDx is constrained; GLUDy is left reversible.
        'GLUDx',

        # LEUDHr, VALDHr and ILEDHr are all reversible and can be coupled to
        # generate a high NADH flux. They are made irreversible, in the
        # direction that produces NH4.
        'LEUDHr',
        'VALDHr',
        'ILEDHr',

        # PDHcr: dhlam_c + nad_c <=> h_c + lpam_c + nadh_c
        # iMO1053: forward direction
        'PDHcr',
    )

    # Constraints that were considered but are intentionally NOT applied:
    #
    # * Reactions coupling NADH/NAD to NADPH/NADP. The first two are found in
    #   Metacyc (pathway: 9-cis, 11-trans-octadecadienoyl-CoA degradation):
    #     HACD31i:    3hhd58coa_c + nad_c --> 3ohd58ccoa_c + h_c + nadh_c
    #     ECOAH36:    3hhd58coa_c <=> h2o_c + td58_2_coa_c   (also in Pseudocyc, reverse)
    #     RECOAH24:   R_3htd58coa_c <=> h2o_c + td58_2_coa_c
    #     RHACOAR142: 3ohd58coa_c + h_c + nadph_c <=> R_3htd58coa_c + nadp_c
    #   Reactions whose ID starts with "RECOAH" seem specific to PP and lack
    #   evidence in PA. They are assumed able to run in reverse, like ECOAH36,
    #   so their upper bounds are not set to 0.
    # * H2CO3D / HCO3E upper bounds (CO2 will be gone after it is released).
    # * SHK3Dr lower bound (SHK3Dr and SHK3D are the same reaction but
    #   conflict in reversibility).
    # * PRPPS lower bound (PRPP is irreversible).

    for rxn_id in forward_only:
        getattr(model.reactions, rxn_id).lower_bound = 0

    return model

In [82]:
# Reload the pruned model saved earlier in the notebook.
df_pp = cobra.io.load_json_model('pruned_SI3_iJN1411final_flux_w_rhamnolipid_biosynthesis.json')
df_pp.reactions.ATPM.lower_bound = 0 # ignore maintenance flux
# Make the reactions listed in fix_NADH_NADPH_flux_loop forward-only.
df_pp = fix_NADH_NADPH_flux_loop(df_pp)
# NOTE(review): this overwrites the file that was loaded above, so the pruned
# model on disk is replaced by the loop-fixed one; re-running this cell then
# starts from the already-modified file.
cobra.io.save_json_model(df_pp, 'pruned_SI3_iJN1411final_flux_w_rhamnolipid_biosynthesis.json')