In [11]:
import cobra
import pandas as pd
import copy
import random
import sys
from cobra.io import read_sbml_model, load_json_model
from io import StringIO

# Read rhamnolipid production data for each strain

In [62]:
# Load the per-strain rhamnolipid table (columns seen in the output:
# strain, rhamn3cats, rhamn2cats).
df = pd.read_excel('../data/rhamnolipids/rhamnMat.xlsx')

# Relabel strain 'PA14' as 'UCBPP-PA14'.
# NOTE(review): presumably so the name matches the per-strain model
# directory used later ('../PATRIC_GMM/<strain>/') -- confirm.
is_pa14 = df['strain'] == 'PA14'
df.loc[is_pa14, 'strain'] = 'UCBPP-PA14'
df.head()

Unnamed: 0,strain,rhamn3cats,rhamn2cats
0,F22031,2,1
1,F23197,2,1
2,F30658,1,1
3,F34365,2,1
4,F5677,0,0


In [66]:
# Strains whose two-category rhamnolipid score (rhamn2cats) is 0.
# NOTE(review): presumably these are the rhamnolipid non-producers -- confirm.
is_rl_minus = df['rhamn2cats'] == 0
RL_minus = list(df.loc[is_rl_minus, 'strain'])

# Bare last expression so the cell actually displays the list it computed.
RL_minus

['F5677', 'F63912', 'H27930', 'M1608', 'M55212', 'S86968', 'W36662', 'W60856']


# Read metabolic reactions and metabolites from reconstructed models

In [37]:
# Build one presence/absence table of reactions across all strains:
# one row per reaction, one column per strain (1 = present, NaN = absent
# after the outer merge).
rxn_key_cols = ['ID', 'Name', 'Equation', 'Definition']
df_rxn = pd.DataFrame()

for i, genome in enumerate(df.strain):

    genome_dir = '../PATRIC_GMM/' + genome + '/'

    # read all reactions from metabolic model
    _df_rxn = pd.read_csv(genome_dir + 'PA_' + genome + '_GMM.rxntbl', sep='\t')

    # read gapfill reactions
    _df_gapfill_rxn = pd.read_csv(genome_dir + 'gf.0.gftbl', sep='\t', index_col=0)

    # Remove gapfill reactions, remove column Genes, and add a
    # presence/absence column named after the strain. Done as one
    # non-mutating chain: modifying the boolean-filtered slice in place
    # (drop(inplace=True) / column assignment) is the pattern that raises
    # pandas' SettingWithCopyWarning.
    _df_rxn = (
        _df_rxn[~_df_rxn['ID'].isin(_df_gapfill_rxn.ID)]
        .drop(columns='Genes')
        .assign(**{genome: 1})
    )

    # merge tables
    if i == 0:
        df_rxn = _df_rxn
    else:
        # Outer merge keeps reactions found in only some strains; strains
        # lacking a reaction get NaN in their presence column.
        df_rxn = pd.merge(df_rxn, _df_rxn, on=rxn_key_cols, how='outer')

In [40]:
# Preview the first five rows of the merged reaction presence/absence table.
df_rxn.head(n=5)

Unnamed: 0,ID,Name,Equation,Definition,F22031,F23197,F30658,F34365,F5677,F63912,...,T63266,W16407,W25637,W36662,W45909,W60856,W70332,W91453,X78812,X9820
0,rxn02201_c0,"2-amino-4-hydroxy-6-hydroxymethyl-7,8-dihydrop...",(1) cpd00443[c0] + (1) cpd02920[c0] => (1) cpd...,(1) ABEE[c0] + (1) 2-Amino-4-hydroxy-6-hydroxy...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,rxn00351_c0,gamma-L-glutamyl-L-cysteine:glycine ligase (AD...,(1) cpd00002[c0] + (1) cpd00033[c0] + (1) cpd0...,(1) ATP[c0] + (1) Glycine[c0] + (1) gamma-Glut...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,rxn07431_c0,R07600_c0,(1) cpd00213[c0] + (1) cpd14700[c0] <=> (1) cp...,(1) Lipoamide[c0] + (1) 2-Methyl-1-hydroxyprop...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,rxn00836_c0,IMP:diphosphate phospho-D-ribosyltransferase_c0,(1) cpd00012[c0] + (1) cpd00067[c0] + (1) cpd0...,(1) PPi[c0] + (1) H+[c0] + (1) IMP[c0] <= (1) ...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,rxn00390_c0,N-Acyl-L-aspartate amidohydrolase_c0,(1) cpd00001[c0] + (1) cpd01759[c0] <=> (1) cp...,(1) H2O[c0] + (1) N-Acyl-L-aspartate[c0] <=> (...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Print reactions that are missing in non-producers

In [49]:
# Accessory reactions: reactions absent from at least one strain, i.e. with a
# NaN in at least one per-strain presence column of the outer-merged table.
# Only the strain columns are inspected, so a blank Name/Equation/Definition
# cannot flag a reaction that every strain actually has.
df_rxn_acc = df_rxn[df_rxn[list(df.strain)].isna().any(axis=1)]
df_rxn_acc.head()

Unnamed: 0,ID,Name,Equation,Definition,F22031,F23197,F30658,F34365,F5677,F63912,...,T63266,W16407,W25637,W36662,W45909,W60856,W70332,W91453,X78812,X9820
29,rxn05063_c0,carbamate hydro-lyase_c0,(1) cpd00001[c0] + (1) cpd01015[c0] => (1) cpd...,(1) H2O[c0] + (1) Cyanate[c0] => (1) Carbamate...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
41,rxn04648_c0,"catechol:oxygen 1,2-oxidoreductase_c0",(1) cpd00007[c0] + (1) cpd09310[c0] <=> (2) cp...,"(1) O2[c0] + (1) 3,4,6-Trichlorocatechol[c0] <...",1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
57,rxn03456_c0,"cyclohexane-1,2-dione acylhydrolase (decyclizi...",(1) cpd00067[c0] + (1) cpd03640[c0] <=> (1) cp...,(1) H+[c0] + (1) 6-Oxohexanoate[c0] <=> (1) H2...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
121,rxn01276_c0,D-Gluconate:NAD+ 5-oxidoreductase_c0,(1) cpd00003[c0] + (1) cpd00222[c0] <=> (1) cp...,(1) NAD[c0] + (1) GLCN[c0] <=> (1) NADH[c0] + ...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
140,rxn01930_c0,3-Hydroxy-L-kynurenine hydrolase_c0,(1) cpd00001[c0] + (1) cpd02065[c0] => (1) cpd...,(1) H2O[c0] + (1) 3-Hydroxy-L-kynurenine[c0] =...,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [78]:
# Report every accessory reaction together with the strains that lack it.
# The report is printed in the notebook and also appended to
# accessory_reactions.txt. Append mode is kept from the original cell, so
# re-running the cell adds the report again instead of replacing it.
strain_columns = list(df.strain.values)
with open("accessory_reactions.txt", "a") as myfile:
    for index in df_rxn_acc.index:
        # Presence flags of this reaction across all strains (NaN = absent).
        presence = df_rxn_acc.loc[index, strain_columns]
        missing_strains = presence[presence.isna()].index

        report = 'Missing in %s\n%s\n' % (
            ','.join(missing_strains),
            df_rxn_acc.loc[index, 'Definition'],
        )
        # print() adds the trailing blank line that separates entries.
        print(report)
        myfile.write(report + '\n')

Missing in M1608
(1) H2O[c0] + (1) Cyanate[c0] => (1) Carbamate[c0]

Missing in M74707
(1) O2[c0] + (1) 3,4,6-Trichlorocatechol[c0] <=> (2) H+[c0] + (1) 2,3,5-Trichloro-cis,cis-muconate[c0]

Missing in M1608
(1) H+[c0] + (1) 6-Oxohexanoate[c0] <=> (1) H2O[c0] + (1) Cyclohexan-1,2-dione[c0]

Missing in PA7,PAO1,S86968
(1) NAD[c0] + (1) GLCN[c0] <=> (1) NADH[c0] + (1) H+[c0] + (1) 5-Dehydrogluconate[c0]

Missing in M1608
(1) H2O[c0] + (1) 3-Hydroxy-L-kynurenine[c0] => (1) L-Alanine[c0] + (1) H+[c0] + (1) 3-Hydroxyanthranilate[c0]

Missing in M1608
(1) Maltose[c0] + (1) Maltotetraose[c0] <=> (1) D-Glucose[c0] + (1) Maltopentaose[c0]

Missing in M1608
(1) Maltose[c0] <=> (1) TRHL[c0]

Missing in M74707
(1) O2[c0] + (1) 3-Fluorocatechol[c0] <=> (2) H+[c0] + (1) 2-Fluoro-cis,cis-muconate[c0]

Missing in M1608
(2) H+[c0] + (1) Carbamate[c0] <=> (1) CO2[c0] + (1) NH3[c0]

Missing in M1608
(1) O2[c0] + (1) Homogentisate[c0] <=> (1) H+[c0] + (1) 4-Maleylacetoacetate[c0]

Missing in M1608
(1) NAD