In [1]:
import cobra
import pandas as pd
import csv
import sys
sys.path.append('../../utils/')
import graph_utils
import biocyc_query_utils
import networkx as nx

The following script generates a number of tables parsed directly from the SBML model, which are useful during data processing workflow (e.g. if working from R). If model is updated, please re-run this enetire notebook to refresh the output

### Load SBML Model and Graph

In [2]:
model=cobra.io.read_sbml_model('../iCH360/Escherichia_coli_iCH360.xml')
graph=nx.read_gml('../../Knowledge_graph/ich360_graph.gml')

Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-12


### Export reaction set

In [3]:
#All reactions
all_reactions=[r.id for r in model.reactions]
#Enzymatic reactions
def rxn_filter(r):
    if 'EX_' in r.id:
        return False
    # elif 'diffusion' in r.name:
    #     return False
    elif 's0001' in r.gene_reaction_rule:
        return False
    elif r.id == 'Biomass':
        return False
    else:
        return True    
enzymatic_reactions=[r.id for r in model.reactions if rxn_filter(r)]
print(f'All reactions: {len(all_reactions)}')
print(f'Enzymatic reactions: {len(enzymatic_reactions)}')

All reactions: 349
Enzymatic reactions: 312


In [4]:
with open('all_reactions.csv', 'w',newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([[r] for r in all_reactions])

with open('enzymatic_reactions.csv', 'w',newline='') as csvfile:
    writer = csv.writer(csvfile,escapechar='\n')
    writer.writerows([[r] for r in enzymatic_reactions])

# Reaction, enzyme, Subunit table

In [5]:


reaction_nodes = [node for node in graph.nodes() if graph.nodes[node]['type']=='reaction']
print(f'reaction nodes: {reaction_nodes}')

parsed=[]

for reaction_node in reaction_nodes:
      isoenzymes=[node for node in graph.successors(reaction_node) if graph.edges[reaction_node,node]['type']=='catalysis']

      for enzyme_id in isoenzymes:
            mw=graph.nodes[enzyme_id]['mw']
            gpr=graph_utils.compute_node_gpr(graph, enzyme_id)
            genes=graph_utils.genes_in_gpr(gpr)
            num_subunits=(graph_utils.number_of_pp_subunits(graph,enzyme_id))
            polypeptides=[graph_utils.bnum2uniprot(graph, g) for g in genes if g !='s0001']
            new_row={
                    'reaction_id':reaction_node.replace('bigg:',''),
                    'enzyme':enzyme_id,
                    'catalysis_type':graph.edges[reaction_node,enzyme_id]['subtype'],
                    'mw':mw,
                    'gpr':gpr,
                    'genes':(','.join(genes)),
                    'polypeptides':(','.join(polypeptides)),
                    'num_subunits':num_subunits
                }
            parsed.append(new_row)

#Write
parsed_df=pd.DataFrame([pd.Series(row) for row in parsed])
parsed_df.to_csv('reaction_enzymes_subunits.csv', index=False)
parsed_df.head()

reaction nodes: ['bigg:NDPK5', 'bigg:SHK3Dr', 'bigg:NDPK6', 'bigg:NDPK8', 'bigg:DHORTS', 'bigg:OMPDC', 'bigg:G5SD', 'bigg:CS', 'bigg:ICDHyr', 'bigg:ACALD', 'bigg:PPA', 'bigg:PPCK', 'bigg:ME1', 'bigg:ALATA_L', 'bigg:XYLK', 'bigg:RBK', 'bigg:GLYK', 'bigg:ASPTA', 'bigg:FBP', 'bigg:PYK', 'bigg:GTHOr', 'bigg:ILETA', 'bigg:DHORD5', 'bigg:VALTA', 'bigg:IPPMIb', 'bigg:ORPT', 'bigg:ACHBS', 'bigg:DHAD2', 'bigg:ACLS', 'bigg:TRPS2', 'bigg:PSCVT', 'bigg:PFL', 'bigg:ANS', 'bigg:FRD2', 'bigg:ANPRT', 'bigg:CHORM', 'bigg:PTAr', 'bigg:CHORS', 'bigg:IGPS', 'bigg:ACKr', 'bigg:LEUTAi', 'bigg:ENO', 'bigg:FBA', 'bigg:HCO3E', 'bigg:IMPC', 'bigg:IMPD', 'bigg:PPS', 'bigg:PGI', 'bigg:PGK', 'bigg:PGL', 'bigg:RPE', 'bigg:DHQTi', 'bigg:IPMD', 'bigg:AIRC3', 'bigg:TALA', 'bigg:ADSL2r', 'bigg:TKT1', 'bigg:RNDR1', 'bigg:ALCD2x', 'bigg:RNDR3', 'bigg:RNDR4', 'bigg:TMDS', 'bigg:DHAD1', 'bigg:IPPMIa', 'bigg:MDH', 'bigg:FUM', 'bigg:KARA1', 'bigg:KARA2', 'bigg:ACCOAC', 'bigg:KAS14', 'bigg:ARGSS', 'bigg:AGPR', 'bigg:ICL', 'bi

Unnamed: 0,reaction_id,enzyme,catalysis_type,mw,gpr,genes,polypeptides,num_subunits
0,NDPK5,ADENYL-KIN-MONOMER,secondary,23.586,b0474,b0474,P69441,1.0
1,NDPK5,NUCLEOSIDE-DIP-KIN-CPLX,primary,61.852,b2518,b2518,P0A763,4.0
2,SHK3Dr,AROE-MONOMER,primary,29.414001,b3281,b3281,P15770,1.0
3,SHK3Dr,EG11234-MONOMER,secondary,31.228,b1692,b1692,P0A6D5,1.0
4,NDPK6,NUCLEOSIDE-DIP-KIN-CPLX,primary,61.852,b2518,b2518,P0A763,4.0


In [6]:
model_genes={'bnum':[],'uniprot':[],'name':[],'biocyc_id':[]}
for node_id in graph.nodes():
    node=graph.nodes[node_id]
    if node['type']=='polypeptide':
        bnum=node['gene']['bnum']
        uniprot=node['annotation']['UNIPROT']
        biocyc_id=node['biocyc_id']
        name=node['gene']['name']
        model_genes['bnum'].append(bnum)
        model_genes['uniprot'].append(uniprot)
        model_genes['biocyc_id'].append(biocyc_id)
        model_genes['name'].append(name)
model_genes_df=pd.DataFrame.from_dict(model_genes)
model_genes_df.to_csv('model_genes.csv', index=False)

# Metabolites BIGG-biocyc map

In [7]:
metabolites_bigg_biocyc_map={'bigg.metabolite':[],'biocyc_id':[]}
for m in model.metabolites:
    metabolites_bigg_biocyc_map['bigg.metabolite'].append(m.id[0:-2])
    if 'biocyc' in m.annotation.keys():
        metabolites_bigg_biocyc_map['biocyc_id'].append(m.annotation['biocyc'])
    else:
        metabolites_bigg_biocyc_map['biocyc_id'].append('NA')
metabolites_bigg_biocyc_map_df=pd.DataFrame.from_dict(metabolites_bigg_biocyc_map)
metabolites_bigg_biocyc_map_df.to_csv('./metabolites_bigg_biocyc_map.csv')

# Enzyme to pp Map

In [8]:
enzyme_nodes=graph_utils.compute_catalytic_nodes(graph)
pp_nodes=[node for node in graph.nodes() if graph.nodes[node]['type']=='protein' and graph.nodes[node]['subtype']=='polypeptide']
pp_nodes_uniprot=[graph.nodes[pp]['annotation']['UNIPROT'] for pp in pp_nodes]

enzyme_pp_stoichiometric_matrix=pd.DataFrame(index=enzyme_nodes,columns=pp_nodes,data=0)
for enzyme in enzyme_nodes:
    cur_enzyme_composition=graph_utils.compute_node_pp_composition(graph,enzyme)
    for pp,stoich in cur_enzyme_composition.items():
        enzyme_pp_stoichiometric_matrix.loc[enzyme,pp]=stoich

enzyme_pp_stoichiometric_matrix.to_csv('enzyme_pp_stoichiometric_matrix.csv')

enzyme_pp_stoichiometric_matrix_uniprot=enzyme_pp_stoichiometric_matrix.copy()
enzyme_pp_stoichiometric_matrix_uniprot.columns=pp_nodes_uniprot
enzyme_pp_stoichiometric_matrix_uniprot.to_csv('enzyme_pp_stoichiometric_matrix_uniprot.csv')

enzyme_pp_stoichiometric_matrix.head()

Unnamed: 0,ADENYL-KIN-MONOMER,NUCLEOSIDE-DIP-KIN-MONOMER,AROE-MONOMER,DIHYDROOROT-MONOMER,OROTPDECARB-MONOMER,GLUTSEMIALDEHYDROG-MONOMER,CITSYN-MONOMER,ISOCITDEH-SUBUNIT,MHPF-MONOMER,ADHE-MONOMER,...,G7187-MONOMER,CARBOXYL-TRANSFERASE-ALPHA-MONOMER,CARBOXYL-TRANSFERASE-BETA-MONOMER,BIOTIN-CARBOXYL-MONOMER,BCCP-MONOMER,LSERINEDEAM1-MONOMER,G7841-MONOMER,GPH-MONOMER,PGAM2-MONOMER,HOMOCYSMET-MONOMER
HOMSUCTRAN-CPLX,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PGLYCDEHYDROG-CPLX,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACSERLYB-CPLX,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CPLX0-7997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ADENYLOSUCCINATE-SYN-DIMER,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
enzyme='ADENYLOSUCCINATE-SYN-DIMER'
enzyme_pp_stoichiometric_matrix_uniprot.loc[enzyme][enzyme_pp_stoichiometric_matrix_uniprot.loc[enzyme]>0]

P0A7D4    2
Name: ADENYLOSUCCINATE-SYN-DIMER, dtype: int64