In [1]:
import os
from collections import defaultdict
from typing import DefaultDict, List

import cobra
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from cobra.core import Reaction
from cobra.util import create_stoichiometric_matrix

In [2]:
# Directory that holds the SBML model file. The default is the original
# author's location; set the METAENGINEERING_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
path = os.environ.get(
    "METAENGINEERING_DATA_DIR",
    "/home/tvangraft/tudelft/thesis/metaengineering/data",
)
# Load the iMM904 model from SBML; `model` is used by every cell below.
model = cobra.io.read_sbml_model(f'{path}/iMM904.xml')

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled


In [25]:
# Labels used to tag model metabolites as precursor metabolites. They are
# matched by substring against model metabolite ids (see
# `is_precursor_metabolite`).
# NOTE(review): entries joined with ';' look like grouped labels; a substring
# test only matches an id that literally contains the whole label -- confirm
# that this is intended.
PRECURSOR_METABOLITES = [
    'g6p;g6p-B',
    'g6p;f6p;g6p-B',
    'f6p',
    'dhap',
    '3pg;2pg',
    'pep',
    'pyr',
    'r5p',
    'e4p',
    'accoa',
    'akg',
    'oaa',
]

def get_gene_reaction(cobra_model=None):
    """Map every gene id to the ids of the reactions it is associated with.

    Parameters
    ----------
    cobra_model : optional
        Any object exposing ``genes``, where each gene has an ``id`` and an
        iterable ``reactions`` whose items have an ``id``. When omitted, the
        notebook-level ``model`` is used, which keeps existing zero-argument
        calls working.

    Returns
    -------
    DefaultDict[str, List[str]]
        ``gene id -> [reaction id, ...]`` in model iteration order. Genes
        without reactions get no entry; because the result is a
        ``defaultdict(list)``, looking up an unknown gene returns ``[]``
        (and inserts that key).
    """
    if cobra_model is None:
        # Fall back to the model loaded at the top of the notebook.
        cobra_model = model

    gene_to_reaction: DefaultDict[str, List[str]] = defaultdict(list)
    for gene in cobra_model.genes:
        for reaction in gene.reactions:
            gene_to_reaction[gene.id].append(reaction.id)
    return gene_to_reaction

def is_precursor_metabolite(metabolite_model_id: str, precursors=None):
    """Yield every precursor label that occurs inside ``metabolite_model_id``.

    Matching is a plain substring test, so a label also matches ids that
    merely contain it (for example ``'pyr'`` matches ``'1pyr5c_c'``).

    Parameters
    ----------
    metabolite_model_id : str
        Metabolite id as used in the model.
    precursors : iterable of str, optional
        Labels to test. Defaults to the notebook-level
        ``PRECURSOR_METABOLITES``, which preserves the original behavior.

    Yields
    ------
    str
        Matching labels, in the order they appear in ``precursors``. Nothing
        is yielded when no label matches, so ``next(...)`` on the result
        raises ``StopIteration`` in that case.
    """
    if precursors is None:
        precursors = PRECURSOR_METABOLITES
    for precursor_metabolite in precursors:
        if precursor_metabolite in metabolite_model_id:
            yield precursor_metabolite

def get_compartment_for_enzymes(enzymes: List[str]):
    """Yield the compartments of every reaction linked to the given enzymes.

    The gene-to-reaction mapping is rebuilt on each call through
    ``get_gene_reaction()``. For each enzyme (gene id) in ``enzymes`` and each
    of its reactions, the items of that reaction's ``compartments`` are
    yielded one by one, so the same compartment can appear many times.
    """
    reactions_by_gene = get_gene_reaction()
    for enzyme_id in enzymes:
        for reaction_id in reactions_by_gene[enzyme_id]:
            yield from model.reactions.get_by_id(reaction_id).compartments

In [4]:
# Notebook-level lookup tables between genes and reactions, in both directions.
# Both are defaultdicts, so looking up an unknown id returns an empty list.
gene_to_reaction: DefaultDict[str, List[str]] = defaultdict(list)
reaction_to_gene: DefaultDict[str, List[str]] = defaultdict(list)

for gene in model.genes:
    for reaction in gene.reactions:
        gene_id, reaction_id = gene.id, reaction.id
        # Record the association under both keys.
        reaction_to_gene[reaction_id].append(gene_id)
        gene_to_reaction[gene_id].append(reaction_id)

In [None]:
# Model metabolites whose id contains any precursor label (substring match).
# Uses the PRECURSOR_METABOLITES constant defined above; the previous lowercase
# name `precursor_metabolites` is not defined anywhere in the notebook and
# fails with NameError on a fresh kernel.
precursor_model_metabolites = [
    metabolite
    for metabolite in model.metabolites
    if any(pc_meta in metabolite.id for pc_meta in PRECURSOR_METABOLITES)
]
precursor_model_metabolites_id = [metabolite.id for metabolite in precursor_model_metabolites]

# Stoichiometric matrix as a DataFrame, restricted to the precursor metabolite rows.
precursor_stiochiometric_df: pd.DataFrame = create_stoichiometric_matrix(model, array_type="DataFrame")  # type: ignore
precursor_stiochiometric_df = precursor_stiochiometric_df.loc[precursor_model_metabolites_id]

# Keep only the columns that have a non-zero coefficient for at least one of
# the selected metabolites.
touches_precursor = precursor_stiochiometric_df.abs().sum(axis=0) != 0.0
precursor_stiochiometric_df = precursor_stiochiometric_df.loc[:, touches_precursor]

# Reaction objects for the remaining columns, in column order.
precursor_reactions = [
    model.reactions.get_by_id(reaction_id)
    for reaction_id in precursor_stiochiometric_df.columns.to_list()
]

In [11]:
# Long-format table: one row per (metabolite, reaction) pair with a non-zero
# stoichiometric coefficient, tagged with the first matching precursor label.
precursor_df = (
    precursor_stiochiometric_df
    .stack()
    .to_frame('is_active')
    .loc[lambda frame: frame['is_active'] != 0.0]
    .rename_axis(['metabolite_model_id', 'reaction_id'])
    .reset_index()
    .assign(
        precursor_id=lambda frame: frame['metabolite_model_id'].map(
            lambda x: next(is_precursor_metabolite(x))
        )
    )
)
precursor_df

Unnamed: 0,metabolite_model_id,reaction_id,is_active,precursor_id
0,2dr5p_c,DRBK,1.0,r5p
1,1pyr5c_c,G5SADs,1.0,pyr
2,1pyr5c_c,P5CR,-1.0,pyr
3,1pyr5c_m,G5SADrm,1.0,pyr
4,1pyr5c_m,P5CDm,-1.0,pyr
...,...,...,...,...
225,r5p_c,RPI,-1.0,r5p
226,r5p_c,PRPPS,-1.0,r5p
227,r5p_c,THZPSN2_SC,-1.0,r5p
228,r5p_c,TKT1,-1.0,r5p


In [12]:
# Rows of the precursor table that were tagged with the 'pep' label.
_df = precursor_df.loc[precursor_df['precursor_id'].eq('pep')]

# Per-row summary kept for debugging; the print is left disabled.
for _, data in _df.iterrows():
    message = (
        f"|{data['metabolite_model_id']:=<20}|\n"
        f"|{data['reaction_id']:=<20}|\n"
        f"{reaction_to_gene[data['reaction_id']]}\n"
    )

    # print(message)

# Sorted, de-duplicated gene ids over all reactions listed in `_df`.
genes_associated_with_pep = np.unique([
    gene_id
    for pep_reaction_id in _df['reaction_id']
    for gene_id in reaction_to_gene[pep_reaction_id]
])
genes_associated_with_pep

array(['YAL038W', 'YBR249C', 'YBR291C', 'YDL040C', 'YDR035W', 'YDR127W',
       'YGR147C', 'YGR254W', 'YHR174W', 'YKR093W', 'YKR097W', 'YMR323W',
       'YOR347C', 'YOR393W', 'YPL281C'], dtype='<U7')

### Pyruvate

In [28]:
# Gene ids of the enzymes inspected for pyruvate.
target_enzymes_pyr = [
    'YGR240C', 'YLR044C', 'YMR012W',
    'YHR190W', 'YMR318C', 'YDR019C',
    'YNL248C', 'YML001W', 'YDL014W',
]
# One entry per (reaction, compartment) pair, so compartments repeat.
compartments = [*get_compartment_for_enzymes(target_enzymes_pyr)]
print(compartments)

['c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'm', 'm', 'm', 'm', 'm', 'm', 'm', 'm']


In [13]:
# Print each pyruvate target enzyme followed by its reactions (id + equation).
# Enzymes with no reactions in the lookup table are skipped.
target_enzymes_pyr = [
    'YGR240C', 'YLR044C', 'YMR012W',
    'YHR190W', 'YMR318C', 'YDR019C',
    'YNL248C', 'YML001W', 'YDL014W',
]
for target_enzyme in target_enzymes_pyr:
    if not gene_to_reaction[target_enzyme]:
        continue
    print(f"{target_enzyme=}")
    for reaction in gene_to_reaction[target_enzyme]:
        equation = model.reactions.get_by_id(reaction).build_reaction_string()
        message = f"{reaction}\n{equation}"
        print(message)


target_enzyme='YGR240C'
PFK_3
atp_c + s7p_c --> adp_c + h_c + s17bp_c
PFK_2
atp_c + tag6p__D_c --> adp_c + h_c + tagdp__D_c
PFK
atp_c + f6p_c --> adp_c + fdp_c + h_c
target_enzyme='YLR044C'
3MOBDC
3mob_c + h_c --> 2mppal_c + co2_c
3MOPDC
3mop_c + h_c --> 2mbald_c + co2_c
PYRDC2
acald_c + h_c + pyr_c --> actn__R_c + co2_c
PPYRDC
h_c + phpyr_c --> co2_c + pacald_c
INDPYRD
h_c + indpyr_c <=> co2_c + id3acald_c
ACALDCD
2.0 acald_c --> actn__R_c
PYRDC
h_c + pyr_c --> acald_c + co2_c
target_enzyme='YHR190W'
SQLS
2.0 frdp_c + h_c + nadph_c --> nadp_c + 2.0 ppi_c + sql_c
target_enzyme='YMR318C'
ALCD23yi
2mppal_c + h_c + nadph_c --> ibutoh_c + nadp_c
ALCD22yi
2mbald_c + h_c + nadph_c --> 2mbtoh_c + nadp_c
ALCD25yi
h_c + nadph_c + pacald_c --> 2phetoh_c + nadp_c
ALCD24yi
3mbald_c + h_c + nadph_c --> iamoh_c + nadp_c
target_enzyme='YDR019C'
GCCcm
dhlpro_m + nad_m <=> h_m + lpro_m + nadh_m
GCC2bim
alpam_m + thf_m --> dhlam_m + mlthf_m + nh4_m
GCCam
gly_m + h_m + lpro_m <=> alpro_m + co2_m
GCC2am
g

In [13]:
# Print each pep target enzyme followed by its reactions (id + equation).
# Enzymes with no reactions still get their header line printed.
target_enzymes_pep = [
    'YDL014W', 'YDR019C', 'YNL248C',
    'YML001W', 'YMR318C', 'YMR012W',
    'YLR109W', 'YHR190W', 'YGR240C',
]
for target_enzyme in target_enzymes_pep:
    print(f"{target_enzyme=}")
    for reaction in gene_to_reaction[target_enzyme]:
        equation = model.reactions.get_by_id(reaction).build_reaction_string()
        message = f"{reaction}\n{equation}"
        print(message)

target_enzyme='YJR109C'
CBPS
2.0 atp_c + gln__L_c + h2o_c + hco3_c --> 2.0 adp_c + cbp_c + glu__L_c + 2.0 h_c + pi_c
target_enzyme='YNL134C'
target_enzyme='YDR365C'
target_enzyme='YBL039C'
CTPS2
atp_c + gln__L_c + h2o_c + utp_c --> adp_c + ctp_c + glu__L_c + 2.0 h_c + pi_c
CTPS1
atp_c + nh4_c + utp_c --> adp_c + ctp_c + 2.0 h_c + pi_c
target_enzyme='YOR020C'
target_enzyme='YLR167W'
target_enzyme='YHR063C'
DPR
2dhp_c + h_c + nadph_c --> nadp_c + pant__R_c
target_enzyme='YIL118W'
target_enzyme='YLR421C'


In [14]:
# Print each dhap target enzyme followed by its reactions (id + equation).
# Enzymes with no reactions still get their header line printed.
target_enzyme_dhap = [
    'YDL014W', 'YML001W', 'YDR019C',
    'YNL248C', 'YMR012W', 'YMR318C',
    'YHR190W', 'YLR044C', 'YGR240C',
]
for target_enzyme in target_enzyme_dhap:
    print(f"{target_enzyme=}")
    for reaction in gene_to_reaction[target_enzyme]:
        equation = model.reactions.get_by_id(reaction).build_reaction_string()
        message = f"{reaction}\n{equation}"
        print(message)

target_enzyme='YJR109C'
CBPS
2.0 atp_c + gln__L_c + h2o_c + hco3_c --> 2.0 adp_c + cbp_c + glu__L_c + 2.0 h_c + pi_c
target_enzyme='YHR063C'
DPR
2dhp_c + h_c + nadph_c --> nadp_c + pant__R_c
target_enzyme='YBL039C'
CTPS2
atp_c + gln__L_c + h2o_c + utp_c --> adp_c + ctp_c + glu__L_c + 2.0 h_c + pi_c
CTPS1
atp_c + nh4_c + utp_c --> adp_c + ctp_c + 2.0 h_c + pi_c
target_enzyme='YDR365C'
target_enzyme='YNL134C'
target_enzyme='YOR020C'
target_enzyme='YCL030C'
PRAMPC
h2o_c + prbamp_c --> prfp_c
PRATPP
h2o_c + prbatp_c --> h_c + ppi_c + prbamp_c
HISTD
h2o_c + histd_c + 2.0 nad_c --> 3.0 h_c + his__L_c + 2.0 nadh_c
target_enzyme='YIL118W'
target_enzyme='YNL178W'
