In [1]:
cd /home/tvangraft/tudelft/thesis/metaengineering

/home/tvangraft/tudelft/thesis/metaengineering


In [2]:
from collections import defaultdict
from typing import DefaultDict, List

from src.pipeline.dataloader import DataLoader


import pandas as pd
import numpy as np

import cobra
from cobra.util import create_stoichiometric_matrix
from cobra.core import Reaction


import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Data directory, given relative to the working directory set by the first
# cell (same convention as the './data/training/' folder used for DataLoader),
# so the project root is hard-coded in one place only.
path = "./data"
# Read the SBML model stored in iMM904.xml.
model = cobra.io.read_sbml_model(f'{path}/iMM904.xml')

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled


In [5]:
# Set the class-level data folder before creating the loader instance.
DataLoader.DATA_FOLDER = './data/training/'
dl = DataLoader()
df = dl.get_simple_protein_metabolite_dataframe()
# Column labels of the loaded table; get_gene_reaction() keeps only genes
# found in this list.
ENZYMES = df.to_df().columns.to_list()

# Labels that are matched (as substrings) against model metabolite ids.
# NOTE(review): some labels join several names with ';' — presumably pooled
# measurements; confirm against the dataset.
PRECURSOR_METABOLITES = [
    'g6p;g6p-B',
    'g6p;f6p;g6p-B',
    'f6p',
    'dhap',
    '3pg;2pg',
    'pep',
    'pyr',
    'r5p',
    'e4p',
    'accoa',
    'akg',
    'oaa',
]

def get_gene_reaction(cobra_model=None, enzymes=None):
    """Build gene -> reactions and reaction -> genes lookups for selected genes.

    Args:
        cobra_model: Object exposing ``genes``; every gene must have an ``id``
            and an iterable ``reactions`` whose items have an ``id``.
            Defaults to the notebook-level ``model``.
        enzymes: Iterable of gene ids to keep. Defaults to the notebook-level
            ``ENZYMES``.

    Returns:
        A ``(gene_to_reaction, reaction_to_gene)`` tuple of
        ``defaultdict(list)`` objects keyed by gene id and reaction id
        respectively. Genes whose id is not in ``enzymes`` are skipped.
    """
    if cobra_model is None:
        cobra_model = model
    if enzymes is None:
        enzymes = ENZYMES

    # Membership is tested on the gene *id*: the gene objects themselves are
    # not the strings stored in ``enzymes``. A set keeps each lookup O(1).
    enzyme_ids = set(enzymes)

    gene_to_reaction: DefaultDict[str, List[str]] = defaultdict(list)
    reaction_to_gene: DefaultDict[str, List[str]] = defaultdict(list)

    for gene in cobra_model.genes:
        if gene.id not in enzyme_ids:
            continue

        for reaction in gene.reactions:
            gene_to_reaction[gene.id].append(reaction.id)
            reaction_to_gene[reaction.id].append(gene.id)
    return gene_to_reaction, reaction_to_gene

def is_precursor_metabolite(metabolite_model_id: str):
    """Lazily yield each precursor label contained in ``metabolite_model_id``.

    Labels come from the notebook-level ``PRECURSOR_METABOLITES`` and are
    yielded in that list's order; nothing is yielded when no label matches.

    NOTE(review): the check is a plain substring test, so a label also matches
    longer ids that merely contain it. Despite the ``is_`` prefix this returns
    a generator, which is always truthy — wrap it in ``any(...)`` or
    ``list(...)`` before using it as a condition.
    """
    yield from (
        label
        for label in PRECURSOR_METABOLITES
        if label in metabolite_model_id
    )

In [6]:
# Model metabolites whose id contains at least one precursor label
# (substring match), followed by their ids.
precursor_model_metabolites = [
    metabolite
    for metabolite in model.metabolites
    if any(pc_meta in metabolite.id for pc_meta in PRECURSOR_METABOLITES)
]
precursor_model_metabolites_id = [metabolite.id for metabolite in precursor_model_metabolites]

# Stoichiometric matrix restricted to the selected metabolite rows, keeping
# only the columns that have a non-zero coefficient in at least one such row.
precursor_stiochiometric_df: pd.DataFrame = (
    create_stoichiometric_matrix(model, array_type="DataFrame")
    .loc[precursor_model_metabolites_id]
    .loc[:, lambda frame: (frame != 0).any(axis=0)]
)

# Reaction objects for the remaining columns, in column order.
precursor_reactions: List[Reaction] = [
    model.reactions.get_by_id(reaction_id)
    for reaction_id in precursor_stiochiometric_df.columns.to_list()
]

In [7]:
# Show the filtered stoichiometric sub-matrix: one row per selected metabolite
# id, one column per reaction with a non-zero coefficient in those rows.
precursor_stiochiometric_df

Unnamed: 0,ACACT6p,ACACT7p,ACACT8p,ACACT9p,ACCOAC,ACCOACrm,AGTi,ACCOAtn,ACGAM6PS,ACGSm,...,TRPTA,PSERT,TYRTAi,TYRTAim,TYRTAip,YUMPS,SERD_L,UNK3,PYK,PYRDC
2dr5p_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1pyr5c_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1pyr5c_m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2cpr5p_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
akg_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
akg_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
akg_m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
akg_n,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
akg_x,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
accoa_c,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,-1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Lookup tables between the selected enzyme genes and the model reactions.
gene_to_reaction, reaction_to_gene = get_gene_reaction()

# For every reaction column of the precursor matrix, the ids of the enzyme
# genes mapped to it ([] when there are none). `.get` is used instead of
# indexing because reaction_to_gene is a defaultdict: indexing would insert an
# empty entry for every unmatched reaction as a side effect.
# NOTE(review): this rebinds `precursor_reactions`, which held Reaction objects
# in the earlier cell, to a list of gene-id lists; consider a distinct name
# once the downstream cells have been checked.
precursor_reactions: List[List[str]] = [
    reaction_to_gene.get(reaction_id, [])
    for reaction_id in precursor_stiochiometric_df.columns.to_list()
]
precursor_reactions

[['YIL160C'],
 ['YIL160C'],
 ['YIL160C'],
 ['YIL160C'],
 ['YNR016C'],
 ['YMR207C'],
 ['YFL030W'],
 [],
 ['YFL017C'],
 ['YMR062C', 'YJL071W'],
 ['YMR108W', 'YCL009C'],
 ['YMR108W', 'YCL009C'],
 ['YBL015W'],
 ['YDR148C', 'YIL125W', 'YFL018C'],
 [],
 [],
 [],
 [],
 ['YDR111C'],
 ['YLR089C'],
 ['YKL182W', 'YPL231W'],
 ['YBR026C', 'YER061C', 'YKL192C', 'YKL055C', 'YOR221C', 'YHR067W'],
 ['YOL140W'],
 ['YLR153C'],
 ['YAL054C'],
 ['YAL054C'],
 ['YMR289W'],
 ['YML035C', 'YJL070C', 'YBR284W'],
 ['YER090W', 'YKL211C'],
 ['YLR027C'],
 ['YKL106W'],
 ['YLR027C'],
 ['YBR291C'],
 [],
 [],
 ['YML042W'],
 ['YBR249C', 'YDR035W'],
 ['YDR035W'],
 ['YML042W'],
 ['YAR035W', 'YER024W'],
 ['YNR001C', 'YPR001W'],
 ['YCR005C'],
 ['YFL053W', 'YML070W'],
 [],
 [],
 ['YFR055W'],
 ['YGL184C'],
 [],
 ['YCR036W'],
 ['YLR027C'],
 ['YKL106W'],
 ['YLR027C'],
 ['YHR174W', 'YPL281C', 'YOR393W', 'YGR254W', 'YMR323W'],
 ['YEL071W', 'YDL174C', 'YJR048W', 'YEL039C'],
 ['YDL178W', 'YJR048W', 'YEL039C'],
 [],
 [],
 [],
 [],
 ['