In [29]:
import cobra
import pandas as pd
import re
import numpy as np
import scipy.stats as st
from matplotlib import pyplot as plt
from pathlib import Path
import sys
sys.path.append('../../code/')
import leakage, utils
import pubchempy as pcp
import seaborn as sns

In [30]:
# Load the E. coli SBML model (momentiJO1366) that the simulations below constrain and optimize.
model = cobra.io.read_sbml_model('../../models/e_coli/momentiJO1366.xml')
# Optional knock-outs, currently disabled. Uncomment a line to apply it to the loaded model.
# model.reactions.DHAPT.knock_out()
# model.reactions.THRA.knock_out()
# model.reactions.THRA2.knock_out()
# model.reactions.F6PA_TG_forward.knock_out()
# Speculative knock-out (disabled):
# model.reactions.PFL.knock_out()
# I'm more and more convinced that PFL is not active in aerobic conditions
# https://doi.org/10.1016/j.jbc.2021.101423

In [31]:
def get_leakage(time, exometabolites_folder=Path("../../data/e_coli/")):
    """Return the E. coli leakage table for one time point, indexed by metabolite.

    Parameters
    ----------
    time : float
        Time point forwarded to ``leakage.get_leakage`` (unit not visible
        here; presumably hours, matching the "/h" rate columns -- TODO confirm).
    exometabolites_folder : pathlib.Path, optional
        Folder containing the exometabolite data. Defaults to the location
        that used to be hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``leakage.get_leakage`` (called for "ecoli" with
        ``unit='/gDW'`` and ``method='spline'``), re-indexed by its
        ``"Metabolite"`` column and with duplicate rows removed.
    """
    leakage_df = leakage.get_leakage(exometabolites_folder, "ecoli", time = time, unit = '/gDW', method = 'spline')
    # NOTE: drop_duplicates() compares column values only; the "Metabolite"
    # index is ignored, so two metabolites with identical rows collapse to one.
    # Chaining (instead of inplace=True) leaves the helper's frame untouched.
    return leakage_df.set_index("Metabolite").drop_duplicates()

In [32]:
def get_turnover_flux(m, solution):
    """Sum the positive flux contributions of all reactions involving ``m``.

    For each reaction in ``m.reactions`` the reaction flux taken from
    ``solution.fluxes`` is multiplied by the coefficient of ``m`` in that
    reaction; only products greater than zero are added up.

    Parameters
    ----------
    m : metabolite-like
        Object exposing ``id`` and ``reactions`` (e.g. a cobra Metabolite).
    solution : solution-like
        Object whose ``fluxes`` can be indexed by reaction id.

    Returns
    -------
    number
        Sum of the positive ``flux * coefficient`` terms; ``0`` when there
        are none.
    """
    # Use the metabolite's own id. The previous version read a name `m_id`
    # that is not defined in this function and therefore only worked when a
    # matching global happened to exist at call time.
    met_id = m.id
    total_flux = 0
    for r in m.reactions:
        flux_r = solution.fluxes[r.id] * r.get_coefficient(met_id)
        if flux_r > 0:
            total_flux += flux_r
    return total_flux

In [33]:
# Folder with the E. coli exometabolite data; passed to the `leakage` helpers in the time-point loop.
exometabolites_folder = Path("../../data/e_coli/")


In [34]:
# Curated per-metabolite information; the file is read as ISO-8859-1 and its first column becomes the index.
met_info_df = pd.read_csv("../../data/met_info_curated.csv", encoding = "ISO-8859-1", index_col = 0)

In [35]:
# Read the metabolite id mapping (first CSV column becomes the index).
mapping_df = pd.read_csv('../../data/id_mapping.csv', index_col=0)
# The merge of this mapping with the leakage table (index on index) is done
# per time point inside the loop further down, not here.

# Get leakage


In [36]:
# Time points at which the leakage rates are evaluated.
timepoints = np.arange(4, 12.5, 0.5)#[5,6,7,8,9,10,11, 12, 13]
# One annotated frame per time point. The frames are concatenated once after
# the loop instead of growing `full_df` with a pd.concat call per iteration.
timepoint_frames = []
for time in timepoints:
    leakage_df = get_leakage(time)
    # Consider to use an earlier time-point
    glucose_uptake_rate = leakage.get_glucose_uptake_rate(exometabolites_folder, "ecoli", time = time, method = 'spline')
    # Attach the id mapping (index on index), then the curated metabolite info.
    df2 = pd.merge(leakage_df, mapping_df, left_index=True, right_index=True)
    df2 = df2.drop(columns='Metabolite name')
    df = pd.merge(met_info_df, df2, left_on = 'Metabolite id', right_on = 'Ecoli metabolite')
    df['Time'] = time
    df['Glucose'] = -glucose_uptake_rate

    # Set model constraints. cobra reverts the bound changes made inside the
    # `with model:` context when the block exits, so every time point starts
    # from the unmodified model.
    with model:
        # min(..., 0) keeps the glucose exchange lower bound non-positive.
        model.reactions.EX_glc__D_e.lower_bound = min(glucose_uptake_rate, 0)
        for j, row in df.iterrows():
            # A negative leakage value is applied as the lower bound of the
            # metabolite's exchange reaction.
            if row['Leakage (mM/gDW/h)'] < 0:
                met_ids = row['Metabolite id'].split(',')
                mets = []
                for m_id in met_ids:
                    try:
                        m = model.metabolites.get_by_id('{0}_e'.format(m_id.strip(' ')))
                    except KeyError:
                        # No extracellular metabolite with this id in the model.
                        continue
                    else:
                        mets.append(m)
                for m in mets:
                    # The exchange reaction is taken to be the first reaction
                    # of the metabolite that involves exactly one metabolite.
                    r_ex = [r for r in m.reactions if len(r.metabolites)==1][0]
                    # The measured rate is split evenly over the matched ids.
                    r_ex.lower_bound = row['Leakage (mM/gDW/h)']/len(mets)
                    # Should check solutions
                    # print(r_ex.id)
        solution = model.optimize()
        # List already excreted metabolites: ids (last two characters, i.e.
        # the compartment suffix, stripped) of boundary reactions with
        # non-zero flux.
        exchanged_mets = []
        for r in model.boundary:
            flux = solution.fluxes[r.id]
            if flux != 0:
                exchanged_mets.append(list(r.metabolites.keys())[0].id[:-2])

        # print(model.summary())
        df['Predicted growth rate'] = solution.objective_value
        # Get turnover and shadow prices
        turnover = {}
        shadow_prices = {}
        for j, row in df.iterrows():
            if row['Leakage (mM/gDW/h)'] > 0:
                met_ids = row['Metabolite id'].split(',')
                sp_list = []
                turnover_list = []
                for key in met_ids:
                    # Skip metabolites the solution already exchanges.
                    if key.strip() in exchanged_mets:
                        continue
                    m_id = "{0}_c".format(key.strip())
                    m = model.metabolites.get_by_id(m_id)
                    sp_list.append(leakage.estimate_shadow_price_for_met(model, m, solution, delta = 0.01))
                    turnover_list.append(get_turnover_flux(m, solution))
                # print(met_ids, sp_list, turnover_list)
                # Shadow prices: average over the ids of this row, NaN when
                # every id was skipped.
                if len(sp_list):
                    shadow_prices[j] = np.nanmean(sp_list)
                    turnover[j] = np.mean(turnover_list)
                else:
                    shadow_prices[j] = np.nan
                    turnover[j] = np.nan
                # print(met_ids, np.nanmean(sp_list))
        # Rows without a positive leakage get no entry and therefore NaN.
        df["Shadow price"] = pd.Series(shadow_prices)
        df["Turnover"] = pd.Series(turnover)

    timepoint_frames.append(df)

# Row labels repeat across time points, exactly as with the incremental concat.
full_df = pd.concat(timepoint_frames)


In [37]:
# Work on a copy so the columns added and clipped in the following cells leave full_df unchanged.
new_df = full_df.copy()

In [38]:
# Uptake is the leakage rate with its sign flipped.
new_df['Uptake (mM/gDW/h)'] = new_df['Leakage (mM/gDW/h)'].mul(-1)

In [39]:
# Zero out the negative entries of both rate columns, so each one only keeps
# the values of its own direction.
for rate_column in ('Leakage (mM/gDW/h)', 'Uptake (mM/gDW/h)'):
    is_negative = new_df[rate_column] < 0
    new_df.loc[is_negative, rate_column] = 0


In [40]:
# Preview the first rows of the annotated table.
new_df.head()

Unnamed: 0,Metabolite name,Metabolite id,Mass,Charge,Phosphate,Topological Polar Surface Area,Concentration in E.coli,log P,Metabolite class,Pathway,Average carbon oxidation,Leakage (mM/gDW/h),Ecoli metabolite,Yeast metabolite,Time,Glucose,Predicted growth rate,Shadow price,Turnover,Uptake (mM/gDW/h)
0,Glucose-6-phosphate,g6p,258.119901,-2.0,1,165.0,0.003,-4.5,Phosphorylated sugar,Glycolysis,0.833333,0.708294,g6p,s_0568,4.0,9.794747,0.710766,-0.018942,9.794747,0.0
1,Fructose-6-phosphate,f6p,258.119901,-2.0,1,165.0,0.003,-4.3,Phosphorylated sugar,Glycolysis,0.833333,1.720849,f6p,s_0557,4.0,9.794747,0.710766,-0.01904,8.831865,0.0
2,"Fructose-1,6-bisphosphate",fdp,336.083922,-4.0,2,215.0,0.0152,-5.9,Phosphorylated sugar,Glycolysis,3.333333,0.0,fdp,s_0555,4.0,9.794747,0.710766,,,0.014509
3,Dihydroxyacetonephosphate,dhap,168.041961,-2.0,1,104.0,0.000374,-5.9,Other,Glycolysis,1.666667,1.59885,dhap,s_0629,4.0,9.794747,0.710766,-0.014463,8.764706,0.0
4,Glyceraldehyde-3-phosphate,g3p,168.041961,-2.0,1,104.0,,-2.75,Other,Glycolysis,1.666667,0.064888,g3p,s_0764,4.0,9.794747,0.710766,-0.014467,17.740424,0.0


In [41]:
# log10-transformed copies of the rate, shadow-price and turnover columns.
# Leakage was clipped to 0 above, so those rows become -inf here, and a
# non-negative shadow price yields -inf/NaN; numpy reports both as
# RuntimeWarnings.
new_df_log_inputs = {
    'log10(-leakage)': new_df['Leakage (mM/gDW/h)'],
    'log10(Shadow price)': -new_df['Shadow price'],
    'log10(Turnover)': new_df['Turnover'],
}
for log_column, raw_values in new_df_log_inputs.items():
    new_df[log_column] = np.log10(raw_values)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [42]:
# Export the annotated table (including the log10 columns) to a CSV file in the current working directory.
new_df.to_csv('spreadsheet_ecoli_leakage_and_info_230420.csv')

In [167]:
# Keep only the rows that have a shadow price, a turnover below 100 and a
# strictly positive leakage rate.
# NOTE(review): the reason for the turnover cut-off of 100 is not visible here.
keep_rows = (
    full_df['Shadow price'].notna()
    & (full_df['Turnover'] < 100)
    & (full_df['Leakage (mM/gDW/h)'] > 0)
)
# .copy() makes full_leakage an independent frame, so adding columns to it
# later neither touches full_df nor triggers pandas' SettingWithCopyWarning.
full_leakage = full_df.loc[keep_rows, :].copy()

In [168]:
# log10-transformed columns for the filtered table. A non-negative shadow
# price gives -inf/NaN here, which numpy reports as a RuntimeWarning.
full_leakage_log_inputs = {
    'log10(-leakage)': full_leakage['Leakage (mM/gDW/h)'],
    'log10(Shadow price)': -full_leakage['Shadow price'],
    'log10(Turnover)': full_leakage['Turnover'],
}
for log_name, source_values in full_leakage_log_inputs.items():
    full_leakage[log_name] = np.log10(source_values)

  result = getattr(ufunc, method)(*inputs, **kwargs)
