In [1]:
import cobra
import pandas as pd
import re
import numpy as np
import scipy.stats as st
from matplotlib import pyplot as plt
from pathlib import Path
import sys
sys.path.append('../../code/')
import leakage, utils
import pubchempy as pcp
import seaborn as sns
import time


# Settings / choices

In [2]:
# Forwarded to leakage.get_leakage(); when True only significant changes are requested.
only_significant_changes = False
# Timepoints that are analysed one by one: 1.5, 2.5, ..., 11.5
# (presumably hours after inoculation -- TODO confirm the unit).
timepoints = np.arange(1.5, 12, 1)
# When True, a fixed set of reactions is knocked out right after the model is loaded.
knock_outs = False
# When True, shadow prices are also estimated for metabolites whose exchange
# reaction already carries flux in the FBA solution; when False those are skipped.
shadow_price_for_leaked_mets = True
# Species key used to build the model, data and result paths.
species = 'e_coli'

In [3]:
# Load the SBML model stored in the folder of the selected species.
model = cobra.io.read_sbml_model(
    '../../models/{0}/momentiJO1366.xml'.format(species)
)

# Optional knock-outs, applied in this order when `knock_outs` is enabled.
if knock_outs:
    for reaction_id in ('DHAPT', 'THRA', 'THRA2', 'F6PA_TG_forward'):
        getattr(model.reactions, reaction_id).knock_out()

# Speculation (left disabled): I'm more and more convinced that PFL is not
# active in aerobic conditions, see https://doi.org/10.1016/j.jbc.2021.101423
# model.reactions.PFL.knock_out()

Set parameter Username
Academic license - for non-commercial use only - expires 2024-02-26


In [4]:
def get_leakage(time):
    """Return the leakage table for one timepoint, indexed by metabolite.

    Thin wrapper around ``leakage.get_leakage`` that fills in the
    notebook-level ``species`` and ``only_significant_changes`` settings and
    requests rates per gDW with the 'one-way-diff' method.

    Parameters
    ----------
    time : float
        Timepoint forwarded to ``leakage.get_leakage``. The name shadows the
        imported ``time`` module inside this function only; it is kept so
        that existing keyword callers keep working.

    Returns
    -------
    pandas.DataFrame
        The leakage table with the "Metabolite" column as index and fully
        duplicated rows removed.
    """
    exometabolites_folder = Path("../../data/{0}/".format(species))
    leakage_df = leakage.get_leakage(
        exometabolites_folder,
        species,
        time=time,
        unit='/gDW',
        method='one-way-diff',
        only_significant_changes=only_significant_changes,
    )
    # Build new frames instead of mutating the object returned by the helper.
    leakage_df = leakage_df.set_index("Metabolite")
    # Duplicates are judged on the column values only (the index is ignored).
    return leakage_df.drop_duplicates()

In [5]:
# Species-specific data folder, built from the `species` setting in the same
# way get_leakage() does, so both always point at the same data.
exometabolites_folder = Path("../../data/{0}/".format(species))


In [6]:
# Curated metabolite information table; the first CSV column becomes the
# index and the file is decoded as ISO-8859-1.
met_info_df = pd.read_csv("../../data/met_info_curated.csv", encoding = "ISO-8859-1", index_col = 0)

In [7]:
# Read the metabolite id mapping (first CSV column as index). In the
# per-timepoint loop it is joined on its index with the leakage table; its
# 'Metabolite name' column is dropped there and 'Ecoli metabolite' is used as
# the key for the merge with the metabolite information table.
mapping_df = pd.read_csv('../../data/id_mapping.csv', index_col=0)

# Get leakage


In [8]:
# For every timepoint: constrain the model with the measured uptake rates,
# solve FBA and pFBA, and estimate shadow price and turnover for each leaked
# metabolite. The per-timepoint tables are collected in a list and
# concatenated once after the loop.
timepoint_frames = []
for t in timepoints:
    print(t)
    leakage_df = get_leakage(t)
    # Consider to use an earlier time-point
    glucose_uptake_rate = leakage.get_glucose_uptake_rate(exometabolites_folder, species, time = t, method = 'one-way-diff')
    # Attach the id mapping and the curated metabolite information to the rates.
    df2 = pd.merge(leakage_df, mapping_df, left_index=True, right_index=True)
    df2.drop(columns='Metabolite name', inplace=True)
    df = pd.merge(met_info_df, df2, left_on = 'Metabolite id', right_on = 'Ecoli metabolite')
    df['Time'] = t
    df['Glucose'] = -glucose_uptake_rate
    print(glucose_uptake_rate)
    # Set model constraints. All bound changes are made inside the model
    # context so that they do not carry over to the next timepoint.
    with model:
        # Limit glucose uptake to the measured rate; a positive value is clamped to 0.
        model.reactions.EX_glc__D_e.lower_bound = min(glucose_uptake_rate, 0)
        for j, row in df.iterrows():
            # A negative rate is a measured uptake: open the matching exchange(s).
            if row['Leakage (mmol/gDW/h)'] < 0:
                met_ids = row['Metabolite id'].split(',')
                mets = []
                for m_id in met_ids:
                    try:
                        m = model.metabolites.get_by_id('{0}_e'.format(m_id.strip(' ')))
                    except KeyError:
                        # The model has no '<id>_e' metabolite for this id.
                        continue
                    else:
                        mets.append(m)
                for m in mets:
                    # The exchange reaction is the one involving only this metabolite.
                    r_ex = [r for r in m.reactions if len(r.metabolites)==1][0]
                    # The measured uptake is split equally over all mapped metabolites.
                    r_ex.lower_bound = row['Leakage (mmol/gDW/h)']/len(mets)
                    # Should check solutions
                    # print(r_ex.id)
        solution = model.optimize()
        try:
            pfba_solution = cobra.flux_analysis.pfba(model)
        except Exception as err:
            # Only ordinary errors are caught here, so KeyboardInterrupt and
            # SystemExit still propagate; the cause is reported, not swallowed.
            print('No feasible model at t: ', t)
            print('pFBA failed with {0}: {1}'.format(type(err).__name__, err))
            pfba_solution = None
            # As before, this timepoint and all later ones are skipped.
            break
        # List already excreted metabolites: every boundary reaction with a
        # non-zero FBA flux, keyed by the metabolite id without its last two
        # characters (the compartment suffix, e.g. '_e').
        exchanged_mets = {}
        for r in model.boundary:
            flux = solution.fluxes[r.id]
            if flux != 0:
                exchanged_mets[list(r.metabolites.keys())[0].id[:-2]]=(r.id, flux)

        # print(model.summary())
        df['Predicted growth rate'] = solution.objective_value
        # Get turnover and shadow prices
        turnover = {}
        shadow_prices = {}
        for j, row in df.iterrows():
            # Only metabolites with a positive (leaked) rate are evaluated.
            if row['Leakage (mmol/gDW/h)'] > 0:
                met_ids = row['Metabolite id'].split(',')
                sp_list = []
                turnover_list = []
                for key in met_ids:
                    if key.strip() in exchanged_mets.keys():
                        if shadow_price_for_leaked_mets:
                            # the change should take into account the already exchanged flux
                            existing_flux = exchanged_mets[key.strip()]
                        else:
                            continue
                    else:
                        existing_flux = None
                    m_id = "{0}_c".format(key.strip())
                    m = model.metabolites.get_by_id(m_id)
                    sp_list.append(leakage.estimate_shadow_price_for_met(model, m, solution, delta = 0.01, existing_flux = existing_flux))
                    # turnover_list.append(get_turnover_flux(m, pfba_solution))
                    # Turnover = summed producing flux of the metabolite in the pFBA solution.
                    turnover_list.append(m.summary(pfba_solution).producing_flux['flux'].sum())
                # print(met_ids, sp_list, turnover_list)
                # Shadow prices: average over all metabolites mapped to this row.
                if len(sp_list):
                    shadow_prices[j] = np.nanmean(sp_list)
                    turnover[j] = np.mean(turnover_list)
                else:
                    shadow_prices[j] = np.nan
                    turnover[j] = np.nan
                # print(met_ids, np.nanmean(sp_list))
        # Rows without an entry (no positive leakage) become NaN by index alignment.
        df["Shadow price"] = pd.Series(shadow_prices)
        df["Turnover"] = pd.Series(turnover)

    timepoint_frames.append(df)

if not timepoint_frames:
    # Without this check a stale full_df from an earlier run could be reused silently.
    raise RuntimeError('No timepoint produced a feasible model; full_df cannot be built')
# The per-timepoint row index is kept (not reset), as in the original table.
full_df = pd.concat(timepoint_frames)


1.5
-18.86115416154087
2.5
-12.172686206067802
3.5
-7.119658816422134
4.5
-10.701950340553177
5.5
-9.166626502740078
6.5
-6.302199545792762
7.5
-6.411720412090722
8.5
-2.3586869188214408
9.5
-3.4599560965707936
10.5
-1.5484931824657564
11.5
-0.24041508784785923


In [9]:
# Display the combined table (one block of rows per timepoint).
full_df

Unnamed: 0,Metabolite name,Metabolite id,Value,Uncertainty,Mass,Charge,Phosphate,Topological Polar Surface Area [],Concentration,log P,Leakage (mmol/gDW/h),Ecoli metabolite,Yeast metabolite,Time,Glucose,Predicted growth rate,Shadow price,Turnover
0,Glucose-6-phosphate,g6p,17.65,0.53,258.119901,-2.0,1,165.0,0.003000,-4.5,0.000982,g6p,s_0568,1.5,18.861154,0.749574,-0.007632,13.785070
1,Fructose-6-phosphate,f6p,29.29,0.60,258.119901,-2.0,1,165.0,0.003000,-4.3,0.001872,f6p,s_0557,1.5,18.861154,0.749574,-0.007752,13.785070
2,"Fructose-1,6-bisphosphate",fdp,34.21,0.72,336.083922,-4.0,2,215.0,0.015200,-5.9,0.000000,fdp,s_0555,1.5,18.861154,0.749574,,
3,Dihydroxyacetonephosphate,dhap,50.40,1.74,168.041961,-2.0,1,104.0,0.000374,-2.5,0.001909,dhap,s_0629,1.5,18.861154,0.749574,-0.008903,13.135649
4,Glyceraldehyde-3-phosphate,g3p,15.04,0.24,168.041961,-2.0,1,104.0,,-2.7,-0.000086,g3p,s_0764,1.5,18.861154,0.749574,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,Arginine,arg__L,9.61,0.91,175.208900,1.0,0,128.0,0.000569,-4.2,-0.000162,arg__L,s_0965,11.5,0.240415,0.024775,,
30,Histidine,his__L,6.84,0.01,155.154560,0.0,0,92.0,0.000068,-3.2,-0.000067,his__L,s_1006,11.5,0.240415,0.024775,,
31,Acetate,ac,12949.00,851.00,59.044020,-1.0,0,40.1,,0.4,-0.373510,ac,s_0362,11.5,0.240415,0.024775,,
32,Orotate,orot,1100.00,27.00,155.088320,-1.0,0,98.3,,-0.8,-0.017232,orot,s_1269,11.5,0.240415,0.024775,,


# Add median shadow prices


In [10]:
# Work on a copy so that the columns added and removed in the following cells
# leave full_df untouched.
new_df = full_df.copy()

In [11]:
# Table read from median_sp.csv, indexed by its first column (metabolite id);
# it is queried below by 'Growth rate' for the 'log10(-Shadow price)' value.
# NOTE(review): the path is hard-coded to 'e_coli' instead of being built from
# `species` -- confirm that this is intended.
median_sp_df = pd.read_csv('../../results/e_coli/median_sp.csv', index_col = 0)

In [12]:
# Growth rates at which the "low" and "high" reference values are read.
LOW_GROWTH_RATE = 0.1
HIGH_GROWTH_RATE = 0.6
SP_COLUMN = 'log10(-Shadow price)'


def _sp_at_growth_rate(met_table, growth_rate):
    """Return the tabulated log10(-shadow price) of one metabolite.

    Parameters
    ----------
    met_table : pandas.DataFrame
        Rows of ``median_sp_df`` that belong to a single metabolite.
    growth_rate : float
        Growth rate to look up in the 'Growth rate' column.

    Returns
    -------
    float
        The first matching value, or NaN when the growth rate is not
        tabulated (instead of failing with an IndexError).
    """
    # Tolerance-based match: exact `==` on floats is fragile.
    is_match = np.isclose(met_table['Growth rate'], growth_rate)
    if not is_match.any():
        return np.nan
    return met_table.loc[is_match, SP_COLUMN].values[0]


# One value per row of new_df, in row order.
median_sp = []
low_sp = []
high_sp = []
for _, row in new_df.iterrows():
    # The table is read at the predicted growth rate rounded to one decimal.
    gr = np.round(row['Predicted growth rate'], 1)

    tmp_m = []
    tmp_low = []
    tmp_high = []
    for key in row['Ecoli metabolite'].split(','):
        met_id = '{0}_c'.format(key.strip())
        # Double brackets always give a DataFrame, even for a single matching row.
        met_df = median_sp_df.loc[[met_id], :]
        tmp_m.append(_sp_at_growth_rate(met_df, gr))
        tmp_low.append(_sp_at_growth_rate(met_df, LOW_GROWTH_RATE))
        tmp_high.append(_sp_at_growth_rate(met_df, HIGH_GROWTH_RATE))
    # Rows that map to several metabolites get the mean over those metabolites.
    median_sp.append(np.mean(tmp_m))
    low_sp.append(np.mean(tmp_low))
    high_sp.append(np.mean(tmp_high))

In [13]:
# Attach the three per-row lists as new columns; the lists follow the row
# order of new_df.
for column_name, column_values in (
    ('Median log10(-Shadow price)', median_sp),
    ('Low log10(-Shadow price)', low_sp),
    ('High log10(-Shadow price)', high_sp),
):
    new_df[column_name] = column_values

In [14]:
# Spot check: all rows of the shadow-price table for metabolite 'g6p_c'.
median_sp_df.loc['g6p_c',:]

Unnamed: 0_level_0,Growth rate,Shadow price,log10(-Shadow price)
Metabolite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
g6p_c,0.0,-0.115184,-0.93861
g6p_c,0.1,-0.112651,-0.948263
g6p_c,0.2,-0.112651,-0.948263
g6p_c,0.3,-0.094789,-1.02324
g6p_c,0.4,-0.091241,-1.039808
g6p_c,0.5,-0.045538,-1.341631
g6p_c,0.6,-0.020794,-1.682068
g6p_c,0.7,-0.009332,-2.030039


In [15]:
# full_leakage['Uptake (mmol/gDW/h)'] = 0
# new_df = full_df.copy()

In [16]:
# Uptake is the leakage rate with the opposite sign.
new_df['Uptake (mmol/gDW/h)'] = -new_df['Leakage (mmol/gDW/h)']

In [17]:
# Rows with a negative leakage are uptake: blank their leakage value.
is_taken_up = new_df['Leakage (mmol/gDW/h)'] < 0
new_df.loc[is_taken_up, 'Leakage (mmol/gDW/h)'] = np.nan

# Rows with a negative uptake are leakage: set their uptake to zero.
is_leaked = new_df['Uptake (mmol/gDW/h)'] < 0
new_df.loc[is_leaked, 'Uptake (mmol/gDW/h)'] = 0


In [18]:
def _finite_log10(values):
    """Return log10 of a pandas Series with undefined results as NaN.

    Zero inputs give NaN instead of -inf and negative inputs give NaN; the
    corresponding NumPy floating-point warnings are silenced because these
    cases are expected here.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.log10(values).replace(-np.inf, np.nan)


# All three log columns are treated the same way: previously only the
# turnover column had -inf replaced, so zero leakage or a zero shadow price
# ended up as -inf in the exported table.
new_df['log10(Leakage [mmol/gDW/h])'] = _finite_log10(new_df['Leakage (mmol/gDW/h)'])
new_df['log10(-Shadow price [gDW/mmol])'] = _finite_log10(-new_df['Shadow price'])
new_df['log10(Turnover [mmol/gDW/h])'] = _finite_log10(new_df['Turnover'])

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [19]:
# Remove the columns that are not wanted in the exported spreadsheet.
columns_to_drop = ['Value', 'Uncertainty', 'Yeast metabolite', 'Ecoli metabolite']
new_df = new_df.drop(columns=columns_to_drop)

In [20]:
# Date stamp (YYYYMMDD) that becomes part of the output file name.
timestr = time.strftime("%Y%m%d")

# Optional file-name suffixes that record which settings were switched on.
s1 = '_osc' if only_significant_changes else ''
s2 = '_SP_for_leaked' if shadow_price_for_leaked_mets else ''
s3 = '_KO' if knock_outs else ''

# Write the table to the result folder of the selected species.
fn = 'spreadsheet_{0}_leakage_{1}{2}{3}{4}.csv'.format(species, timestr, s1, s2, s3)
folder = Path('../../results/{0}/'.format(species))
new_df.to_csv(folder / fn)

In [21]:
# full_leakage = full_df.loc[~full_df['Shadow price'].isna(), :]
# full_leakage = full_leakage.loc[full_leakage.Turnover <100, :]
# full_leakage = full_leakage.loc[full_leakage['Leakage (mmol/gDW/h)'] > 0, :]

In [22]:
# full_leakage['log10(leakage)'] = np.log10(full_leakage['Leakage (mmol/gDW/h)'])
# full_leakage['log10(-Shadow price)'] = np.log10(-full_leakage['Shadow price'])
# full_leakage['log10(Turnover)'] = np.log10(full_leakage['Turnover'])