In [53]:
import pandas as pd
import numpy as np
import reframed
from pathlib import Path
import seaborn as sns
from matplotlib import pyplot as plt
import scipy.stats as st
import sys 
sys.path.append('../../../code/')
import leakage
import utils
from matplotlib.colors import LogNorm, Normalize

from matplotlib import cm, colors
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
from collections import OrderedDict
from matplotlib import pyplot as plt
from sklearn import metrics

In [2]:
# Directory with the vila_2023 data files that this notebook reads from and writes to
folder = Path('../../../data/vila_2023/')



# Read in experimental data


In [3]:
# Load the targeted LC-MS table and give its columns the names used in the rest of the notebook
targeted_fn = folder / 'Targeted_LCMS.csv'
dft = pd.read_csv(targeted_fn).rename(
    columns={
        'Carbon_Source': 'Carbon source',
        'Metabolite_Conc': 'Concentration',
    }
)

In [4]:
# Carbon sources that occur in the targeted LC-MS table
exomet_carbon_sources = dft['Carbon source'].unique()

In [5]:
# Growth table; later cells read the columns SangerID, CS, T0, T16, N0, max_OD, gcr and r from it
growth_fn = folder / 'Growth_Rates_Fig2.csv'
growth_df = pd.read_csv(growth_fn)

In [6]:
# Map the SangerID values of the growth table to the strain names used in the targeted data
sanger_to_strain = {
    160: 'Enterobacter', # Targeted data
    162: 'Pseudomonas', # Targeted data
    360: 'Enterobacter', # Untargeted data
    449: 'Pseudomonas' # Untargeted data
}

In [7]:
# For this analysis we are only interested in the targeted data
# (SangerID 160 and 162, see sanger_to_strain).
# Take an explicit copy: new columns are assigned to growth_df in later cells, and writing
# to a filtered slice of the original table would raise pandas' SettingWithCopyWarning.
growth_df = growth_df.loc[growth_df.SangerID.isin([160, 162])].copy()

# Estimate biomass AUC at 16h
We do not have the time-series data, so the biomass AUC has to be predicted from the estimated growth rate, carrying capacity and initial abundance. Since we know that the growth curves were fitted with the logistic function of growthcurver (https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1016-7), we can integrate this function to get the AUC after 16 h (which corresponds to their first timepoint, more or less at the end of exponential phase).


In [8]:
# Compensate for N0 not being at T=0 (T0 = 1)
# NOTE: read this column as growth_df['T']; attribute access growth_df.T is the DataFrame transpose.
growth_df['T'] = growth_df.T16 - growth_df.T0

### Define logistic function (as defined in growthcurver) and the integral of that function

In [9]:
def logistic_auc(t1, t0, K, r, N0):
    """Area under the logistic growth curve between t0 and t1.

    Parameters
    ----------
    t1 : upper integration limit
    t0 : lower integration limit
    K : carrying capacity of the logistic curve
    r : growth rate of the logistic curve
    N0 : population size at t = 0

    Returns
    -------
    The definite integral, i.e. logistic_integral(t1) - logistic_integral(t0).
    """
    upper = logistic_integral(t1, K, r, N0)
    lower = logistic_integral(t0, K, r, N0)
    return upper - lower


def logistic_integral(t, K, r, N0):
    """Antiderivative of K / (1 + ((K - N0) / N0) * exp(-r * t)) evaluated at t.

    The integration constant is omitted, so only differences between two
    evaluations are meaningful (see logistic_auc).
    """
    ratio = (K - N0) / N0
    linear_term = K * t
    log_term = (K / r) * np.log(1 + ratio * np.exp(-r * t))
    return linear_term + log_term
    

def logistic(t, K, r, N0):
    """Logistic growth curve: population size at time t.

    K is the carrying capacity, r the growth rate and N0 the population size
    at t = 0 (the function returns N0 for t = 0).
    """
    decay = np.exp(-r * t)
    return K / (1 + (K - N0) / N0 * decay)
    

In [10]:
# gcr is the growth rate obtained from growthcurver
def _auc16_from_row(row):
    """Integrate the fitted logistic curve of one growth_df row from 0 to row['T']."""
    return logistic_auc(
        t1=row['T'],
        t0=0,
        K=row['max_OD'],
        r=row['gcr'],
        N0=row['N0'],
    )


growth_df['AUC16'] = growth_df.apply(_auc16_from_row, axis=1)

In [11]:
# Per-strain factor used to convert OD-based quantities to gram dry weight
# (presumably gDW/L per OD unit — TODO confirm units against the cited sources)
gDW_per_OD = {
    'Enterobacter': 0.346, # Use same as for E. coli in galactose
    'Pseudomonas': np.mean([0.448, 0.505]) # Mean of two values; sources: https://pmc.ncbi.nlm.nih.gov/articles/PMC7809481/ and https://pmc.ncbi.nlm.nih.gov/articles/PMC4656820
}

In [12]:
# Add the strain name so the growth table can be joined with the targeted data on 'Strain'
growth_df['Strain'] = growth_df['SangerID'].map(sanger_to_strain)

In [13]:
# Convert the OD-based AUC to dry weight using the strain-specific factor
growth_df['AUC16gDW'] = growth_df.AUC16*growth_df.Strain.map(gDW_per_OD)

# Estimate rates

In [14]:
# Keep the 16 h samples of the two modelled strains and attach the matching growth parameters
is_t16 = dft.Timepoint == 16
is_modelled_strain = dft.Strain.isin(['Enterobacter', 'Pseudomonas'])
idx = is_t16 & is_modelled_strain
df_rate = dft.loc[idx].merge(
    growth_df,
    how='left',
    left_on=['Carbon source', 'Strain'],
    right_on=['CS', 'Strain'],
)

In [15]:
# Rate = measured concentration divided by the dry-weight biomass AUC up to the sampling time
df_rate['Rate [mmol/gDW/h]'] = df_rate.Concentration/df_rate.AUC16gDW*1e-3 # 1e-3 to convert from uM to mM

In [16]:
# Mean, standard error and standard deviation of the rate over replicates
df_meanrate = (
    df_rate
    .groupby(['Metabolite', 'Strain', 'Carbon source'])
    .agg({'Rate [mmol/gDW/h]': ['mean', 'sem', 'std']})
    .reset_index()
)

In [87]:
# Inspect the acetate rows of the rate table
df_rate.loc[df_rate.Metabolite=='Acetic acid']

Unnamed: 0,Concentration,Metabolite,Carbon source,Strain,Replicate,Timepoint,SangerID,CS,N0,Nmax,...,gcr,gcauc,gcr_se,r,r16,Family,T,AUC16,AUC16gDW,Rate [mmol/gDW/h]
14,4380.0,Acetic acid,D-Glucose,Enterobacter,1,16,160,D-Glucose,0.04,0.24,...,0.835219,14.799585,0.021879,0.282022,0.228916,Enterobacteriaceae,14.773889,5.25871,1.819514,2.407237
30,4575.0,Acetic acid,D-Glucose,Enterobacter,2,16,160,D-Glucose,0.04,0.24,...,0.835219,14.799585,0.021879,0.282022,0.228916,Enterobacteriaceae,14.773889,5.25871,1.819514,2.514408
59,98.6,Acetic acid,D-Glucose,Pseudomonas,1,16,162,D-Glucose,0.039,0.319,...,0.204283,8.299333,0.013915,0.086209,0.130891,Pseudomonadaceae,14.773889,2.043431,0.973695,0.101264
74,56.1,Acetic acid,D-Glucose,Pseudomonas,2,16,162,D-Glucose,0.039,0.319,...,0.204283,8.299333,0.013915,0.086209,0.130891,Pseudomonadaceae,14.773889,2.043431,0.973695,0.057616
99,766.0,Acetic acid,L-Malate,Enterobacter,1,16,160,L-Malate,0.039,0.179,...,0.235578,10.670152,0.005029,0.143839,0.142022,Enterobacteriaceae,14.774444,2.439476,0.844059,0.90752
112,1030.0,Acetic acid,L-Malate,Enterobacter,2,16,160,L-Malate,0.039,0.179,...,0.235578,10.670152,0.005029,0.143839,0.142022,Enterobacteriaceae,14.774444,2.439476,0.844059,1.220294
132,62.6,Acetic acid,L-Malate,Pseudomonas,1,16,162,L-Malate,0.04,0.191,...,0.578328,8.82962,0.06989,0.177114,0.181264,Pseudomonadaceae,14.774444,4.099455,1.95339,0.032047
159,77.4,Acetic acid,L-Malate,Pseudomonas,2,16,162,L-Malate,0.04,0.191,...,0.578328,8.82962,0.06989,0.177114,0.181264,Pseudomonadaceae,14.774444,4.099455,1.95339,0.039623
179,4160.0,Acetic acid,D-Fructose,Enterobacter,1,16,160,D-Fructose,0.04,0.246,...,0.725609,11.306414,0.049695,0.2237,0.209554,Enterobacteriaceae,14.773889,4.517414,1.563025,2.661505
198,3980.0,Acetic acid,D-Fructose,Enterobacter,2,16,160,D-Fructose,0.04,0.246,...,0.725609,11.306414,0.049695,0.2237,0.209554,Enterobacteriaceae,14.773889,4.517414,1.563025,2.546344


# Load metabolic models to estimate values

In [17]:
# E. coli model; it is the model assigned to the Enterobacter strain in strain_to_model
model_ez = reframed.load_cbmodel('../../../models/e_coli/momentiJO1366_modified.xml')
# NOTE(review): confirm that reframed actually reads a `solver` attribute set on the model
model_ez.solver = 'gurobi'
# Lower bound 0 on the glucose exchange: no glucose uptake unless a later constraint allows it
model_ez.reactions.R_EX_glc__D_e.lb = 0

In [18]:
# P. putida model; it is the model assigned to the Pseudomonas strain in strain_to_model
model_pp_fn = '../../../models/p_putida/eciJN1463_uniprot_AutoPACMEN.xml'
# Alternative model file:
# '../../../models/p_putida/iJN1463.xml'
model_pp = reframed.load_cbmodel(model_pp_fn)


# NOTE(review): confirm that reframed actually reads a `solver` attribute set on the model
model_pp.solver = 'gurobi'

# Lower bound 0 on the glucose exchange: no glucose uptake unless a later constraint allows it
model_pp.reactions.R_EX_glc__D_e.lb = 0

### Verify that the models have all carbon source uptake blocked

In [19]:
# Sanity check: with carbon uptake blocked, FBA should not return a growth solution
reframed.FBA(model_ez, objective=model_ez.biomass_reaction)

Set parameter Username
Set parameter LicenseID to value 2586336
Academic license - for non-commercial use only - expires 2025-11-18


Objective: None
Status: Infeasible or Unbounded

In [20]:
# Sanity check: with carbon uptake blocked, FBA should not return a growth solution
reframed.FBA(model_pp, objective=model_pp.biomass_reaction)

Objective: None
Status: Infeasible or Unbounded

In [21]:
# Metabolic model used for each strain name of the experimental data
strain_to_model = {
    'Enterobacter': model_ez,
    'Pseudomonas': model_pp,
}

# Mapping of metabolite names to bigg ids


In [22]:
# Measured metabolite name -> BiGG metabolite ID (without the 'M_' prefix and the compartment suffix)
translate_dict = {
    'alpha-Ketoglutaric acid': 'akg',
    'Butyric acid': 'but',
    'Valine': 'val__L',
    'Pyruvic acid': 'pyr',
    'Propionic acid': 'ppa',
    'Acetyl-ornithine': 'acorn',
    'Alanine': 'ala__L',
    'Acetic acid': 'ac',
    'Asparagine': 'asn__L',
    'beta-Hydroxybutyric acid': 'bhb',
    'Serine': 'ser__L',
    'Glycine': 'gly',
    'Succinic acid': 'succ',
    'Methionine': 'met__L',
    'Fumaric acid': 'fum',
    'Valeric acid': 'pta',
    'Citric acid': 'cit',
    'Lactic acid': 'lac__D',
    'Putrescine': 'ptrc',
    'Hippuric acid': 'bgly',
}
# Reverse lookup: BiGG metabolite ID -> measured metabolite name
translate_dict_r = {bigg_id: met_name for met_name, bigg_id in translate_dict.items()}

In [23]:
# Carbon source name -> exchange reaction ID (without the 'R_' prefix)
cs_to_exchange = {
    'D-Glucose': 'EX_glc__D_e',
    'D-Lactate': 'EX_lac__D_e',
    'Citrate': 'EX_cit_e',
    '2-Oxoglutarate': 'EX_akg_e',
    'Succinate': 'EX_succ_e',
    'Fumarate': 'EX_fum_e',
    'L-Malate': 'EX_mal__L_e',
    'L-Glutamine': 'EX_gln__L_e',
    'Acetate': 'EX_ac_e',
    'D-Galactose': 'EX_gal_e',
    'D-Ribose': 'EX_rib__D_e',
    'L-Arabinose': 'EX_arab__L_e',
    'Glycerol': 'EX_glyc_e',
    'D-Gluconate': 'EX_glcn_e',
    # NOTE(review): '25dkglcn' looks like 2,5-diketogluconate rather than 2-ketogluconate — confirm the ID
    '2-Ketogluconate': 'EX_25dkglcn_e',
    'Pyruvate': 'EX_pyr_e',
    'D-Fructose': 'EX_fru_e',
}


In [24]:
# Collect the measured metabolites that have a '_c' counterpart in model_ez
measured_mets_ez, measured_metnames_ez = [], []
for met_name, bigg_id in translate_dict.items():
    m_id = f'M_{bigg_id}_c'
    try:
        model_ez.metabolites[m_id]
    except KeyError:
        print(f'{met_name} not in model')
        continue
    measured_mets_ez.append(m_id)
    measured_metnames_ez.append(met_name)

beta-Hydroxybutyric acid not in model
Valeric acid not in model
Hippuric acid not in model


In [25]:
# Collect the measured metabolites that have a '_c' counterpart in model_pp
measured_mets_pp, measured_metnames_pp = [], []
for met_name, bigg_id in translate_dict.items():
    m_id = f'M_{bigg_id}_c'
    try:
        model_pp.metabolites[m_id]
    except KeyError:
        print(f'{met_name} not in model')
        continue
    measured_mets_pp.append(m_id)
    measured_metnames_pp.append(met_name)

Hippuric acid not in model


In [26]:
# Per strain: (model metabolite IDs, measured metabolite names) found in that strain's model
metnames_dict = {
    'Enterobacter': (measured_mets_ez, measured_metnames_ez),
    'Pseudomonas': (measured_mets_pp, measured_metnames_pp)
}

In [27]:
# Carbon sources present in the rate table
df_rate['Carbon source'].unique()

array(['D-Glucose', 'L-Malate', 'D-Fructose', 'Glycerol', 'Pyruvate'],
      dtype=object)

## Estimate uptake rates from maximum growth rates


In [28]:
# For every growth_df row whose carbon source has targeted data, solve an FBA with the
# biomass flux fixed to the measured growth rate and the exchange flux (bounded to [-10, 0])
# as objective. uptake_rates is aligned with the rows of growth_df; uptake_dict is keyed
# by (strain, carbon source) and only holds the combinations that were actually solved.
uptake_rates = []
uptake_dict = {}
for i, row in growth_df.iterrows():
    carbon_source = row['CS']
    if carbon_source not in exomet_carbon_sources:
        # Not interested in these
        uptake_rates.append(np.nan)
        continue

    strain_name = row['Strain']
    model = strain_to_model[strain_name]
    r_ex = f"R_{cs_to_exchange[carbon_source]}"
    if r_ex not in model.reactions:
        print(f'{r_ex} not in {model.id}')
        uptake_rates.append(np.nan)
        continue

    growth_rate = row['r']
    print(carbon_source, strain_name)
    fba_constraints = {
        r_ex: (-10, 0),
        model.biomass_reaction: (growth_rate, growth_rate),
    }
    sol = reframed.FBA(model, constraints=fba_constraints, objective={r_ex: 1})
    uptake_rates.append(sol.fobj)
    uptake_dict[(strain_name, carbon_source)] = sol.fobj
    

D-Glucose Enterobacter
D-Fructose Enterobacter
D-Galactose Enterobacter
D-Ribose Enterobacter
L-Arabinose Enterobacter
Glycerol Enterobacter
Pyruvate Enterobacter
Succinate Enterobacter
Fumarate Enterobacter
L-Malate Enterobacter
Acetate Enterobacter
D-Glucose Pseudomonas
D-Fructose Pseudomonas
R_EX_gal_e not in eciJN1463_uniprot_AutoPACMEN
D-Ribose Pseudomonas
R_EX_arab__L_e not in eciJN1463_uniprot_AutoPACMEN
Glycerol Pseudomonas
Pyruvate Pseudomonas
Succinate Pseudomonas
Fumarate Pseudomonas
L-Malate Pseudomonas
Acetate Pseudomonas


In [29]:
# uptake_rates holds one entry per growth_df row (NaN where no FBA was run)
growth_df['Uptake rate'] = uptake_rates

# Now calculate shadow prices
Can't estimate shadow price / metabolite value for butyrate because all reactions related to butyrate are blocked. 

In [30]:
# Estimate shadow prices of the measured metabolites for every (strain, carbon source)
# combination in the rate table, with the exchange flux fixed to the estimated uptake rate.
sp_dict = {}
for strain, model in strain_to_model.items():
    dfs = df_rate.loc[df_rate.Strain==strain]
    measured_mets, measured_metnames = metnames_dict[strain]
    for cs in dfs['Carbon source'].unique():
        r_ex = f"R_{cs_to_exchange[cs]}"
        # .get(): combinations whose exchange reaction is missing from the model were never
        # added to uptake_dict; skip them like unsolved ones instead of raising a KeyError.
        uptake_rate = uptake_dict.get((strain, cs))
        if not uptake_rate:
            # No entry, no FBA solution (None) or zero uptake
            continue
        sp_dict[(strain, cs)] = leakage.estimate_shadow_prices_reframed(
            model,
            constraints={r_ex:(uptake_rate, uptake_rate)},
            delta = 0.01, metabolites=measured_mets)


In [31]:
# Metabolite value = shadow price with flipped sign; reset_index() turns the row index into a column
mv_df = (pd.DataFrame(sp_dict)*-1).reset_index()

In [32]:
# mv_df

## Turnover

In [51]:
# Metabolite turnover from a pFBA solution for every (strain, carbon source) combination
# in the rate table, with the exchange flux fixed to the estimated uptake rate.
turnover_dict = {}

for strain, model in strain_to_model.items():
    dfs = df_rate.loc[df_rate.Strain==strain]
    measured_mets, measured_metnames = metnames_dict[strain]
    for cs in dfs['Carbon source'].unique():
        r_ex = f"R_{cs_to_exchange[cs]}"
        # .get(): combinations whose exchange reaction is missing from the model were never
        # added to uptake_dict; skip them like unsolved ones instead of raising a KeyError.
        uptake_rate = uptake_dict.get((strain, cs))
        if not uptake_rate:
            # No entry, no FBA solution (None) or zero uptake
            continue
        solution = reframed.pFBA(model, constraints={r_ex:(uptake_rate, uptake_rate)},
                                objective=model.biomass_reaction)
        turnover = solution.get_metabolites_turnover(model)
        # Keep only the metabolites that were measured and exist in this strain's model
        turnover_dict[(strain, cs)] = {m: turnover[m] for m in measured_mets}


In [65]:
# Carbon count and molecular weight of every measured metabolite, keyed by (strain, metabolite ID)
nc_dict = {}
mass_dict = {}
for strain, model in strain_to_model.items():
    measured_mets, measured_metnames = metnames_dict[strain]
    for m_id in measured_mets:
        m = model.metabolites[m_id]
        met_key = (strain, m_id)
        nc_dict[met_key] = utils.get_element_dict(m)['C']
        mass_dict[met_key] = utils.get_mol_weight(m)
        

In [34]:
# Wide -> long. turnover_dict is keyed by (strain, carbon source), so the columns of
# turnover_df form a two-level index and the reset index column is labelled ('index', '').
turnover_df = pd.DataFrame(turnover_dict).reset_index()
# melt() names the two column levels 'variable_0' and 'variable_1'; rename them to readable labels
turnover_dfL = turnover_df.melt(id_vars=[('index','')], value_vars=list(turnover_df.columns), 
                    value_name='Turnover [mmol/gDW/h]').rename(columns = {'variable_0':'Strain', 
                    'variable_1': 'Carbon source', ('index', ''):'Metabolite ID'})

In [36]:
# Wide -> long for the metabolite values, using the same column labels as turnover_dfL
# so that both tables can be merged on 'Metabolite ID', 'Strain' and 'Carbon source'
mv_dfL = mv_df.melt(id_vars=[('index','')], value_vars=list(mv_df.columns), 
                    value_name='Metabolite value [gDW/mmol]').rename(columns = {'variable_0':'Strain', 
                    'variable_1': 'Carbon source', ('index', ''):'Metabolite ID'})



In [37]:
# Combine turnover and metabolite value per metabolite, strain and carbon source (inner join)
mvt_dfL = turnover_dfL.merge(
    mv_dfL,
    on=['Metabolite ID', 'Strain', 'Carbon source'],
)



In [40]:
# Strip the 'M_' prefix and the '_c' suffix to recover the bare metabolite ID.
# The patterns are anchored so only the leading 'M_' and the trailing '_c' are removed
# (an unanchored replace would also remove these substrings inside an ID), and regex=True
# is passed explicitly because the default of Series.str.replace differs between pandas versions.
mvt_dfL['Metabolite ID'] = (
    mvt_dfL['Metabolite ID']
    .str.replace(r'^M_', '', regex=True)
    .str.replace(r'_c$', '', regex=True)
)

In [44]:
# Translate the metabolite IDs back to the metabolite names used in the measured data
mvt_dfL['Metabolite'] = mvt_dfL['Metabolite ID'].map(translate_dict_r)

In [45]:
# Flatten the two-level column index produced by the aggregation; the last three names
# follow the order of the aggregation functions (mean, sem, std)
df_meanrate.columns = ['Metabolite', 'Strain', 'Carbon source', 'Rate [mmol/gDW/h]',
                      'Std. Err [mmol/gDW/h]','Std. Dev [mmol/gDW/h]']

In [47]:
# Join measured mean rates with the model-derived turnover and metabolite values (inner join)
df = df_meanrate.merge(
    mvt_dfL,
    on=['Metabolite', 'Strain', 'Carbon source'],
)

In [48]:
# log10-transformed columns; zero values give -inf and negative values give NaN
# (numpy emits a RuntimeWarning for such entries)
df['log10(Rate [mmol/gDW/h])'] = np.log10(df['Rate [mmol/gDW/h]'])
df['log10(Metabolite value [gDW/mmol])'] = np.log10(df['Metabolite value [gDW/mmol]'])

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [49]:
# Measured metabolite name -> display name written to the results table
metname_translate_dict = {
    'Acetic acid': 'Acetate',
    'Acetyl-ornithine': 'Acetyl-ornithine',
    'Alanine': 'Alanine',
    'Asparagine': 'Asparagine',
    'Butyric acid': 'Butyrate',
    'Citric acid': 'Citrate',
    'Fumaric acid': 'Fumarate',
    'Glycine': 'Glycine',
    'Lactic acid': 'Lactate',
    'Methionine': 'Methionine',
    'Propionic acid': 'Propionate',
    'Putrescine': 'Putrescine',
    'Pyruvic acid': 'Pyruvate',
    'Serine': 'Serine',
    'Succinic acid': 'Succinate',
    'Valeric acid': 'Valerate',
    'Valine': 'Valine',
    'alpha-Ketoglutaric acid': 'Alpha-ketoglutarate',
    'beta-Hydroxybutyric acid': 'Beta-hydroxybutyrate',
}

In [50]:
# Replace the measured names by display names. Names without an entry in
# metname_translate_dict become NaN, so running this cell a second time blanks the column.
df['Metabolite'] = df['Metabolite'].map(metname_translate_dict)

In [73]:
# Look up molecular weight and carbon count per row via (strain, metabolite ID).
# Each dictionary is queried independently with NaN as fallback, so `mass` and `nc` always
# have one entry per row. (Appending inside one shared try block could leave the two lists
# with different lengths if a key were present in only one of the dictionaries.)
met_keys = [
    (strain_name, f'M_{met_id}_c')
    for strain_name, met_id in zip(df['Strain'], df['Metabolite ID'])
]
mass = [mass_dict.get(met_key, np.nan) for met_key in met_keys]
nc = [nc_dict.get(met_key, np.nan) for met_key in met_keys]
df['Mol weight [g/mol]'] = mass
df['N carbons'] = nc

In [81]:
# Constant label identifying the origin of these rows
df['Dataset'] = 'Vila-23'

In [84]:
# After this in-place rename the frame has no 'Strain' column any more; use 'Species' from here on
df.rename(columns={'Strain':'Species'}, inplace=True)

# Store results

In [85]:
# Write the combined table next to the input data.
# NOTE: to_csv() also writes the row index as an unnamed first column (default index=True).
rates_fn = folder / 'vila_2023_rates_and_values.csv'
df.to_csv(rates_fn)

In [None]:
# Turn -inf (from log10 of zero) into NaN, then drop every row that has a NaN in ANY column
# NOTE(review): this also drops rows whose only missing value is e.g. the standard error — confirm intended
df_notnan = df.replace(-np.inf, np.nan).dropna()

In [None]:
# One regression panel per carbon source, coloured by species.
# The 'Strain' column was renamed to 'Species' before df_notnan was derived from df,
# so hue must refer to 'Species' (hue='Strain' raises a KeyError).
sns.lmplot(data = df_notnan, x = 'log10(Rate [mmol/gDW/h])',
           y = 'log10(Metabolite value [gDW/mmol])', hue = 'Species', col = 'Carbon source', col_wrap=1)


In [None]:
# Show the rows that contain at least one missing value
df.loc[df.isna().any(axis=1)]

In [None]:
# Pearson correlation between log10 rate and log10 metabolite value per species,
# pooled over all carbon sources.
# Uses the 'Species' column (the 'Strain' column was renamed before df_notnan was created)
# and no longer prints `cs`, which was a stale variable left over from an earlier loop.
for strain in df_notnan.Species.unique():
    idx = (df_notnan.Species==strain)
    dfi = df_notnan.loc[idx]
    x = dfi['log10(Rate [mmol/gDW/h])']
    y = dfi['log10(Metabolite value [gDW/mmol])']
    print(strain, st.pearsonr(x,y))

In [None]:
# Pearson correlation between log10 rate and log10 metabolite value per species,
# pooled over all carbon sources.
# Uses the 'Species' column (the 'Strain' column was renamed before df_notnan was created)
# and no longer prints `cs`, which was a stale variable left over from an earlier loop.
for strain in df_notnan.Species.unique():
    idx = (df_notnan.Species==strain)
    dfi = df_notnan.loc[idx]
    x = dfi['log10(Rate [mmol/gDW/h])']
    y = dfi['log10(Metabolite value [gDW/mmol])']
    print(strain, st.pearsonr(x,y))

In [None]:
# Display the subset left over from the last iteration of the correlation loop
dfi