In [51]:
import sys
sys.path.append('../../code/')
import pandas as pd
import numpy as np
from pathlib import Path


import leakage

# This notebook is used to map intracellular data to extracelluar conditions
This is necessary to get the most relevant intracelluar concentrations from literature values

In [52]:
data_folder = Path('../../data')
figure_folder = Path('../../Figures/')
div_folder = Path('../../../data/this_project/5_div/')


## Get extracellular data

In [53]:
sintef_fn = data_folder / 'this_project/1_e_coli_batch_cultures/1ABE_merged_metabolomics_data.csv'
paczia_fn = data_folder / 'paczia_2012' / 'e_coli'/'e_coli_exometabolites.csv'
vila_fn = data_folder / 'vila_2023' / 'Targeted_LCMS.csv'


In [54]:
sintef_df = pd.read_csv(sintef_fn, index_col=0)
# Only intrerested in the absolute quantification
sintef_df = sintef_df.loc[sintef_df['Concentration [uM]'].notna()]

In [55]:
paczia_df, paczia_df_std = leakage.get_concentrations(paczia_fn.parent, 'e_coli')

In [56]:
vila_df = pd.read_csv(vila_fn)
vila_df = vila_df.loc[vila_df.Strain == 'Ecoli']

## Mapping


In [57]:
mapping_df = pd.read_csv(data_folder / 'this_project/5_div/5B_id_mapping.csv', index_col=0)
met_abrv_to_id = mapping_df['Ecoli metabolite'].to_dict()
met_abrv_to_name = mapping_df['Metabolite name'].to_dict()
met_name_to_abrv_paczia = {value:key for key, value in met_abrv_to_name.items()}
met_abrv_to_name['SER'] = 'Serine'
met_abrv_to_name['DHAP'] = 'Dihydroxyacetone phosphate'

## Find conditions

In [58]:
conditions_metabolites = []
for cs in sintef_df['Carbon source'].unique():
    df_cs = sintef_df.loc[sintef_df['Carbon source']==cs]
    for m in df_cs.Metabolite.unique():
        for phase in ['Stationary', 'Exponential']:
            conditions_metabolites.append([cs, phase, m])

for m in paczia_df.columns:
    m_name = met_abrv_to_name[m]
    if m_name == '2/3-phosphoglycerate':
        m_names = ['2-phosphoglycerate', '3-phosphoglycerate']
    elif m_name == 'Ribu-/Xylulose-5-phosphate':
        m_names = ['Ribulose-5-phosphate', 'Xylulose-5-phosphate']
    else:
        m_names = [m_name]
    for mi in m_names:
        for phase in ['Stationary', 'Exponential']:
            conditions_metabolites.append(['Glucose', phase, mi])



### Vila et al

In [59]:
metname_translate_dict = {'Acetic acid': 'Acetate',
'Acetyl-ornithine': 'Acetyl-ornithine',
'Alanine': 'Alanine',
'Asparagine': 'Asparagine',
'Butyric acid': 'Butyrate',
'Citric acid': 'Citrate',
'Fumaric acid': 'Fumarate',
'Glycine': 'Glycine',
'Lactic acid': 'Lactate',
'Methionine': 'Methionine',
'Propionic acid': 'Propionate',
'Putrescine': 'Putrescine',
'Pyruvic acid': 'Pyruvate',
'Serine': 'Serine',
'Succinic acid': 'Succinate',
'Valeric acid':'Valerate',
'Valine': 'Valine',
'alpha-Ketoglutaric acid': 'Alpha-ketoglutarate',
'beta-Hydroxybutyric acid': 'Beta-hydroxybutyrate'}

In [60]:
vila_peak_df = pd.read_csv(data_folder / 'vila_2023' / 'growth_curves_peaktimes.csv', index_col=0)
strain_cs_to_PT = vila_peak_df.groupby(['Strain', 'Carbon source']).agg({'Peak time':'max'}).to_dict()['Peak time']


In [61]:
time_buffer = 2
for i, row in vila_df.iterrows():
    peak_time = strain_cs_to_PT[row['Strain'], row['Carbon_Source']]
    if row['Timepoint'] < peak_time+2:
        vila_df.at[i, 'Phase'] = 'Exponential'
    else:
        vila_df.at[i, 'Phase'] = 'Stationary'

In [62]:
vila_temp = vila_df.groupby(['Carbon_Source', 'Metabolite', 'Timepoint', 'Phase']).agg({'Strain':'first'}).reset_index()

vila_temp['Metabolite name'] = vila_temp.Metabolite.map(metname_translate_dict)
vila_list = vila_temp[['Carbon_Source','Phase', 'Metabolite name']].values.tolist()

In [63]:
vila_temp.Metabolite.unique()

array(['Acetic acid', 'Acetyl-ornithine', 'Alanine', 'Asparagine',
       'Butyric acid', 'Citric acid', 'Fumaric acid', 'Glycine',
       'Hippuric acid', 'Lactic acid', 'Methionine', 'Propionic acid',
       'Putrescine', 'Pyruvic acid', 'Serine', 'Succinic acid',
       'Valeric acid', 'Valine', 'alpha-Ketoglutaric acid',
       'beta-Hydroxybutyric acid'], dtype=object)

In [64]:
conditions_df = pd.DataFrame(conditions_metabolites + vila_list, columns= ['Carbon source', 'Phase', 'Metabolite'])



In [65]:
conditions_df['Carbon source'] = conditions_df['Carbon source'].str.capitalize()

In [66]:
conditions_df.replace({'Carbon source': {'D-glucose':'Glucose'}}, inplace=True)

In [68]:
conditions_df.drop_duplicates(inplace=True)

# Get intracellular data

In [69]:
all_conc_fn = data_folder / 'this_project' / '5_div' / '5E_intracellular_concentrations.csv'
intra_df = pd.read_csv(all_conc_fn, index_col=0)


# Map the different conditions/extracellular metabolites to intracellular values
We don't differ between shake flask and bioreactor, as the difference seems small (Thorfinnsdottir et al., https://www.mdpi.com/2218-1989/13/2/150)

In [71]:

for i, row in conditions_df.iterrows():
    met = row['Metabolite']
    # print(i, row)
    intra_i = intra_df.loc[intra_df.Metabolite==met]
    if not len(intra_i):
        conditions_df.at[i, 'Intracellular concentration [uM]'] = np.nan
        continue
    minimal_idx = intra_i['Minimal/complex']=='Minimal'
    if np.sum(minimal_idx):
        idx = minimal_idx
        if row['Phase'] == 'Exponential':
            log_phase_idx = intra_i.loc[idx].growth_status.str.lower().str.contains('log')
            if np.sum(log_phase_idx):
                idx = idx & log_phase_idx
        elif row['Phase'] == 'Stationary':
            stat_phase_idx = intra_i.loc[idx].growth_status.str.lower().str.contains('stationary')
            if np.sum(stat_phase_idx):
                idx = idx & stat_phase_idx

        # Carbon source
        cs_idx = intra_i.loc[idx, 'Carbon source'].str.lower() == row['Carbon source'].lower()
        if np.sum(cs_idx):
            idx = idx & cs_idx
    else:
        idx = np.ones(len(intra_i)).astype(bool)
        
    concentration = intra_i.loc[idx].concentration.mean()
    n = len(intra_i.loc[idx])
    matched_conc_idx = ",".join([str(x) for x in intra_i.loc[idx].index])
    if n == 1:
        
        ub = intra_i.loc[idx].UB.values[0]
        lb = intra_i.loc[idx].LB.values[0]
        error = (ub-lb)/2
        sem = error/1.96
        max_ub = ub
        min_lb = lb
    else:
        sem = intra_i.loc[idx, 'concentration'].sem()
        error = 1.96*sem
        ub = concentration + error
        lb = concentration - error
        max_ub = intra_i.loc[idx].UB.max()
        min_lb = intra_i.loc[idx].LB.min()
    
    conditions_df.at[i,'Intracellular concentration [uM]'] = concentration
    conditions_df.at[i,'Error [uM]'] = error
    conditions_df.at[i,'SEM [uM]'] = sem
    conditions_df.at[i,'# values'] = n
    conditions_df.at[i,'idx concentration sheet'] = matched_conc_idx
    conditions_df.at[i,'Min'] = intra_i.loc[idx].concentration.min()
    conditions_df.at[i,'Max'] = intra_i.loc[idx].concentration.max()
    conditions_df.at[i,'LB'] = lb
    conditions_df.at[i,'Min LB'] = min_lb
    conditions_df.at[i,'UB'] = ub
    conditions_df.at[i,'Max UB'] = max_ub


In [72]:
conditions_fn = data_folder / 'this_project/4_paired_metabolomics_live_dead/4F_mapped_intracellular_conc.csv'

conditions_df.to_csv(conditions_fn)

In [73]:
conditions_df.loc[conditions_df.Min.isna()].Metabolite.unique()

array(['Alpha-aminoadipate', 'Creatine', 'Deoxycarnitine (deoxy-c0)',
       'Glutarate', 'Glutarylcarnitine (c5-dc)', 'Lactate',
       'Sebacoyl-l-carnitine (c10-dc)', 'Propanoate',
       'Alpha-aminobutyrate', 'Hexoses', 'Orotate', 'Butyrate', nan,
       'Propionate', 'Valerate', 'Beta-hydroxybutyrate'], dtype=object)

In [74]:
met = 'Alpha-ketoglutarate'
conditions_df.loc[conditions_df.Metabolite==met]

Unnamed: 0,Carbon source,Phase,Metabolite,Intracellular concentration [uM],Error [uM],SEM [uM],# values,idx concentration sheet,Min,Max,LB,Min LB,UB,Max UB
286,Glucose,Stationary,Alpha-ketoglutarate,234.244444,204.618053,104.396966,3.0,222120,126.4,443.0,29.626392,105.805072,438.862497,631.0
287,Glucose,Exponential,Alpha-ketoglutarate,234.244444,204.618053,104.396966,3.0,222120,126.4,443.0,29.626392,105.805072,438.862497,631.0
344,D-fructose,Exponential,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0
346,D-fructose,Stationary,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0
368,D-galactose,Exponential,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0
466,D-ribose,Exponential,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0
524,Glycerol,Exponential,Alpha-ketoglutarate,616.0,351.5,179.336735,1.0,23,616.0,616.0,357.0,357.0,1060.0,1060.0
566,L-arabinose,Exponential,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0
567,L-arabinose,Stationary,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0
624,L-malate,Exponential,Alpha-ketoglutarate,352.946667,188.770993,96.311731,5.0,2324222120,126.4,616.0,164.175674,105.805072,541.717659,1060.0


In [75]:
intra_df.Metabolite=='a-ketoglutarate'

0      False
1      False
2      False
3      False
4      False
       ...  
275    False
276    False
281    False
280    False
279    False
Name: Metabolite, Length: 297, dtype: bool

In [76]:
intra_df.Metabolite.unique()

array(['2-phosphoglycerate', '3-phosphoglycerate', 'Acetate',
       'Acetyl-ornithine', 'Alanine', 'Alpha-ketoglutarate', 'Arginine',
       'Asparagine', 'Aspartate', 'Beta-alanine', 'Cis-aconitate',
       'Citrate', 'Dihydroxyacetone phosphate', 'Erythrose-4-phosphate',
       'Formate', 'Fructose-1,6-bisphosphate', 'Fructose-6-phosphate',
       'Fumarate', 'Gamma-aminobutyrate', 'Glucose-6-phosphate',
       'Glutamate', 'Glutamine', 'Glyceraldehyde-3-phosphate', 'Glycine',
       'Histidine', 'Homoserine', 'Isocitrate', 'Isoleucine', 'Leucine',
       'Lysine', 'Malate', 'Methionine', 'NAD', 'Oxaloacetate',
       'Phenylalanine', 'Phosphoenolpyruvate', 'Pipecolic acid',
       'Proline', 'Putrescine', 'Pyruvate', 'Ribose-5-phosphate',
       'Ribulose-5-phosphate', 'Sedoheptulose 7-phosphate', 'Serine',
       'Shikimate', 'Succinate', 'Threonine', 'Trans-aconitate',
       'Tryptophan', 'Tyrosine', 'Uracil', 'Valine',
       'Xylulose-5-phosphate'], dtype=object)