In [None]:
import pandas as pd
import re
import os
import numpy as np

In [None]:
from a_vine import model as av

# Ids of every exchange reaction exposed by the imported `a_vine` model.
exchanges = {rxn.id for rxn in av.exchanges}

In [None]:
from concerto.helpers.biolog_to_exchange import biolog_map

# Converting biolog plate data into something that can be used with cobra

This maps Biolog plate/well ids to BiGG exchange-reaction ids. A reference file (`biolog_from_jer.csv`) holds the mappings. Its original source is still unknown. TODO: track it down and cite it here.

In [None]:
# parsing original file and saving to standard
def create_biolog_to_bigg():
    """Build the plate-well -> exchange-id table from the raw mapping file.

    Reads ``biolog_from_jer.csv``, drops rows that have no value in the
    ``'EX_ rxn ?'`` column, derives a ``plate`` id from ``Identifier``, runs
    every ``ex_rxn`` string through :func:`parse`, writes the table to
    ``plate_to_bigg.csv`` (without the index) and returns it.

    Returns
    -------
    pandas.DataFrame
        Columns ``plate``, ``ex_rxn``, ``compound_name`` and ``exchange``,
        sorted by ``plate`` (plain string sort).
    """
    # read and cleanup mapping: rows without an exchange reaction are dropped
    raw = pd.read_csv('biolog_from_jer.csv').dropna(subset=['EX_ rxn ?'])

    # standard IDs: 'PM' + first character + '-' + second character + the
    # remaining characters round-tripped through int (which strips leading
    # zeros). An Identifier of the form '1A01' would therefore give 'PM1-A1'.
    identifier = raw['Identifier']
    plate = (
        'PM' + identifier.str[:1] + '-'
        + identifier.str[1]
        + identifier.str[2:].astype(int).astype(str)
    )

    # Assemble a brand-new frame instead of mutating a column slice of `raw`
    # in place; the latter is what makes pandas emit SettingWithCopyWarning.
    # This also swaps the awkward source headers (spaces, '?') for clean names.
    mapping = pd.DataFrame({
        'plate': plate,
        'ex_rxn': raw['EX_ rxn ?'],
        'compound_name': raw['cmpd name'],
    }).sort_values('plate')

    # fixing some errors where two underscores are used
    mapping['exchange'] = mapping['ex_rxn'].apply(parse)

    # save file for loading later
    mapping.to_csv('plate_to_bigg.csv', index=False)
    return mapping

def parse(row):
    """Rewrite one exchange-reaction id string.

    Two substitutions are applied, in this order:

    1. every parenthesised group ``(x)`` is replaced by ``_x``;
    2. every segment consisting of one character, optionally followed by one
       word character, enclosed in single underscores (``_x_`` / ``_xy_``)
       gets a doubled leading underscore (``__x_`` / ``__xy_``).

    Parameters
    ----------
    row : str
        The id to rewrite (a single cell value when used via ``Series.apply``).

    Returns
    -------
    str
        The rewritten id; unchanged if neither pattern matches.
    """
    # A single re.sub with a back-reference lets each match keep its *own*
    # captured text. Looping over findall() and calling re.sub() with a fixed
    # replacement rewrote every occurrence with one match's text, and treated
    # backslashes in the captured text as template escapes.
    rate = re.sub(r'\((.*?)\)', r'_\1', row)
    return re.sub(r'\_(.\w?)\_', r'__\1_', rate)

# Disabled local cache path: it would load 'plate_to_bigg.csv' when present
# and otherwise rebuild it with create_biolog_to_bigg(). While it stays
# commented out, `biolog_map` is whatever the earlier
# `from concerto.helpers.biolog_to_exchange import biolog_map` bound it to.
# if os.path.exists('plate_to_bigg.csv'):
#     biolog_map = pd.read_csv('plate_to_bigg.csv', index_col=False)
# else:
#     biolog_map = create_biolog_to_bigg()
biolog_map.head()

In [None]:
# Lookup table keyed by `plate`, giving the matching `exchange` value.
biolog_to_bigg = biolog_map.set_index('plate')['exchange'].to_dict()

In [None]:

# AV biolog data
# Supplementary table 2 BIOLOG PLATES.xlsx
# source: https://pubmed.ncbi.nlm.nih.gov/32551229/
# The path is relative to the directory the notebook is run from.
# get_plate() below reads the '<PM>' and 'Conclusion <PM>' columns of this sheet.

av_biolog = pd.read_excel('raw_data/Supplementary table 2 BIOLOG PLATES.xlsx')
av_biolog.head()

In [None]:
def get_plate(pm, biolog=None):
    """Extract the growth calls for one plate as a tidy two-column table.

    Parameters
    ----------
    pm : str
        Plate name, e.g. ``'PM1'``. The columns ``pm`` and
        ``f'Conclusion {pm}'`` are read from ``biolog``.
    biolog : pandas.DataFrame, optional
        Table to read from. Defaults to the module-level ``av_biolog`` so
        existing ``get_plate('PM1')`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``plate`` (``pm + '-' + <value of column pm>``) and ``growth``
        (bool). Only rows whose conclusion is exactly ``'Growth'`` or
        ``'Non-Growth'`` are kept; the original row index is preserved.
    """
    if biolog is None:
        biolog = av_biolog

    # Map the two recognised labels straight to booleans; anything else
    # (other labels, missing cells) becomes NaN and is dropped below. This
    # avoids writing True/False into a float NaN column through .loc, which
    # newer pandas flags as an incompatible-dtype assignment.
    calls = pd.DataFrame({
        'plate': pm + '-' + biolog[pm],
        'growth': biolog[f'Conclusion {pm}'].map(
            {'Growth': True, 'Non-Growth': False}
        ),
    })
    calls = calls.dropna(subset=['growth'])
    return calls.assign(growth=calls['growth'].astype(bool))

# One (plate, growth) table per plate name.
pm1, pm2, pm3 = (get_plate(name) for name in ('PM1', 'PM2', 'PM3'))
# PM1 and PM2 are stacked into a single `carbon_sources` table.
carbon_sources = pd.concat([pm1, pm2])
pm1

In [None]:
def merge_with_chem_ids(plate, mapping=None, uptake=10):
    """Attach exchange ids to a plate's growth calls.

    Parameters
    ----------
    plate : pandas.DataFrame
        Must contain the columns ``plate`` and ``growth``.
    mapping : pandas.DataFrame, optional
        Must contain the columns ``plate``, ``ex_rxn`` and ``exchange``.
        Defaults to the module-level ``biolog_map``.
    uptake : optional
        Value written to every row of the ``uptake`` column. Defaults to 10,
        which the original code described as arbitrary.

    Returns
    -------
    pandas.DataFrame
        Columns ``exchange``, ``uptake``, ``growth`` (in that order) with a
        fresh 0..n-1 index; one row per well found in both inputs that has a
        non-null ``growth`` and ``ex_rxn``. Rows follow the order of ``plate``.
    """
    if mapping is None:
        mapping = biolog_map

    # Inner join on the well id. Unlike an outer concat along axis=1 followed
    # by dropna, this does not require unique well ids and does not pass
    # `growth` through NaN (which would turn it into an object column).
    merged = plate.merge(mapping, on='plate', how='inner')
    # Wells can still match a mapping row whose reaction is blank.
    merged = merged.dropna(subset=['growth', 'ex_rxn']).reset_index(drop=True)

    # define uptake, arbitrary for now; column order matches memote
    return merged.assign(uptake=uptake)[['exchange', 'uptake', 'growth']]

def subset_to_in_model(plate, model_ex=exchanges):
    """Return the rows of `plate` whose `exchange` value is in `model_ex`."""
    in_model = plate['exchange'].isin(model_ex)
    return plate.loc[in_model]


# PM1 + PM2: attach exchange ids, keep the ones in `exchanges`, then write
# the filtered table to disk.
carbon_w_names = merge_with_chem_ids(carbon_sources)
carbon_w_names_in_model = carbon_w_names.pipe(subset_to_in_model, exchanges)
carbon_w_names_in_model.to_csv('growth/pm1_pm2_biolog.csv', index=False)


In [None]:
# Same steps for PM3; subset_to_in_model is left on its default `model_ex`.
pm3_w_names = merge_with_chem_ids(pm3)
pm3_w_names_in_model = pm3_w_names.pipe(subset_to_in_model)
pm3_w_names_in_model.to_csv('growth/pm3_biolog.csv', index=False)


In [None]:
# Exchange ids present in the merged Biolog tables. Despite its name,
# `biolog_ex_pm1` is built from `carbon_w_names`, i.e. PM1 and PM2 together.
biolog_ex_pm1 = set(carbon_w_names['exchange'])
biolog_ex_pm3 = set(pm3_w_names['exchange'])
biolog_ex = biolog_ex_pm1 | biolog_ex_pm3
# biolog_ex

# TODO
Figure out why some exchange ids appear in the model but not in the Biolog data, and vice versa (the cells below list both sets).

In [None]:
# Size of each id set before comparing them.
print(f'{len(exchanges)} exchange metabolites in model')
print(f'{len(biolog_ex)} exchange metabolites in biolog')

In [None]:
# Compare the two id sets: ids only in `biolog_ex`, ids only in `exchanges`,
# and ids present in both.
print(f'{len(biolog_ex.difference(exchanges))} missing in model')
print(f'{len(exchanges.difference(biolog_ex))} missing in biolog')
print(f'{len(exchanges.intersection(biolog_ex))} from model found in biolog')

<img src="https://ars.els-cdn.com/content/image/1-s2.0-S2214030120300043-gr3.jpg" alt="Experimental" /> 

In [None]:
# Model exchange ids that are absent from `biolog_ex`, in sorted order.
for model_only_id in sorted(exchanges - biolog_ex):
    print(model_only_id)

In [None]:
# Ids in `biolog_ex` that are absent from the model's exchanges, sorted.
for biolog_only_id in sorted(biolog_ex - exchanges):
    print(biolog_only_id)


Found case where extra underscores made items missing. Fixed.

<strike>Some ids have a double underscore (`__`) in the model but only a single one in Biolog, e.g. `EX_12ppd_S_e` vs `EX_12ppd__S_e`. There is also an R/S difference: `EX_12ppd__R_e` vs `EX_12ppd__S_e`.</strike>
    
Fixed

## Data from Biolog that is not in the model.

In [None]:
# Final listing: ids in `biolog_ex` with no matching exchange in the model.
for biolog_only_id in sorted(biolog_ex - exchanges):
    print(biolog_only_id)