In [None]:
# Notebook setup: load the autoreload extension, make the project root
# importable, then import the project's helper module as `utils`.
%load_ext autoreload
import sys
# NOTE(review): hard-coded absolute path to one developer's checkout; on any
# other machine this must be edited before `src.utils` can be imported.
if not '/Users/nlarusstone/Documents/MPhil/Research/cf_fba' in sys.path:
    sys.path.append('/Users/nlarusstone/Documents/MPhil/Research/cf_fba')
import src.utils as utils
# Mode 2: reload all modules before running each cell, so edits to `utils` are picked up.
%autoreload 2

In [None]:
import cobra
import cobra.test
import pandas as pd
import numpy as np
import difflib
import re
import Bio.PDB.Polypeptide

In [None]:
import Bio.PDB.Polypeptide as pdb
import Bio.SeqUtils as su

# Reagent tables, indexed by their 'AA' / 'compound' column respectively.
aa_mix = pd.read_csv('../data/aa_mix.csv', index_col='AA')
nrg_mix = pd.read_csv('../data/energy_mix.csv', index_col='compound')
# Raw text of the gene file; passed to calc_conc below as a DNA sequence.
with open('../genes/rfp.txt', 'r') as f:
    seq = f.read()
# Starting component table; rows for the two mixes and the gene are appended below.
cfps_conc = pd.read_csv('../data/cfps_start.csv', index_col='compound')

def calc_conc(amt, vol, mw=None, seq=None, seq_type=None):
    """Return the molar concentration (mol/L) of a dissolved compound.

    Parameters
    ----------
    amt : mass of the compound in grams.
    vol : volume of the solution in millilitres.
    mw : molecular weight in g/mol; ignored when `seq` is given.
    seq : optional sequence (DNA, protein or a single amino acid). When
        non-empty, the molecular weight is computed from it with
        `su.molecular_weight(seq, seq_type)`.
    seq_type : sequence type forwarded to `su.molecular_weight`.

    Raises
    ------
    ValueError
        If neither a non-empty `seq` nor a non-zero `mw` is supplied.
    """
    if seq:
        mw = su.molecular_weight(seq, seq_type)
    elif not mw:
        raise ValueError('Need either a sequence or a molecular weight')
    # g / (mL * g/mol) = mol/mL; the factor 1000 converts to mol/L. Using the
    # float literal keeps integer inputs from floor-dividing under Python 2.
    return (amt * 1000.0) / (vol * mw)

def conc_dilution(start_conc, vol_add, tot_vol):
    """Return the concentration after diluting a stock into a larger volume.

    start_conc : concentration of the stock (scalar or pandas Series).
    vol_add : volume of stock added (scalar or pandas Series).
    tot_vol : final total volume, in the same unit as `vol_add`.
    """
    # Multiplying by 1.0 forces true division, so integer volumes do not
    # floor-divide to 0 under Python 2; results are unchanged for floats.
    return start_conc * (vol_add / (1.0 * tot_vol))

# --- Amino-acid mix ---------------------------------------------------------
def _aa_stock_conc(row):
    """Stock concentration for one amino-acid row (index label is the three-letter code)."""
    one_letter = pdb.three_to_one(row.name.upper())
    # calc_conc derives the molecular weight from the one-letter sequence.
    return calc_conc(row['weight_add'], 1, seq=one_letter, seq_type='protein')

aa_mix['start_conc'] = aa_mix.apply(_aa_stock_conc, axis=1)
aa_vol_total = aa_mix['vol_add'].sum()
aa_mix['conc_add'] = conc_dilution(aa_mix['start_conc'], aa_mix['vol_add'], aa_vol_total)

#pi
pi_conc = calc_conc(0.15, 5, mw=611.77)

# --- Energy mix -------------------------------------------------------------
def _nrg_stock_conc(row):
    """Stock concentration for one energy-mix row from its amt / fill / mw columns."""
    return calc_conc(row['amt'], row['fill'], mw=row['mw'])

nrg_mix['start_conc'] = nrg_mix.apply(_nrg_stock_conc, axis=1)
nrg_vol_total = nrg_mix['vol_add'].sum()
nrg_mix['conc_add'] = conc_dilution(nrg_mix['start_conc'], nrg_mix['vol_add'], nrg_vol_total)

# --- Gene -------------------------------------------------------------------
dna_conc = calc_conc(0.000750, 0.00496, seq=seq, seq_type='DNA')

# Append one row per mix component to the component table as
# [concentration, amount added]: energy mix first, then amino acids, then the gene.
for mix, vol in ((nrg_mix, 5), (aa_mix, 10)):
    for name, vals in mix.iterrows():
        cfps_conc.loc[name] = [vals['conc_add'], vol]
cfps_conc.loc['GENE'] = [dna_conc, 4.96]
# Final concentration of every component after dilution into a total of 50.
cfps_conc['final_conc'] = conc_dilution(cfps_conc['start_conc'], cfps_conc['amt'], 50)

In [None]:
# Base metabolic model (iJO1366), read from SBML; later cells copy and modify it.
model = cobra.io.read_sbml_model(filename='../models/iJO1366.xml')

In [None]:
# Second model (JSON) from which the transcription/translation reactions are taken.
varner = cobra.io.load_json_model('../models/varner.json')
def extract_txtl_rxns(model):
    """Collect the transcription/translation (TX-TL) reactions of `model`.

    Reactions are selected with `model.reactions.query` using these patterns:
    'mrna' (case-insensitive), 'tRNA_c', 'transcription', 'translation' and
    'PROTEIN'. Reactions that merely involve amino-acid metabolites are not
    included (the previous version looked them up but never used them).

    Returns
    -------
    list
        The unique matching reactions, sorted by id so that the result (and
        everything built from it) is reproducible between runs.
    """
    patterns = (
        re.compile('mrna', re.IGNORECASE),
        'tRNA_c',
        'transcription',
        'translation',
        'PROTEIN',
    )
    selected = set()
    for pattern in patterns:
        selected.update(model.reactions.query(pattern))
    return sorted(selected, key=lambda rxn: rxn.id)
# Pull the TX-TL reactions out of the Varner model and preview the first ten.
txtl_rxns = extract_txtl_rxns(varner)
txtl_rxns[:10]

In [None]:
# Id stems (text before the last '_c') of every metabolite in compartment 'c' of `model`.
metab_ids = [m.id.rsplit('_c', 1)[0] for m in model.metabolites if m.compartment == 'c']
# Manual translation table: Varner metabolite stem -> stem used in `model`,
# consulted by varner_to_cobra when a Varner stem is not found in `metab_ids`.
varner_to_ijo = {'5pbdra': 'pram',
                'faicar': 'fprica',
                '78dhf': 'dhpt',
                '78mdp': '6hmhptpp',
                'cadav': '15dap',
                'cair': '5aizc',
                'clasp': 'cbp',
                'fgar': 'fgam',
                'gaba': '4abut',
                'glycoA': 'gcald',
                'nh3': 'nh4',
                'he': 'h',
                'omp': 'orot5p',
                 'or': 'orot',
                 'prop': 'ppcoa',
                 'mglx': 'mthgxl',
                 'saicar': '25aics',
                 '4adochor': '4adcho'
                }
# Preview the first ten stems.
metab_ids[:10]

In [None]:
def varner_to_cobra(model, metab, metab_ids, varner_to_ijo):
    """Return the metabolite of `model` that corresponds to a Varner metabolite.

    Parameters
    ----------
    model : model whose `metabolites` container is searched (and, for ids
        without the 'M_' prefix, extended).
    metab : metabolite from the Varner model; only its `id` is inspected.
    metab_ids : collection of cytosolic id stems available in `model`.
    varner_to_ijo : dict mapping Varner stems to the stems used in `model`.

    Resolution order for ids of the form 'M_<stem>_c':
      1. stems containing 'tRNA' map to '<first token>trna';
      2. stems already present in `metab_ids` are used unchanged;
      3. stems listed in `varner_to_ijo` use the mapped name;
      4. stems containing '_L' or '_D' use the closest entry of `metab_ids`
         (difflib, cutoff 0.7).

    Ids without the 'M_' prefix are returned as the model's own metabolite
    with that id; the metabolite is added to `model.metabolites` first when it
    is missing. (Previously this branch always failed on an unassigned name.)

    Raises
    ------
    ValueError
        If no counterpart can be determined for the stem.
    KeyError
        If the resolved '<name>_c' id is not present in `model`.
    """
    if not metab.id.startswith('M_'):
        try:
            return model.metabolites.get_by_id(metab.id)
        except KeyError:
            model.metabolites.add(metab)
            return model.metabolites.get_by_id(metab.id)

    # Strip only the leading 'M_' and the trailing '_c' compartment suffix.
    metab_stem = metab.id.split('M_', 1)[1].rsplit('_c', 1)[0]
    if 'tRNA' in metab_stem:
        metab_name = metab_stem.split('_', 1)[0] + 'trna'
    elif metab_stem in metab_ids:
        metab_name = metab_stem
    elif metab_stem in varner_to_ijo:
        metab_name = varner_to_ijo[metab_stem]
    elif '_L' in metab_stem or '_D' in metab_stem:
        # Stereo-specific names differ in spelling between the two models
        # (e.g. 'ala_L' vs 'ala__L'), so fall back to fuzzy matching.
        matches = difflib.get_close_matches(metab_stem, metab_ids, 1, 0.7)
        if not matches:
            raise ValueError('No close match for metabolite {0}'.format(metab_stem))
        metab_name = matches[0]
    else:
        raise ValueError('TODO: no known counterpart for metabolite {0}'.format(metab_stem))
    return model.metabolites.get_by_id(metab_name + '_c')

In [None]:
def add_txtl(model, txtl_rxns):
    """Return a copy of `model` extended with the given TX-TL reactions.

    For each reaction, every metabolite whose id starts with 'M_' is swapped
    for its counterpart in the copied model (see `varner_to_cobra`, which
    reads the notebook-level `metab_ids` and `varner_to_ijo`); other
    metabolites are left as they are. The reaction is then added to the copy.

    Each reaction is copied before it is edited, so neither `model` nor the
    model that owns `txtl_rxns` is modified and the cell can be re-run.
    """
    mod = model.copy()
    for src_rxn in txtl_rxns:
        rxn = src_rxn.copy()
        for metab, amt in rxn.metabolites.items():
            if not metab.id.startswith('M_'):
                continue
            new_metab = varner_to_cobra(mod, metab, metab_ids, varner_to_ijo)
            # Cancel the old metabolite's coefficient, then add the same
            # coefficient on its counterpart.
            rxn.add_metabolites({metab: -1 * amt})
            rxn.add_metabolites({new_metab: amt})
        mod.add_reaction(rxn)
    return mod
model_txtl = add_txtl(model, txtl_rxns)

In [None]:
def get_aa_metab(model, aa, cmpt='c'):
    """Query `model.metabolites` for the pattern '<aa>__<any char>_<cmpt>'.

    The single wildcard character stands for the stereo label between the
    double underscore and the compartment suffix.
    NOTE(review): ids without a '__X' part would not match this pattern —
    confirm that every amino acid of interest has one.
    """
    id_pattern = '{0}__._{1}'.format(aa, cmpt)
    matches = model.metabolites.query(id_pattern)
    return matches

In [None]:
def replace_metab(mod, metab):
    """Return the cytosolic ('_c') counterpart of `metab` in `mod`.

    The counterpart id is `metab.id` with its final '_<char>' replaced by
    '_c'. If `mod` already has a metabolite with that id it is returned.
    Otherwise `metab` itself is renamed to that id, its compartment is set
    to 'c', and it is returned (in-place modification of `metab`).

    Only a failed lookup (KeyError) triggers the rename; any other error from
    the lookup propagates instead of being silently swallowed.
    """
    new_id = re.sub(r'_.$', '_c', metab.id)
    try:
        return mod.metabolites.get_by_id(new_id)
    except KeyError:
        metab.id = new_id
        metab.compartment = 'c'
        return metab

In [None]:
def coalesce_cmpts(model):
    """Return a copy of `model` with all metabolites merged into compartment 'c'.

    Every reaction that touches compartment 'p' or 'e' has each of its
    metabolites replaced by the cytosolic counterpart from `replace_metab`,
    keeping the coefficients. Afterwards all metabolites whose id ends in
    '_e' or '_p' are removed; each must no longer take part in any reaction
    (asserted).
    """
    mod = model.copy()
    for rxn in mod.reactions:
        if 'p' in rxn.compartments or 'e' in rxn.compartments:
            for metab, amt in rxn.metabolites.items():
                cyt = replace_metab(mod, metab)
                # Cancel the original coefficient, then put it on the cytosolic metabolite.
                rxn.add_metabolites({metab: -1 * amt})
                rxn.add_metabolites({cyt: amt})
            # The former `rxn.comparments = set('c')` only attached a misspelled,
            # unused attribute and was dropped.
            # NOTE(review): rxn.compartments presumably derives from the
            # reaction's metabolites — confirm for the installed cobra version.
    for leftover_pattern in (r'.*_e$', r'.*_p$'):
        for m in mod.metabolites.query(leftover_pattern):
            assert(len(m.reactions) == 0)
            m.remove_from_model(destructive=True)
    return mod
model_cyt = coalesce_cmpts(model)
model_cyt_txtl = coalesce_cmpts(model_txtl)

In [None]:
def strip_exchanges(mod, reactants):
    """Return a copy of `mod` without the exchange reactions of `reactants`.

    Parameters
    ----------
    mod : model to copy; it is not modified.
    reactants : iterable of compound names. 'trna' selects every metabolite
        matching 'trna'; a three-letter amino-acid code (any case) selects the
        metabolites returned by `get_aa_metab`; any other name selects the
        metabolite with id '<name>_c' (KeyError if it does not exist).

    Every reaction that is both in `model.exchanges` and attached to one of
    the selected metabolites is removed from the copy.
    """
    model = mod.copy()
    # Evaluate the exchange list once; nothing is removed until after the loop.
    exchanges = model.exchanges
    exs = set()
    for metab in reactants:
        if metab == 'trna':
            targets = model.metabolites.query('trna')
        elif metab.upper() in pdb.aa3:
            targets = get_aa_metab(model, metab.lower(), cmpt='c')
        else:
            targets = [model.metabolites.get_by_id('{0}_c'.format(metab))]
        for target in targets:
            exs.update(target.reactions.intersection(exchanges))
    model.remove_reactions(exs)
    return model
# The last row of cfps_conc (the gene) is excluded from the reactant list.
model_bare = strip_exchanges(model_cyt, cfps_conc.index[:-1])
model_bare_txtl = strip_exchanges(model_cyt_txtl, cfps_conc.index[:-1])

In [None]:
def build_medium(model, cfps_conc):
    """Return a copy of `model` with one exchange boundary per medium component.

    Parameters
    ----------
    model : model to copy; it is not modified.
    cfps_conc : DataFrame indexed by compound name with a 'final_conc' column.

    For every row the flux bound is `utils.conc_to_flux(final_conc) * 100`.
    'trna' selects all metabolites matching 'trna', three-letter amino-acid
    codes go through `get_aa_metab`, and any other name selects metabolites
    whose id starts with '<name>_c'. Rows that match no metabolite add nothing.

    All lookups are done on the copy, so the boundaries reference the copy's
    own metabolites (previously two branches queried the original model).

    NOTE(review): the boundaries use lb=0, ub=flux; confirm that this sign
    convention is the intended direction for supplying the medium.
    """
    mod = model.copy()

    for metab, vals in cfps_conc.iterrows():
        flux = utils.conc_to_flux(vals['final_conc']) * 100

        if metab == 'trna':
            ms = mod.metabolites.query('trna')
        elif metab.upper() in pdb.aa3:
            ms = get_aa_metab(mod, metab.lower(), cmpt='c')
        else:
            ms = mod.metabolites.query(r'^{0}_c'.format(metab))
        for m in ms:
            mod.add_boundary(metabolite=m, type='exchange', lb=0, ub=flux)
    return mod
model_cf = build_medium(model_bare, cfps_conc)
model_cf_txtl = build_medium(model_bare_txtl, cfps_conc)

In [None]:
# Variant 1: the objective is the PROTEIN_export_RFP reaction alone.
model_cf_txtl_rfp = model_cf_txtl.copy()
model_cf_txtl_rfp.objective = model_cf_txtl_rfp.reactions.PROTEIN_export_RFP
# Variant 2: combined objective, biomass reaction (coefficient 1) plus
# PROTEIN_export_RFP (coefficient 100).
model_cf_txtl_comb = model_cf_txtl.copy()
model_cf_txtl_comb.objective = {model_cf_txtl_comb.reactions.BIOMASS_Ec_iJO1366_core_53p95M: 1, model_cf_txtl_comb.reactions.PROTEIN_export_RFP: 100}

In [None]:
# Optimise the model without TX-TL reactions and show its summary.
sol = model_cf.optimize()
model_cf.summary()

In [None]:
# Same for the TX-TL model with the RFP-export objective.
sol = model_cf_txtl_rfp.optimize()
model_cf_txtl_rfp.summary()

In [None]:
# Same for the TX-TL model with the combined biomass + RFP objective.
sol = model_cf_txtl_comb.optimize()
model_cf_txtl_comb.summary()

In [None]:
# Save the three model variants built above as SBML files under ../models.
cobra.io.write_sbml_model(filename='../models/ecoli_cf_base.sbml', cobra_model=model_cf)
cobra.io.write_sbml_model(filename='../models/ecoli_cf_txtl_rfp_base.sbml', cobra_model=model_cf_txtl_rfp)
cobra.io.write_sbml_model(filename='../models/ecoli_cf_txtl_comb_base.sbml', cobra_model=model_cf_txtl_comb)

In [None]:
#m.metabolites.mRNA_RFP.elements = {'C': 6475, 'H': 8586, 'N': 2552, 'O': 5406, 'P': 678}
#m.reactions.transcription_RFP.annotation = {'SBO': 'SBO:0000631'}
def validate_model(m):
    """Run cobra's validation checks on `m` and return what they report.

    The checks are run in this order: reaction bounds, metabolite
    compartment/formula, mass balance.

    Returns
    -------
    dict
        Keys 'reaction_bounds', 'metabolite_compartment_formula' and
        'mass_balance', each holding the value returned by the corresponding
        `cobra.manipulation.validate.check_*` function. (Previously the
        results were computed and thrown away.)
    """
    validate = cobra.manipulation.validate
    return {
        'reaction_bounds': validate.check_reaction_bounds(model=m),
        'metabolite_compartment_formula': validate.check_metabolite_compartment_formula(model=m),
        'mass_balance': validate.check_mass_balance(model=m),
    }

In [None]:
# Load the experimental data table and leave out the Area_* / Conc_* columns.
df = pd.read_csv('../data/Karim_MetEng_2018_Figure2_Data.csv').drop(
    columns=['Area_1', 'Area_2', 'Conc_1', 'Conc_2'])
df.head()

In [None]:
# Keep the first row of each distinct (Glucose, NH4(Glu), NAD, CoA) combination,
# attach that row's 'AVG.1' value, and write the result to disk.
reduced = df.loc[:, ['Glucose', 'NH4(Glu)', 'NAD', 'CoA']].drop_duplicates(keep='first')
reduced['AVG.1'] = df.loc[reduced.index, 'AVG.1']
reduced.to_csv('../data/reduced.csv')

In [None]:
# Call the project helper change_obj with the btol_c metabolite on both models
# (presumably makes btol_c the optimisation target — confirm in utils).
# NOTE(review): `model_cf_med` is not defined in any of the cells shown here, so
# this cell raises NameError on a fresh kernel — confirm where it should come from.
utils.change_obj(model=model_cf_med, metab=model_cf_med.metabolites.btol_c)
utils.change_obj(model=model_cf, metab=model_cf.metabolites.btol_c)

In [None]:
def add_addl_reactants(model, df):
    """Return the objective value obtained for each row of `df`.

    A copy of `model` is used. For every row, the values from the fifth
    column onwards are converted with `utils.conc_to_flux`, negated, and
    paired with the reagent names below through `utils.gen_metab_dict`; the
    resulting dict is handed to `utils.add_exchange(..., additive=True)`.
    The work is done inside `with mod:` so each row's changes are
    (presumably) reverted before the next row — confirm against cobra's
    model context manager.
    """
    mod = model.copy()
    addl_reagent_nms = ['mg2', 'nh4', 'k', 'glc__D', 'pi', 'nad', 'atp', 'coa']
    objs = []
    for _, row in df.iterrows():
        with mod:
            uptake_fluxes = [-1 * utils.conc_to_flux(conc) for conc in row[4:]]
            metab_dict = utils.gen_metab_dict(mod, addl_reagent_nms, uptake_fluxes)
            utils.add_exchange(mod, metab_dict, additive=True)
            objs.append(mod.slim_optimize())
    return objs

In [None]:
# Display the cytosolic mg2 metabolite of the base model.
model.metabolites.mg2_c

In [None]:
# Runs the `utils` version of add_addl_reactants, not the function defined above.
# NOTE(review): `model_cf_med` is not defined in the cells shown here.
utils.add_addl_reactants(model_cf_med, df)

In [None]:
# Save `model_cf_med` as JSON in the current working directory.
cobra.io.save_json_model(filename='Full_Ecoli_CF_Base.json', model=model_cf_med)

In [None]:
# Display the current medium of the base model.
model.medium

In [None]:
# Baseline optimisation of the unmodified base model.
model.optimize()
model.summary()

In [None]:
# Pin both bounds of the glucose exchange to -conc_to_flux(200) and re-optimise.
# NOTE: this and the following cells change bounds on the shared `model` object
# in place, so their results depend on the order in which cells are run.
fl = -1 * utils.conc_to_flux(200)
print fl
glc = model.reactions.EX_glc_e
glc.lower_bound, glc.upper_bound = fl, fl
model.optimize()
model.summary()

In [None]:
# Same as the previous cell with conc_to_flux(120).
fl = -1 * utils.conc_to_flux(120)
print fl
glc = model.reactions.EX_glc_e
glc.lower_bound, glc.upper_bound = fl, fl
model.optimize()
model.summary()

In [None]:
# Switch the base model's objective via the project helper, passing btol_c
# (this modifies `model` in place; all later cells see the new objective).
utils.change_obj(model, model.metabolites.btol_c)

In [None]:
# Reset the glucose exchange bounds to (-10, 1000), display the reaction, re-optimise.
glc = model.reactions.EX_glc_e
glc.lower_bound, glc.upper_bound = -10, 1000
model.reactions.EX_glc_e
model.optimize()
model.summary()

In [None]:
# Pin the glucose exchange to -conc_to_flux(200) under the new objective.
fl = -1 * utils.conc_to_flux(200)
glc = model.reactions.EX_glc_e
glc.lower_bound, glc.upper_bound = fl, fl
model.optimize()
model.summary()

In [None]:
# Same with -conc_to_flux(120).
fl = -1 * utils.conc_to_flux(120)
glc = model.reactions.EX_glc_e
glc.lower_bound, glc.upper_bound = fl, fl
model.optimize()
model.summary()

In [None]:
# Pin both the glucose and the ammonium exchange to fixed fluxes
# (-conc_to_flux(200) and -conc_to_flux(10)) and re-optimise.
fl = -1 * utils.conc_to_flux(200)
fl_nh4 = -1 * utils.conc_to_flux(10)
print fl, fl_nh4
glc = model.reactions.EX_glc_e
nh4 = model.reactions.EX_nh4_e
glc.lower_bound, glc.upper_bound = fl, fl
nh4.lower_bound, nh4.upper_bound = fl_nh4, fl_nh4
model.optimize()
model.summary()

In [None]:
# As the previous cell, and additionally bound the phosphate and magnesium
# exchanges to the range [flux, 0].
fl = -1 * utils.conc_to_flux(200)
fl_nh4 = -1 * utils.conc_to_flux(10)
fl_pi = -1 * utils.conc_to_flux(10)
fl_mg = -1 * utils.conc_to_flux(8)
print fl, fl_nh4
glc = model.reactions.EX_glc_e
nh4 = model.reactions.EX_nh4_e
pi = model.reactions.EX_pi_e
mg = model.reactions.EX_mg2_e
# Helper: set the lower bound of `rxn` to `fl` and its upper bound to 0.
def update_bnds(rxn, fl): 
    rxn.lower_bound, rxn.upper_bound = fl, 0 
# Glucose and ammonium are pinned exactly; phosphate and magnesium get a range.
glc.lower_bound, glc.upper_bound = fl, fl
nh4.lower_bound, nh4.upper_bound = fl_nh4, fl_nh4
update_bnds(pi, fl_pi)
update_bnds(mg, fl_mg)
model.optimize()
model.summary()

In [None]:
# Apply the reagent values of data row 36 (columns from the fifth onwards) as
# [flux, 0] bounds on the matching 'EX_<name>_e' reactions, then re-optimise.
# NOTE(review): this list uses 'glc' and 'nadh' where add_addl_reactants uses
# 'glc__D' and 'nad' — confirm which names are intended.
elts = ['mg2', 'nh4', 'k', 'glc', 'pi', 'nadh', 'atp', 'coa']
row = df.iloc[36, 4:]
fluxes = [-1 * utils.conc_to_flux(row[i]) for i in range(len(elts))]
print elts
print row.tolist()
for elt, flux in zip(elts, fluxes):
    #print elt, '{0}_c'.format(elt)#, model.metabolites.get_by_id('{0}_c'.format(elt))
    try:
        rxn = model.reactions.get_by_id('EX_{0}_e'.format(elt))
    except KeyError:
        # NOTE(review): `break` stops at the first name without an exchange
        # reaction, so every later name is skipped as well — confirm that
        # `continue` was not intended.
        break
        #model.add_boundary(reaction_id='EX_{0}_e'.format(elt), type='Exchange', 
        #                   metabolite=model.metabolites.get_by_id('{0}_c'.format(elt)), ub=flux)
        #rxn = model.reactions.get_by_id('EX_{0}_e'.format(elt))
    rxn.lower_bound, rxn.upper_bound = flux, 0
model.optimize()
model.summary()

In [None]:
# Read a second data file, skipping its first six lines.
df2 = pd.read_csv(filepath_or_buffer='../data/4_5_18_T7_C6N_mRFP.TXT', skiprows=6)

In [None]:
# List the columns that were parsed.
df2.columns